diff --git a/DESCRIPTION b/DESCRIPTION index 9002911c1..3dbbef169 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: xcms -Version: 2.99.1 -Date: 2017-05-10 +Version: 2.99.2 +Date: 2017-05-30 Title: LC/MS and GC/MS Data Analysis Author: Colin A. Smith , Ralf Tautenhahn , diff --git a/R/do_findChromPeaks-functions.R b/R/do_findChromPeaks-functions.R index 401e50172..9c610e2e2 100644 --- a/R/do_findChromPeaks-functions.R +++ b/R/do_findChromPeaks-functions.R @@ -14,95 +14,106 @@ ## Conclusion: ## o speed improvement can only come from internal methods called withihn. ## -##' @title Core API function for centWave peak detection -##' -##' @description This function performs peak density and wavelet based -##' chromatographic peak detection for high resolution LC/MS data in centroid -##' mode [Tautenhahn 2008]. -##' -##' @details This algorithm is most suitable for high resolution -##' LC/\{TOF,OrbiTrap,FTICR\}-MS data in centroid mode. In the first phase the -##' method identifies \emph{regions of interest} (ROIs) representing mass traces -##' that are characterized as regions with less than \code{ppm} m/z deviation in -##' consecutive scans in the LC/MS map. These ROIs are then subsequently -##' analyzed using continuous wavelet transform (CWT) to locate chromatographic -##' peaks on different scales. The first analysis step is skipped, if regions -##' of interest are passed with the \code{roiList} parameter. -##' -##' @note The \emph{centWave} was designed to work on centroided mode, thus it -##' is expected that such data is presented to the function. -##' -##' This function exposes core chromatographic peak detection functionality of -##' the \emph{centWave} method. While this function can be called directly, -##' users will generally call the corresponding method for the data object -##' instead. -##' -##' @param mz Numeric vector with the individual m/z values from all scans/ -##' spectra of one file/sample. 
-##' @param int Numeric vector with the individual intensity values from all -##' scans/spectra of one file/sample. -##' @param scantime Numeric vector of length equal to the number of -##' spectra/scans of the data representing the retention time of each scan. -##' @param valsPerSpect Numeric vector with the number of values for each -##' spectrum. -##' @inheritParams findChromPeaks-centWave -##' -##' @family core peak detection functions -##' @references -##' Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly -##' sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics} -##' 2008, 9:504 -##' @return -##' A matrix, each row representing an identified chromatographic peak, -##' with columns: -##' \describe{ -##' \item{mz}{Intensity weighted mean of m/z values of the peak across scans.} -##' \item{mzmin}{Minimum m/z of the peak.} -##' \item{mzmax}{Maximum m/z of the peak.} -##' \item{rt}{Retention time of the peak's midpoint.} -##' \item{rtmin}{Minimum retention time of the peak.} -##' \item{rtmax}{Maximum retention time of the peak.} -##' \item{into}{Integrated (original) intensity of the peak.} -##' \item{intb}{Per-peak baseline corrected integrated peak intensity.} -##' \item{maxo}{Maximum intensity of the peak.} -##' \item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, -##' \code{sd} being the standard deviation of local chromatographic noise.} -##' \item{egauss}{RMSE of Gaussian fit.} -##' } -##' Additional columns for \code{verboseColumns = TRUE}: -##' \describe{ -##' \item{mu}{Gaussian parameter mu.} -##' \item{sigma}{Gaussian parameter sigma.} -##' \item{h}{Gaussian parameter h.} -##' \item{f}{Region number of the m/z ROI where the peak was localized.} -##' \item{dppm}{m/z deviation of mass trace across scanns in ppk.} -##' \item{scale}{Scale on which the peak was localized.} -##' \item{scpos}{Peak position found by wavelet analysis (scan number).} -##' \item{scmin}{Left peak limit found by 
wavelet analysis (scan number).} -##' \item{scmax}{Right peak limit found by wavelet analysis (scan numer).} -##' } -##' @author Ralf Tautenhahn, Johannes Rainer -##' -##' @seealso \code{\link{centWave}} for the standard user interface method. -##' -##' @examples -##' ## Load the test file -##' library(faahKO) -##' fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") -##' xr <- xcmsRaw(fs, profstep = 0) -##' -##' ## Extracting the data from the xcmsRaw for do_findChromPeaks_centWave -##' mzVals <- xr@env$mz -##' intVals <- xr@env$intensity -##' ## Define the values per spectrum: -##' valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) -##' -##' ## Calling the function. We're using a large value for noise to speed up -##' ## the call in the example performance - in a real use case we would either -##' ## set the value to a reasonable value or use the default value. -##' res <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, -##' scantime = xr@scantime, valsPerSpect = valsPerSpect, noise = 10000) -##' head(res) +#' @title Core API function for centWave peak detection +#' +#' @description This function performs peak density and wavelet based +#' chromatographic peak detection for high resolution LC/MS data in centroid +#' mode [Tautenhahn 2008]. +#' +#' @details This algorithm is most suitable for high resolution +#' LC/\{TOF,OrbiTrap,FTICR\}-MS data in centroid mode. In the first phase +#' the method identifies \emph{regions of interest} (ROIs) representing +#' mass traces that are characterized as regions with less than \code{ppm} +#' m/z deviation in consecutive scans in the LC/MS map. These ROIs are then +#' subsequently analyzed using continuous wavelet transform (CWT) to locate +#' chromatographic peaks on different scales. The first analysis step is +#' skipped, if regions of interest are passed with the \code{roiList} +#' parameter. 
+#' +#' @note The \emph{centWave} was designed to work on centroided mode, thus it +#' is expected that such data is presented to the function. +#' +#' This function exposes core chromatographic peak detection functionality +#' of the \emph{centWave} method. While this function can be called +#' directly, users will generally call the corresponding method for the +#' data object instead. +#' +#' @param mz Numeric vector with the individual m/z values from all scans/ +#' spectra of one file/sample. +#' +#' @param int Numeric vector with the individual intensity values from all +#' scans/spectra of one file/sample. +#' +#' @param scantime Numeric vector of length equal to the number of +#' spectra/scans of the data representing the retention time of each scan. +#' +#' @param valsPerSpect Numeric vector with the number of values for each +#' spectrum. +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @family core peak detection functions +#' +#' @references +#' Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly +#' sensitive feature detection for high resolution LC/MS" +#' \emph{BMC Bioinformatics} 2008, 9:504 +#' +#' @return +#' A matrix, each row representing an identified chromatographic peak, +#' with columns: +#' \describe{ +#' +#' \item{mz}{Intensity weighted mean of m/z values of the peak across +#' scans.} +#' \item{mzmin}{Minimum m/z of the peak.} +#' \item{mzmax}{Maximum m/z of the peak.} +#' \item{rt}{Retention time of the peak's midpoint.} +#' \item{rtmin}{Minimum retention time of the peak.} +#' \item{rtmax}{Maximum retention time of the peak.} +#' \item{into}{Integrated (original) intensity of the peak.} +#' \item{intb}{Per-peak baseline corrected integrated peak intensity.} +#' \item{maxo}{Maximum intensity of the peak.} +#' \item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, +#' \code{sd} being the standard deviation of local chromatographic noise.} +#' \item{egauss}{RMSE of Gaussian fit.} +#' } +#' 
Additional columns for \code{verboseColumns = TRUE}: +#' \describe{ +#' +#' \item{mu}{Gaussian parameter mu.} +#' \item{sigma}{Gaussian parameter sigma.} +#' \item{h}{Gaussian parameter h.} +#' \item{f}{Region number of the m/z ROI where the peak was localized.} +#' \item{dppm}{m/z deviation of mass trace across scans in ppm.} +#' \item{scale}{Scale on which the peak was localized.} +#' \item{scpos}{Peak position found by wavelet analysis (scan number).} +#' \item{scmin}{Left peak limit found by wavelet analysis (scan number).} +#' \item{scmax}{Right peak limit found by wavelet analysis (scan number).} +#' } +#' +#' @author Ralf Tautenhahn, Johannes Rainer +#' +#' @seealso \code{\link{centWave}} for the standard user interface method. +#' +#' @examples +#' ## Load the test file +#' library(faahKO) +#' fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") +#' xr <- xcmsRaw(fs, profstep = 0) +#' +#' ## Extracting the data from the xcmsRaw for do_findChromPeaks_centWave +#' mzVals <- xr@env$mz +#' intVals <- xr@env$intensity +#' ## Define the values per spectrum: +#' valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) +#' +#' ## Calling the function. We're using a large value for noise to speed up +#' ## the call in the example performance - in a real use case we would either +#' ## set the value to a reasonable value or use the default value. +#' res <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, +#' scantime = xr@scantime, valsPerSpect = valsPerSpect, noise = 10000) +#' head(res) do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, ppm = 25, peakwidth = c(20, 50), @@ -1182,74 +1193,84 @@ do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, ############################################################ ## massifquant ## -##' @title Core API function for massifquant peak detection -##' -##' @description Massifquant is a Kalman filter (KF)-based chromatographic peak -##' detection for XC-MS data in centroid mode. 
The identified peaks -##' can be further refined with the \emph{centWave} method (see -##' \code{\link{do_findChromPeaks_centWave}} for details on centWave) -##' by specifying \code{withWave = TRUE}. -##' -##' @details This algorithm's performance has been tested rigorously -##' on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. -##' Simultaneous kalman filters identify peaks and calculate their -##' area under the curve. The default parameters are set to operate on -##' a complex LC-MS Orbitrap sample. Users will find it useful to do some -##' simple exploratory data analysis to find out where to set a minimum -##' intensity, and identify how many scans an average peak spans. The -##' \code{consecMissedLimit} parameter has yielded good performance on -##' Orbitrap data when set to (\code{2}) and on TOF data it was found best -##' to be at (\code{1}). This may change as the algorithm has yet to be -##' tested on many samples. The \code{criticalValue} parameter is perhaps -##' most dificult to dial in appropriately and visual inspection of peak -##' identification is the best suggested tool for quick optimization. -##' The \code{ppm} and \code{checkBack} parameters have shown less influence -##' than the other parameters and exist to give users flexibility and -##' better accuracy. 
-##' @inheritParams do_findChromPeaks_centWave -##' @inheritParams findChromPeaks-centWave -##' @inheritParams findChromPeaks-massifquant -##' @return -##' A matrix, each row representing an identified chromatographic peak, -##' with columns: -##' \describe{ -##' \item{mz}{Intensity weighted mean of m/z values of the peaks across -##' scans.} -##' \item{mzmin}{Minumum m/z of the peak.} -##' \item{mzmax}{Maximum m/z of the peak.} -##' \item{rtmin}{Minimum retention time of the peak.} -##' \item{rtmax}{Maximum retention time of the peak.} -##' \item{rt}{Retention time of the peak's midpoint.} -##' \item{into}{Integrated (original) intensity of the peak.} -##' \item{maxo}{Maximum intensity of the peak.} -##' } -##' If \code{withWave} is set to \code{TRUE}, the result is the same as -##' returned by the \code{\link{do_findChromPeaks_centWave}} method. -##' @family core peak detection functions -##' @seealso \code{\link{massifquant}} for the standard user interface method. -##' @references -##' Conley CJ, Smith R, Torgrip RJ, Taylor RM, Tautenhahn R and Prince JT -##' "Massifquant: open-source Kalman filter-based XC-MS isotope trace feature -##' detection" \emph{Bioinformatics} 2014, 30(18):2636-43. 
-##' @author Christopher Conley -##' @examples -##' library(faahKO) -##' library(xcms) -##' cdfpath <- system.file("cdf", package = "faahKO") -##' cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) -##' -##' ## Read the first file -##' xraw <- xcmsRaw(cdffiles[1]) -##' ## Extract the required data -##' mzVals <- xraw@env$mz -##' intVals <- xraw@env$intensity -##' ## Define the values per spectrum: -##' valsPerSpect <- diff(c(xraw@scanindex, length(mzVals))) -##' -##' ## Perform the peak detection using massifquant -##' res <- do_findChromPeaks_massifquant(mz = mzVals, int = intVals, -##' scantime = xraw@scantime, valsPerSpect = valsPerSpect) -##' head(res) +#' @title Core API function for massifquant peak detection +#' +#' @description Massifquant is a Kalman filter (KF)-based chromatographic peak +#' detection for XC-MS data in centroid mode. The identified peaks +#' can be further refined with the \emph{centWave} method (see +#' \code{\link{do_findChromPeaks_centWave}} for details on centWave) +#' by specifying \code{withWave = TRUE}. +#' +#' @details This algorithm's performance has been tested rigorously +#' on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. +#' Simultaneous kalman filters identify peaks and calculate their +#' area under the curve. The default parameters are set to operate on +#' a complex LC-MS Orbitrap sample. Users will find it useful to do some +#' simple exploratory data analysis to find out where to set a minimum +#' intensity, and identify how many scans an average peak spans. The +#' \code{consecMissedLimit} parameter has yielded good performance on +#' Orbitrap data when set to (\code{2}) and on TOF data it was found best +#' to be at (\code{1}). This may change as the algorithm has yet to be +#' tested on many samples. 
The \code{criticalValue} parameter is perhaps +#' most difficult to dial in appropriately and visual inspection of peak +#' identification is the best suggested tool for quick optimization. +#' The \code{ppm} and \code{checkBack} parameters have shown less influence +#' than the other parameters and exist to give users flexibility and +#' better accuracy. +#' +#' @inheritParams do_findChromPeaks_centWave +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @inheritParams findChromPeaks-massifquant +#' +#' @return +#' A matrix, each row representing an identified chromatographic peak, +#' with columns: +#' \describe{ +#' \item{mz}{Intensity weighted mean of m/z values of the peaks across +#' scans.} +#' \item{mzmin}{Minimum m/z of the peak.} +#' \item{mzmax}{Maximum m/z of the peak.} +#' \item{rtmin}{Minimum retention time of the peak.} +#' \item{rtmax}{Maximum retention time of the peak.} +#' \item{rt}{Retention time of the peak's midpoint.} +#' \item{into}{Integrated (original) intensity of the peak.} +#' \item{maxo}{Maximum intensity of the peak.} +#' } +#' +#' If \code{withWave} is set to \code{TRUE}, the result is the same as +#' returned by the \code{\link{do_findChromPeaks_centWave}} method. +#' +#' @family core peak detection functions +#' +#' @seealso \code{\link{massifquant}} for the standard user interface method. +#' +#' @references +#' Conley CJ, Smith R, Torgrip RJ, Taylor RM, Tautenhahn R and Prince JT +#' "Massifquant: open-source Kalman filter-based XC-MS isotope trace feature +#' detection" \emph{Bioinformatics} 2014, 30(18):2636-43. 
+#' +#' @author Christopher Conley +#' +#' @examples +#' library(faahKO) +#' library(xcms) +#' cdfpath <- system.file("cdf", package = "faahKO") +#' cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) +#' +#' ## Read the first file +#' xraw <- xcmsRaw(cdffiles[1]) +#' ## Extract the required data +#' mzVals <- xraw@env$mz +#' intVals <- xraw@env$intensity +#' ## Define the values per spectrum: +#' valsPerSpect <- diff(c(xraw@scanindex, length(mzVals))) +#' +#' ## Perform the peak detection using massifquant +#' res <- do_findChromPeaks_massifquant(mz = mzVals, int = intVals, +#' scantime = xraw@scantime, valsPerSpect = valsPerSpect) +#' head(res) do_findChromPeaks_massifquant <- function(mz, int, scantime, @@ -1396,80 +1417,89 @@ do_findChromPeaks_massifquant <- function(mz, ## impute: none (=bin), binlin, binlinbase, intlin ## baseValue default: min(int)/2 (smallest value in the whole data set). ## -##' @title Core API function for matchedFilter peak detection -##' -##' @description This function identifies peaks in the chromatographic -##' time domain as described in [Smith 2006]. The intensity values are -##' binned by cutting The LC/MS data into slices (bins) of a mass unit -##' (\code{binSize} m/z) wide. Within each bin the maximal intensity is -##' selected. The peak detection is then performed in each bin by -##' extending it based on the \code{steps} parameter to generate slices -##' comprising bins \code{current_bin - steps +1} to \code{current_bin + steps - 1}. -##' Each of these slices is then filtered with matched filtration using -##' a second-derative Gaussian as the model peak shape. After filtration -##' peaks are detected using a signal-to-ration cut-off. For more details -##' and illustrations see [Smith 2006]. -##' -##' @details The intensities are binned by the provided m/z values within each -##' spectrum (scan). Binning is performed such that the bins are centered around -##' the m/z values (i.e. 
the first bin includes all m/z values between -##' \code{min(mz) - bin_size/2} and \code{min(mz) + bin_size/2}). -##' -##' For more details on binning and missing value imputation see -##' \code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods. -##' -##' @note -##' This function exposes core peak detection functionality of -##' the \emph{matchedFilter} method. While this function can be called directly, -##' users will generally call the corresponding method for the data object -##' instead (e.g. the \code{link{findPeaks.matchedFilter}} method). -##' -##' @inheritParams do_findChromPeaks_centWave -##' @inheritParams findChromPeaks-centWave -##' @inheritParams imputeLinInterpol -##' @inheritParams findChromPeaks-matchedFilter -##' -##' @return A matrix, each row representing an identified chromatographic peak, -##' with columns: -##' \describe{ -##' \item{mz}{Intensity weighted mean of m/z values of the peak across scans.} -##' \item{mzmin}{Minimum m/z of the peak.} -##' \item{mzmax}{Maximum m/z of the peak.} -##' \item{rt}{Retention time of the peak's midpoint.} -##' \item{rtmin}{Minimum retention time of the peak.} -##' \item{rtmax}{Maximum retention time of the peak.} -##' \item{into}{Integrated (original) intensity of the peak.} -##' \item{intf}{Integrated intensity of the filtered peak.} -##' \item{maxo}{Maximum intensity of the peak.} -##' \item{maxf}{Maximum intensity of the filtered peak.} -##' \item{i}{Rank of peak in merged EIC (\code{<= max}).} -##' \item{sn}{Signal to noise ratio of the peak} -##' } -##' @references -##' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and -##' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite -##' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" -##' \emph{Anal. Chem.} 2006, 78:779-787. 
-##' @author Colin A Smith, Johannes Rainer -##' @family core peak detection functions -##' @seealso \code{\link{binYonX}} for a binning function, -##' \code{\link{imputeLinInterpol}} for the interpolation of missing values. -##' \code{\link{matchedFilter}} for the standard user interface method. -##' @examples -##' ## Load the test file -##' library(faahKO) -##' fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") -##' xr <- xcmsRaw(fs) -##' -##' ## Extracting the data from the xcmsRaw for do_findChromPeaks_centWave -##' mzVals <- xr@env$mz -##' intVals <- xr@env$intensity -##' ## Define the values per spectrum: -##' valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) -##' -##' res <- do_findChromPeaks_matchedFilter(mz = mzVals, int = intVals, -##' scantime = xr@scantime, valsPerSpect = valsPerSpect) -##' head(res) +#' @title Core API function for matchedFilter peak detection +#' +#' @description This function identifies peaks in the chromatographic +#' time domain as described in [Smith 2006]. The intensity values are +#' binned by cutting the LC/MS data into slices (bins) of a mass unit +#' (\code{binSize} m/z) wide. Within each bin the maximal intensity is +#' selected. The peak detection is then performed in each bin by +#' extending it based on the \code{steps} parameter to generate slices +#' comprising bins \code{current_bin - steps + 1} to +#' \code{current_bin + steps - 1}. +#' Each of these slices is then filtered with matched filtration using +#' a second-derivative Gaussian as the model peak shape. After filtration +#' peaks are detected using a signal-to-noise ratio cut-off. For more details +#' and illustrations see [Smith 2006]. +#' +#' @details The intensities are binned by the provided m/z values within each +#' spectrum (scan). Binning is performed such that the bins are centered +#' around the m/z values (i.e. the first bin includes all m/z values between +#' \code{min(mz) - bin_size/2} and \code{min(mz) + bin_size/2}). 
+#' +#' For more details on binning and missing value imputation see +#' \code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods. +#' +#' @note This function exposes core peak detection functionality of +#' the \emph{matchedFilter} method. While this function can be called +#' directly, users will generally call the corresponding method for the +#' data object instead (e.g. the \code{\link{findPeaks.matchedFilter}} +#' method). +#' +#' @inheritParams do_findChromPeaks_centWave +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @inheritParams imputeLinInterpol +#' +#' @inheritParams findChromPeaks-matchedFilter +#' +#' @return A matrix, each row representing an identified chromatographic peak, +#' with columns: +#' \describe{ +#' \item{mz}{Intensity weighted mean of m/z values of the peak across scans.} +#' \item{mzmin}{Minimum m/z of the peak.} +#' \item{mzmax}{Maximum m/z of the peak.} +#' \item{rt}{Retention time of the peak's midpoint.} +#' \item{rtmin}{Minimum retention time of the peak.} +#' \item{rtmax}{Maximum retention time of the peak.} +#' \item{into}{Integrated (original) intensity of the peak.} +#' \item{intf}{Integrated intensity of the filtered peak.} +#' \item{maxo}{Maximum intensity of the peak.} +#' \item{maxf}{Maximum intensity of the filtered peak.} +#' \item{i}{Rank of peak in merged EIC (\code{<= max}).} +#' \item{sn}{Signal to noise ratio of the peak.} +#' } +#' +#' @references +#' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and +#' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite +#' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" +#' \emph{Anal. Chem.} 2006, 78:779-787. +#' +#' @author Colin A Smith, Johannes Rainer +#' +#' @family core peak detection functions +#' +#' @seealso \code{\link{binYonX}} for a binning function, +#' \code{\link{imputeLinInterpol}} for the interpolation of missing values. 
+#' \code{\link{matchedFilter}} for the standard user interface method. +#' +#' @examples +#' ## Load the test file +#' library(faahKO) +#' fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") +#' xr <- xcmsRaw(fs) +#' +#' ## Extracting the data from the xcmsRaw for do_findChromPeaks_matchedFilter +#' mzVals <- xr@env$mz +#' intVals <- xr@env$intensity +#' ## Define the values per spectrum: +#' valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) +#' +#' res <- do_findChromPeaks_matchedFilter(mz = mzVals, int = intVals, +#' scantime = xr@scantime, valsPerSpect = valsPerSpect) +#' head(res) do_findChromPeaks_matchedFilter <- function(mz, int, scantime, @@ -1863,44 +1893,47 @@ do_findChromPeaks_matchedFilter <- function(mz, ############################################################ ## MSW ## -##' @title Core API function for single-spectrum non-chromatography MS data -##' peak detection -##' -##' @description This function performs peak detection in mass spectrometry -##' direct injection spectrum using a wavelet based algorithm. -##' -##' @details This is a wrapper around the peak picker in Bioconductor's -##' \code{MassSpecWavelet} package calling -##' \code{\link[MassSpecWavelet]{peakDetectionCWT}} and -##' \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. See the -##' \emph{xcmsDirect} vignette for more information. -##' -##' @inheritParams do_findChromPeaks_centWave -##' @inheritParams findChromPeaks-centWave -##' @param ... Additional parameters to be passed to the -##' \code{\link[MassSpecWavelet]{peakDetectionCWT}} function. 
-##' -##' @return -##' A matrix, each row representing an identified peak, with columns: -##' \describe{ -##' \item{mz}{m/z value of the peak at the centroid position.} -##' \item{mzmin}{Minimum m/z of the peak.} -##' \item{mzmax}{Maximum m/z of the peak.} -##' \item{rt}{Always \code{-1}.} -##' \item{rtmin}{Always \code{-1}.} -##' \item{rtmax}{Always \code{-1}.} -##' \item{into}{Integrated (original) intensity of the peak.} -##' \item{maxo}{Maximum intensity of the peak.} -##' \item{intf}{Always \code{NA}.} -##' \item{maxf}{Maximum MSW-filter response of the peak.} -##' \item{sn}{Signal to noise ratio.} -##' } -##' -##' @family core peak detection functions -##' @seealso ##' \code{\link{MSW}} for the standard user interface -##' method. \code{\link[MassSpecWavelet]{peakDetectionCWT}} from the -##' \code{MassSpecWavelet} package. -##' @author Joachim Kutzera, Steffen Neumann, Johannes Rainer +#' @title Core API function for single-spectrum non-chromatography MS data +#' peak detection +#' +#' @description This function performs peak detection in mass spectrometry +#' direct injection spectrum using a wavelet based algorithm. +#' +#' @details This is a wrapper around the peak picker in Bioconductor's +#' \code{MassSpecWavelet} package calling +#' \code{\link[MassSpecWavelet]{peakDetectionCWT}} and +#' \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. See the +#' \emph{xcmsDirect} vignette for more information. +#' +#' @inheritParams do_findChromPeaks_centWave +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @param ... Additional parameters to be passed to the +#' \code{\link[MassSpecWavelet]{peakDetectionCWT}} function. 
+#' +#' @return A matrix, each row representing an identified peak, with columns: +#' \describe{ +#' \item{mz}{m/z value of the peak at the centroid position.} +#' \item{mzmin}{Minimum m/z of the peak.} +#' \item{mzmax}{Maximum m/z of the peak.} +#' \item{rt}{Always \code{-1}.} +#' \item{rtmin}{Always \code{-1}.} +#' \item{rtmax}{Always \code{-1}.} +#' \item{into}{Integrated (original) intensity of the peak.} +#' \item{maxo}{Maximum intensity of the peak.} +#' \item{intf}{Always \code{NA}.} +#' \item{maxf}{Maximum MSW-filter response of the peak.} +#' \item{sn}{Signal to noise ratio.} +#' } +#' +#' @family core peak detection functions +#' +#' @seealso \code{\link{MSW}} for the standard user interface +#' method. \code{\link[MassSpecWavelet]{peakDetectionCWT}} from the +#' \code{MassSpecWavelet} package. +#' +#' @author Joachim Kutzera, Steffen Neumann, Johannes Rainer do_findPeaks_MSW <- function(mz, int, snthresh = 3, verboseColumns = FALSE, ...) { ## Input argument checking. @@ -2192,15 +2225,15 @@ do_findPeaks_MSW <- function(mz, int, snthresh = 3, ## } ## Tuned from the original code. -##' @param peaks. \code{matrix} or \code{data.frame} with peaks for which -##' isotopes should be predicted. Required columns are \code{"mz"}, -##' \code{"mzmin"}, \code{"mzmax"}, \code{"scmin"}, \code{"scmax"}, -##' \code{"intb"} and \code{"scale"}. -##' -##' @return a \code{matrix} with columns \code{"mz"}, \code{"mzmin"}, -##' \code{"mzmax"}, \code{"scmin"}, \code{"scmax"}, \code{"length"} (always -1), -##' \code{"intensity"} (always -1) and \code{"scale"}. -##' @noRd +#' @param peaks. \code{matrix} or \code{data.frame} with peaks for which +#' isotopes should be predicted. Required columns are \code{"mz"}, +#' \code{"mzmin"}, \code{"mzmax"}, \code{"scmin"}, \code{"scmax"}, +#' \code{"intb"} and \code{"scale"}. 
+#' +#' @return a \code{matrix} with columns \code{"mz"}, \code{"mzmin"}, +#' \code{"mzmax"}, \code{"scmin"}, \code{"scmax"}, \code{"length"} (always -1), +#' \code{"intensity"} (always -1) and \code{"scale"}. +#' @noRd do_define_isotopes <- function(peaks., maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE) { req_cols <- c("mz", "mzmin", "mzmax", "scmin", "scmax", "scale") @@ -2241,11 +2274,14 @@ do_define_isotopes <- function(peaks., maxCharge = 3, maxIso = 5, return(do.call(rbind, newRois)) } -##' param @peaks. see do_define_isotopes -##' @param polarity character(1) defining the polarity, either \code{"positive"} -##' or \code{"negative"}. -##' @return see do_define_isotopes. -##' @noRd +#' @param peaks. see do_define_isotopes +#' +#' @param polarity character(1) defining the polarity, either \code{"positive"} +#' or \code{"negative"}. +#' +#' @return see do_define_isotopes. +#' +#' @noRd do_define_adducts <- function(peaks., polarity = "positive") { req_cols <- c("mz", "mzmin", "mzmax", "scmin", "scmax", "scale") if (is.null(dim(peaks.))) @@ -2461,56 +2497,60 @@ do_findKalmanROI <- function(mz, int, scantime, valsPerSpect, ## 3) centWave on the predicted isotope ROIs. ## 4) combine both lists of identified peaks removing overlapping ones by ## keeping the peak with the largest signal intensity. -##' @title Core API function for two-step centWave peak detection with isotopes -##' -##' @description The \code{do_findChromPeaks_centWaveWithPredIsoROIs} performs a -##' two-step centWave based peak detection: chromatographic peaks are identified -##' using centWave followed by a prediction of the location of the identified -##' peaks' isotopes in the mz-retention time space. These locations are fed as -##' \emph{regions of interest} (ROIs) to a subsequent centWave run. All non -##' overlapping peaks from these two peak detection runs are reported as -##' the final list of identified peaks. 
-##' -##' @details For more details on the centWave algorithm see -##' \code{\link{centWave}}. -##' -##' @inheritParams findChromPeaks-centWave -##' @inheritParams findChromPeaks-centWaveWithPredIsoROIs -##' @inheritParams do_findChromPeaks_centWave -##' -##' @family core peak detection functions -##' @return -##' A matrix, each row representing an identified chromatographic peak. All -##' non-overlapping peaks identified in both centWave runs are reported. -##' The matrix columns are: -##' \describe{ -##' \item{mz}{Intensity weighted mean of m/z values of the peaks across scans.} -##' \item{mzmin}{Minimum m/z of the peaks.} -##' \item{mzmax}{Maximum m/z of the peaks.} -##' \item{rt}{Retention time of the peak's midpoint.} -##' \item{rtmin}{Minimum retention time of the peak.} -##' \item{rtmax}{Maximum retention time of the peak.} -##' \item{into}{Integrated (original) intensity of the peak.} -##' \item{intb}{Per-peak baseline corrected integrated peak intensity.} -##' \item{maxo}{Maximum intensity of the peak.} -##' \item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, -##' \code{sd} being the standard deviation of local chromatographic noise.} -##' \item{egauss}{RMSE of Gaussian fit.} -##' } -##' Additional columns for \code{verboseColumns = TRUE}: -##' \describe{ -##' \item{mu}{Gaussian parameter mu.} -##' \item{sigma}{Gaussian parameter sigma.} -##' \item{h}{Gaussian parameter h.} -##' \item{f}{Region number of the m/z ROI where the peak was localized.} -##' \item{dppm}{m/z deviation of mass trace across scanns in ppk.} -##' \item{scale}{Scale on which the peak was localized.} -##' \item{scpos}{Peak position found by wavelet analysis (scan number).} -##' \item{scmin}{Left peak limit found by wavelet analysis (scan number).} -##' \item{scmax}{Right peak limit found by wavelet analysis (scan numer).} -##' } -##' @rdname do_findChromPeaks_centWaveWithPredIsoROIs -##' @author Hendrik Treutler, Johannes Rainer +#' @title Core API function for 
two-step centWave peak detection with isotopes +#' +#' @description The \code{do_findChromPeaks_centWaveWithPredIsoROIs} performs a +#' two-step centWave based peak detection: chromatographic peaks are +#' identified using centWave followed by a prediction of the location of +#' the identified peaks' isotopes in the mz-retention time space. These +#' locations are fed as \emph{regions of interest} (ROIs) to a subsequent +#' centWave run. All non overlapping peaks from these two peak detection +#' runs are reported as the final list of identified peaks. +#' +#' @details For more details on the centWave algorithm see +#' \code{\link{centWave}}. +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @inheritParams findChromPeaks-centWaveWithPredIsoROIs +#' +#' @inheritParams do_findChromPeaks_centWave +#' +#' @family core peak detection functions +#' +#' @return A matrix, each row representing an identified chromatographic peak. +#' All non-overlapping peaks identified in both centWave runs are reported. 
+#' The matrix columns are: +#' \describe{ +#' \item{mz}{Intensity weighted mean of m/z values of the peaks across scans.} +#' \item{mzmin}{Minimum m/z of the peaks.} +#' \item{mzmax}{Maximum m/z of the peaks.} +#' \item{rt}{Retention time of the peak's midpoint.} +#' \item{rtmin}{Minimum retention time of the peak.} +#' \item{rtmax}{Maximum retention time of the peak.} +#' \item{into}{Integrated (original) intensity of the peak.} +#' \item{intb}{Per-peak baseline corrected integrated peak intensity.} +#' \item{maxo}{Maximum intensity of the peak.} +#' \item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, +#' \code{sd} being the standard deviation of local chromatographic noise.} +#' \item{egauss}{RMSE of Gaussian fit.} +#' } +#' Additional columns for \code{verboseColumns = TRUE}: +#' \describe{ +#' \item{mu}{Gaussian parameter mu.} +#' \item{sigma}{Gaussian parameter sigma.} +#' \item{h}{Gaussian parameter h.} +#' \item{f}{Region number of the m/z ROI where the peak was localized.} +#' \item{dppm}{m/z deviation of mass trace across scans in ppm.} +#' \item{scale}{Scale on which the peak was localized.} +#' \item{scpos}{Peak position found by wavelet analysis (scan number).} +#' \item{scmin}{Left peak limit found by wavelet analysis (scan number).} +#' \item{scmax}{Right peak limit found by wavelet analysis (scan number).} +#' } +#' +#' @rdname do_findChromPeaks_centWaveWithPredIsoROIs +#' +#' @author Hendrik Treutler, Johannes Rainer do_findChromPeaks_centWaveWithPredIsoROIs <- function(mz, int, scantime, valsPerSpect, ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", @@ -2558,30 +2598,32 @@ do_findChromPeaks_centWaveWithPredIsoROIs <- mzIntervalExtension = mzIntervalExtension, polarity = polarity)) } -##' @description The \code{do_findChromPeaks_centWaveAddPredIsoROIs} performs -##' centWave based peak detection based in regions of interest (ROIs) -##' representing predicted isotopes for the peaks
submitted with argument -##' \code{peaks.}. The function returns a matrix with the identified peaks -##' consisting of all input peaks and peaks representing predicted isotopes -##' of these (if found by the centWave algorithm). -##' -##' @param peaks. A matrix or \code{xcmsPeaks} object such as one returned by -##' a call to \code{link{do_findChromPeaks_centWave}} or -##' \code{link{findPeaks.centWave}} (both with \code{verboseColumns = TRUE}) -##' with the peaks for which isotopes should be predicted and used for an -##' additional peak detectoin using the centWave method. Required columns are: -##' \code{"mz"}, \code{"mzmin"}, \code{"mzmax"}, \code{"scmin"}, \code{"scmax"}, -##' \code{"scale"} and \code{"into"}. -##' -##' @param snthresh For \code{do_findChromPeaks_addPredIsoROIs}: -##' numeric(1) defining the signal to noise threshold for the centWave algorithm. -##' For \code{do_findChromPeaks_centWaveWithPredIsoROIs}: numeric(1) defining the -##' signal to noise threshold for the initial (first) centWave run. -##' -##' @inheritParams findChromPeaks-centWave -##' @inheritParams do_findChromPeaks_centWave -##' -##' @rdname do_findChromPeaks_centWaveWithPredIsoROIs +#' @description The \code{do_findChromPeaks_addPredIsoROIs} performs +#' centWave based peak detection based on regions of interest (ROIs) +#' representing predicted isotopes for the peaks submitted with argument +#' \code{peaks.}. The function returns a matrix with the identified peaks +#' consisting of all input peaks and peaks representing predicted isotopes +#' of these (if found by the centWave algorithm). +#' +#' @param peaks. A matrix or \code{xcmsPeaks} object such as one returned by +#' a call to \code{\link{do_findChromPeaks_centWave}} or +#' \code{\link{findPeaks.centWave}} (both with \code{verboseColumns = TRUE}) +#' with the peaks for which isotopes should be predicted and used for an +#' additional peak detection using the centWave method.
Required columns +#' are: \code{"mz"}, \code{"mzmin"}, \code{"mzmax"}, \code{"scmin"}, +#' \code{"scmax"}, \code{"scale"} and \code{"into"}. +#' +#' @param snthresh For \code{do_findChromPeaks_addPredIsoROIs}: +#' numeric(1) defining the signal to noise threshold for the centWave +#' algorithm. For \code{do_findChromPeaks_centWaveWithPredIsoROIs}: +#' numeric(1) defining the signal to noise threshold for the initial +#' (first) centWave run. +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @inheritParams do_findChromPeaks_centWave +#' +#' @rdname do_findChromPeaks_centWaveWithPredIsoROIs do_findChromPeaks_addPredIsoROIs <- function(mz, int, scantime, valsPerSpect, ppm = 25, peakwidth = c(20, 50), snthresh = 6.25, prefilter = c(3, 100), mzCenterFun = "wMean", diff --git a/R/do_groupChromPeaks-functions.R b/R/do_groupChromPeaks-functions.R index c118a2e57..930fd065c 100644 --- a/R/do_groupChromPeaks-functions.R +++ b/R/do_groupChromPeaks-functions.R @@ -26,6 +26,9 @@ ##' ##' @inheritParams groupChromPeaks-density ##' +##' @param sleep \code{numeric(1)}: time in seconds to pause between +##' iterations; if \code{> 0}, the result of each iteration is plotted. +##' ##' @return A \code{list} with elements \code{"featureDefinitions"} and ##' \code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row ##' representing a (mz-rt) feature (i.e. a peak group) with columns: @@ -71,7 +74,8 @@ ##' head(res$peakIndex) do_groupChromPeaks_density <- function(peaks, sampleGroups, bw = 30, minFraction = 0.5, minSamples = 1, - binSize = 0.25, maxFeatures = 50) { + binSize = 0.25, maxFeatures = 50, + sleep = 0) { if (missing(sampleGroups)) stop("Parameter 'sampleGroups' is missing!
This should be a vector of ", "length equal to the number of samples specifying the group ", @@ -82,6 +86,8 @@ do_groupChromPeaks_density <- function(peaks, sampleGroups, stop("'peaks' has to be a 'matrix' or a 'data.frame'!") ## Check that we've got all required columns .reqCols <- c("mz", "rt", "sample") + if (sleep > 0) + .reqCols <- c(.reqCols, "into") if (!all(.reqCols %in% colnames(peaks))) stop("Required columns ", paste0("'", .reqCols[!.reqCols %in% colnames(peaks)],"'", @@ -164,6 +170,23 @@ do_groupChromPeaks_density <- function(peaks, sampleGroups, groupmat[num, 7 + seq(along = gcount)] <- gcount groupindex[[num]] <- sort(peakOrder[(startIdx:endIdx)[gidx]]) } + if (sleep > 0) { + ## Plot the density + plot(den, main = paste(round(min(curMat[,"mz"]), 2), "-", + round(max(curMat[,"mz"]), 2))) + ## Highlight peaks per sample group. + for (j in 1:nSampleGroups) { + ## Which peaks belong to this sample group. + cur_group_samples <- which(sampleGroups == sampleGroupNames[j]) + idx <- curMat[, "sample"] %in% cur_group_samples + points(curMat[idx, "rt"], curMat[idx, "into"] / + max(curMat[, "into"]) * maxden, + col = j, pch=20) + } + for (j in seq(length = snum)) + abline(v = groupmat[num - snum + j, 5:6], lty = "dashed", col = j) + Sys.sleep(sleep) + } } message("OK") diff --git a/R/functions-utils.R b/R/functions-utils.R index 5915f5207..70fcb0dc3 100644 --- a/R/functions-utils.R +++ b/R/functions-utils.R @@ -287,8 +287,8 @@ useOriginalCode <- function(x) { #' @title Weighted mean around maximum #' -#' @describe Calculate a weighted mean of the values around the value with the -#' largest weight. \code{x} could e.g. be mz values and \code{w} the +#' @description Calculate a weighted mean of the values around the value with +#' the largest weight. \code{x} could e.g. be mz values and \code{w} the #' corresponding intensity values. 
#' #' @param x \code{numeric} vector from which the weighted mean should be diff --git a/R/methods-OnDiskMSnExp.R b/R/methods-OnDiskMSnExp.R index ff0a8cc87..8d97c7de2 100644 --- a/R/methods-OnDiskMSnExp.R +++ b/R/methods-OnDiskMSnExp.R @@ -59,7 +59,17 @@ setMethod("findChromPeaks", ## Restrict to MS1 data. object <- filterMsLevel(object, msLevel. = 1) ## Check if the data is centroided - if (!isCentroided(object[[1]])) + centroided <- isCentroided(object[[1]]) + ## issue #181: if there are too few mass peaks the function + ## returns NA. + if (is.na(centroided)) { + ## check all spectra in the file - takes longer. + centroided <- isCentroided(object) + if (length(which(centroided)) > 0 & + length(which(!centroided)) == 0) + centroided <- TRUE + } + if (!centroided) warning("Your data appears to be not centroided! CentWave", " works best on data in centroid mode.") ## (1) split the object per file. @@ -627,7 +637,17 @@ setMethod("findChromPeaks", ## Restrict to MS1 data. object <- filterMsLevel(object, msLevel. = 1) ## Check if the data is centroided - if (!isCentroided(object[[1]])) + centroided <- isCentroided(object[[1]]) + ## issue #181: if there are too few mass peaks the function + ## returns NA. + if (is.na(centroided)) { + ## check all spectra in the file - takes longer. + centroided <- isCentroided(object) + if (length(which(centroided)) > 0 & + length(which(!centroided)) == 0) + centroided <- TRUE + } + if (!centroided) warning("Your data appears to be not centroided! CentWave", " works best on data in centroid mode.") ## (1) split the object per file. 
diff --git a/R/methods-xcmsSet.R b/R/methods-xcmsSet.R index 5082d47a4..116ff3636 100644 --- a/R/methods-xcmsSet.R +++ b/R/methods-xcmsSet.R @@ -350,7 +350,8 @@ setMethod("group.density", "xcmsSet", function(object, bw = 30, minfrac = 0.5, minFraction = minfrac, minSamples = minsamp, binSize = mzwid, - maxFeatures = max) + maxFeatures = max, + sleep = sleep) groups(object) <- res$featureDefinitions groupidx(object) <- res$peakIndex diff --git a/inst/NEWS b/inst/NEWS index 0380de3a0..328668c96 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,3 +1,14 @@ +CHANGES IN VERSION 2.99.2 +------------------------- + +BUG FIXES: +- issue #181: problem when isCentroided,Spectrum method returns NA because of + too few peaks in a spectrum. Fixed by checking in such cases all spectra in + the file. +- issue #184: add parameter sleep to do_groupChromPeaks_density function to be + backwards compatible with the old group.density code. + + CHANGES IN VERSION 2.99.1 ------------------------- diff --git a/man/do_findChromPeaks_centWave.Rd b/man/do_findChromPeaks_centWave.Rd index 165cda5e3..e615910e9 100644 --- a/man/do_findChromPeaks_centWave.Rd +++ b/man/do_findChromPeaks_centWave.Rd @@ -86,57 +86,61 @@ should be used for the centWave-wavelets.} } \value{ A matrix, each row representing an identified chromatographic peak, -with columns: -\describe{ -\item{mz}{Intensity weighted mean of m/z values of the peak across scans.} -\item{mzmin}{Minimum m/z of the peak.} -\item{mzmax}{Maximum m/z of the peak.} -\item{rt}{Retention time of the peak's midpoint.} -\item{rtmin}{Minimum retention time of the peak.} -\item{rtmax}{Maximum retention time of the peak.} -\item{into}{Integrated (original) intensity of the peak.} -\item{intb}{Per-peak baseline corrected integrated peak intensity.} -\item{maxo}{Maximum intensity of the peak.} -\item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, -\code{sd} being the standard deviation of local chromatographic noise.} -\item{egauss}{RMSE of 
Gaussian fit.} -} -Additional columns for \code{verboseColumns = TRUE}: -\describe{ -\item{mu}{Gaussian parameter mu.} -\item{sigma}{Gaussian parameter sigma.} -\item{h}{Gaussian parameter h.} -\item{f}{Region number of the m/z ROI where the peak was localized.} -\item{dppm}{m/z deviation of mass trace across scanns in ppk.} -\item{scale}{Scale on which the peak was localized.} -\item{scpos}{Peak position found by wavelet analysis (scan number).} -\item{scmin}{Left peak limit found by wavelet analysis (scan number).} -\item{scmax}{Right peak limit found by wavelet analysis (scan numer).} -} + with columns: + \describe{ + + \item{mz}{Intensity weighted mean of m/z values of the peak across + scans.} + \item{mzmin}{Minimum m/z of the peak.} + \item{mzmax}{Maximum m/z of the peak.} + \item{rt}{Retention time of the peak's midpoint.} + \item{rtmin}{Minimum retention time of the peak.} + \item{rtmax}{Maximum retention time of the peak.} + \item{into}{Integrated (original) intensity of the peak.} + \item{intb}{Per-peak baseline corrected integrated peak intensity.} + \item{maxo}{Maximum intensity of the peak.} + \item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, + \code{sd} being the standard deviation of local chromatographic noise.} + \item{egauss}{RMSE of Gaussian fit.} + } + Additional columns for \code{verboseColumns = TRUE}: + \describe{ + + \item{mu}{Gaussian parameter mu.} + \item{sigma}{Gaussian parameter sigma.} + \item{h}{Gaussian parameter h.} + \item{f}{Region number of the m/z ROI where the peak was localized.} + \item{dppm}{m/z deviation of mass trace across scans in ppm.} + \item{scale}{Scale on which the peak was localized.} + \item{scpos}{Peak position found by wavelet analysis (scan number).} + \item{scmin}{Left peak limit found by wavelet analysis (scan number).} + \item{scmax}{Right peak limit found by wavelet analysis (scan numer).} + } } \description{ This function performs peak density and wavelet based -chromatographic peak 
detection for high resolution LC/MS data in centroid -mode [Tautenhahn 2008]. + chromatographic peak detection for high resolution LC/MS data in centroid + mode [Tautenhahn 2008]. } \details{ This algorithm is most suitable for high resolution -LC/\{TOF,OrbiTrap,FTICR\}-MS data in centroid mode. In the first phase the -method identifies \emph{regions of interest} (ROIs) representing mass traces -that are characterized as regions with less than \code{ppm} m/z deviation in -consecutive scans in the LC/MS map. These ROIs are then subsequently -analyzed using continuous wavelet transform (CWT) to locate chromatographic -peaks on different scales. The first analysis step is skipped, if regions -of interest are passed with the \code{roiList} parameter. + LC/\{TOF,OrbiTrap,FTICR\}-MS data in centroid mode. In the first phase + the method identifies \emph{regions of interest} (ROIs) representing + mass traces that are characterized as regions with less than \code{ppm} + m/z deviation in consecutive scans in the LC/MS map. These ROIs are then + subsequently analyzed using continuous wavelet transform (CWT) to locate + chromatographic peaks on different scales. The first analysis step is + skipped, if regions of interest are passed with the \code{roiList} + parameter. } \note{ The \emph{centWave} was designed to work on centroided mode, thus it -is expected that such data is presented to the function. + is expected that such data is presented to the function. -This function exposes core chromatographic peak detection functionality of -the \emph{centWave} method. While this function can be called directly, -users will generally call the corresponding method for the data object -instead. + This function exposes core chromatographic peak detection functionality + of the \emph{centWave} method. While this function can be called + directly, users will generally call the corresponding method for the + data object instead. 
} \examples{ ## Load the test file @@ -159,8 +163,8 @@ head(res) } \references{ Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly -sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics} -2008, 9:504 + sensitive feature detection for high resolution LC/MS" + \emph{BMC Bioinformatics} 2008, 9:504 } \seealso{ \code{\link{centWave}} for the standard user interface method. diff --git a/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd b/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd index eee8fa2d6..ab399c0fb 100644 --- a/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd +++ b/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd @@ -42,9 +42,10 @@ peak width in chromatographic space. Given as a range (min, max) in seconds.} \item{snthresh}{For \code{do_findChromPeaks_addPredIsoROIs}: -numeric(1) defining the signal to noise threshold for the centWave algorithm. -For \code{do_findChromPeaks_centWaveWithPredIsoROIs}: numeric(1) defining the -signal to noise threshold for the initial (first) centWave run.} +numeric(1) defining the signal to noise threshold for the centWave +algorithm. For \code{do_findChromPeaks_centWaveWithPredIsoROIs}: +numeric(1) defining the signal to noise threshold for the initial +(first) centWave run.} \item{prefilter}{\code{numeric(2)}: \code{c(k, I)} specifying the prefilter step for the first analysis step (ROI detection). Mass traces are only @@ -119,60 +120,60 @@ Currently not used, but has to be \code{"positive"}, \code{"negative"} or a call to \code{link{do_findChromPeaks_centWave}} or \code{link{findPeaks.centWave}} (both with \code{verboseColumns = TRUE}) with the peaks for which isotopes should be predicted and used for an -additional peak detectoin using the centWave method. Required columns are: -\code{"mz"}, \code{"mzmin"}, \code{"mzmax"}, \code{"scmin"}, \code{"scmax"}, -\code{"scale"} and \code{"into"}.} +additional peak detectoin using the centWave method. 
Required columns +are: \code{"mz"}, \code{"mzmin"}, \code{"mzmax"}, \code{"scmin"}, +\code{"scmax"}, \code{"scale"} and \code{"into"}.} } \value{ -A matrix, each row representing an identified chromatographic peak. All -non-overlapping peaks identified in both centWave runs are reported. -The matrix columns are: -\describe{ -\item{mz}{Intensity weighted mean of m/z values of the peaks across scans.} -\item{mzmin}{Minimum m/z of the peaks.} -\item{mzmax}{Maximum m/z of the peaks.} -\item{rt}{Retention time of the peak's midpoint.} -\item{rtmin}{Minimum retention time of the peak.} -\item{rtmax}{Maximum retention time of the peak.} -\item{into}{Integrated (original) intensity of the peak.} -\item{intb}{Per-peak baseline corrected integrated peak intensity.} -\item{maxo}{Maximum intensity of the peak.} -\item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, -\code{sd} being the standard deviation of local chromatographic noise.} -\item{egauss}{RMSE of Gaussian fit.} -} -Additional columns for \code{verboseColumns = TRUE}: -\describe{ -\item{mu}{Gaussian parameter mu.} -\item{sigma}{Gaussian parameter sigma.} -\item{h}{Gaussian parameter h.} -\item{f}{Region number of the m/z ROI where the peak was localized.} -\item{dppm}{m/z deviation of mass trace across scanns in ppk.} -\item{scale}{Scale on which the peak was localized.} -\item{scpos}{Peak position found by wavelet analysis (scan number).} -\item{scmin}{Left peak limit found by wavelet analysis (scan number).} -\item{scmax}{Right peak limit found by wavelet analysis (scan numer).} -} +A matrix, each row representing an identified chromatographic peak. + All non-overlapping peaks identified in both centWave runs are reported. 
+ The matrix columns are: + \describe{ + \item{mz}{Intensity weighted mean of m/z values of the peaks across scans.} + \item{mzmin}{Minimum m/z of the peaks.} + \item{mzmax}{Maximum m/z of the peaks.} + \item{rt}{Retention time of the peak's midpoint.} + \item{rtmin}{Minimum retention time of the peak.} + \item{rtmax}{Maximum retention time of the peak.} + \item{into}{Integrated (original) intensity of the peak.} + \item{intb}{Per-peak baseline corrected integrated peak intensity.} + \item{maxo}{Maximum intensity of the peak.} + \item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, + \code{sd} being the standard deviation of local chromatographic noise.} + \item{egauss}{RMSE of Gaussian fit.} + } + Additional columns for \code{verboseColumns = TRUE}: + \describe{ + \item{mu}{Gaussian parameter mu.} + \item{sigma}{Gaussian parameter sigma.} + \item{h}{Gaussian parameter h.} + \item{f}{Region number of the m/z ROI where the peak was localized.} + \item{dppm}{m/z deviation of mass trace across scans in ppm.} + \item{scale}{Scale on which the peak was localized.} + \item{scpos}{Peak position found by wavelet analysis (scan number).} + \item{scmin}{Left peak limit found by wavelet analysis (scan number).} + \item{scmax}{Right peak limit found by wavelet analysis (scan numer).} + } } \description{ The \code{do_findChromPeaks_centWaveWithPredIsoROIs} performs a -two-step centWave based peak detection: chromatographic peaks are identified -using centWave followed by a prediction of the location of the identified -peaks' isotopes in the mz-retention time space. These locations are fed as -\emph{regions of interest} (ROIs) to a subsequent centWave run. All non -overlapping peaks from these two peak detection runs are reported as -the final list of identified peaks. 
+ two-step centWave based peak detection: chromatographic peaks are + identified using centWave followed by a prediction of the location of + the identified peaks' isotopes in the mz-retention time space. These + locations are fed as \emph{regions of interest} (ROIs) to a subsequent + centWave run. All non overlapping peaks from these two peak detection + runs are reported as the final list of identified peaks. The \code{do_findChromPeaks_centWaveAddPredIsoROIs} performs -centWave based peak detection based in regions of interest (ROIs) -representing predicted isotopes for the peaks submitted with argument -\code{peaks.}. The function returns a matrix with the identified peaks -consisting of all input peaks and peaks representing predicted isotopes -of these (if found by the centWave algorithm). + centWave based peak detection based in regions of interest (ROIs) + representing predicted isotopes for the peaks submitted with argument + \code{peaks.}. The function returns a matrix with the identified peaks + consisting of all input peaks and peaks representing predicted isotopes + of these (if found by the centWave algorithm). } \details{ For more details on the centWave algorithm see -\code{\link{centWave}}. + \code{\link{centWave}}. 
} \seealso{ Other core peak detection functions: \code{\link{do_findChromPeaks_centWave}}, diff --git a/man/do_findChromPeaks_massifquant.Rd b/man/do_findChromPeaks_massifquant.Rd index a28658c86..ca2fab745 100644 --- a/man/do_findChromPeaks_massifquant.Rd +++ b/man/do_findChromPeaks_massifquant.Rd @@ -112,45 +112,46 @@ centWave algorithm, which includes wavelet estimation.} } \value{ A matrix, each row representing an identified chromatographic peak, -with columns: -\describe{ -\item{mz}{Intensity weighted mean of m/z values of the peaks across -scans.} -\item{mzmin}{Minumum m/z of the peak.} -\item{mzmax}{Maximum m/z of the peak.} -\item{rtmin}{Minimum retention time of the peak.} -\item{rtmax}{Maximum retention time of the peak.} -\item{rt}{Retention time of the peak's midpoint.} -\item{into}{Integrated (original) intensity of the peak.} -\item{maxo}{Maximum intensity of the peak.} -} -If \code{withWave} is set to \code{TRUE}, the result is the same as -returned by the \code{\link{do_findChromPeaks_centWave}} method. + with columns: + \describe{ + \item{mz}{Intensity weighted mean of m/z values of the peaks across + scans.} + \item{mzmin}{Minumum m/z of the peak.} + \item{mzmax}{Maximum m/z of the peak.} + \item{rtmin}{Minimum retention time of the peak.} + \item{rtmax}{Maximum retention time of the peak.} + \item{rt}{Retention time of the peak's midpoint.} + \item{into}{Integrated (original) intensity of the peak.} + \item{maxo}{Maximum intensity of the peak.} + } + + If \code{withWave} is set to \code{TRUE}, the result is the same as + returned by the \code{\link{do_findChromPeaks_centWave}} method. } \description{ Massifquant is a Kalman filter (KF)-based chromatographic peak -detection for XC-MS data in centroid mode. The identified peaks -can be further refined with the \emph{centWave} method (see -\code{\link{do_findChromPeaks_centWave}} for details on centWave) -by specifying \code{withWave = TRUE}. + detection for XC-MS data in centroid mode. 
The identified peaks + can be further refined with the \emph{centWave} method (see + \code{\link{do_findChromPeaks_centWave}} for details on centWave) + by specifying \code{withWave = TRUE}. } \details{ This algorithm's performance has been tested rigorously -on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. -Simultaneous kalman filters identify peaks and calculate their -area under the curve. The default parameters are set to operate on -a complex LC-MS Orbitrap sample. Users will find it useful to do some -simple exploratory data analysis to find out where to set a minimum -intensity, and identify how many scans an average peak spans. The -\code{consecMissedLimit} parameter has yielded good performance on -Orbitrap data when set to (\code{2}) and on TOF data it was found best -to be at (\code{1}). This may change as the algorithm has yet to be -tested on many samples. The \code{criticalValue} parameter is perhaps -most dificult to dial in appropriately and visual inspection of peak -identification is the best suggested tool for quick optimization. -The \code{ppm} and \code{checkBack} parameters have shown less influence -than the other parameters and exist to give users flexibility and -better accuracy. + on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. + Simultaneous kalman filters identify peaks and calculate their + area under the curve. The default parameters are set to operate on + a complex LC-MS Orbitrap sample. Users will find it useful to do some + simple exploratory data analysis to find out where to set a minimum + intensity, and identify how many scans an average peak spans. The + \code{consecMissedLimit} parameter has yielded good performance on + Orbitrap data when set to (\code{2}) and on TOF data it was found best + to be at (\code{1}). This may change as the algorithm has yet to be + tested on many samples. 
The \code{criticalValue} parameter is perhaps + most dificult to dial in appropriately and visual inspection of peak + identification is the best suggested tool for quick optimization. + The \code{ppm} and \code{checkBack} parameters have shown less influence + than the other parameters and exist to give users flexibility and + better accuracy. } \examples{ library(faahKO) diff --git a/man/do_findChromPeaks_matchedFilter.Rd b/man/do_findChromPeaks_matchedFilter.Rd index c3e7d58b9..9b24370ac 100644 --- a/man/do_findChromPeaks_matchedFilter.Rd +++ b/man/do_findChromPeaks_matchedFilter.Rd @@ -65,49 +65,51 @@ returned instead of values for m/z and retention times.} } \value{ A matrix, each row representing an identified chromatographic peak, -with columns: -\describe{ -\item{mz}{Intensity weighted mean of m/z values of the peak across scans.} -\item{mzmin}{Minimum m/z of the peak.} -\item{mzmax}{Maximum m/z of the peak.} -\item{rt}{Retention time of the peak's midpoint.} -\item{rtmin}{Minimum retention time of the peak.} -\item{rtmax}{Maximum retention time of the peak.} -\item{into}{Integrated (original) intensity of the peak.} -\item{intf}{Integrated intensity of the filtered peak.} -\item{maxo}{Maximum intensity of the peak.} -\item{maxf}{Maximum intensity of the filtered peak.} -\item{i}{Rank of peak in merged EIC (\code{<= max}).} -\item{sn}{Signal to noise ratio of the peak} -} + with columns: + \describe{ + \item{mz}{Intensity weighted mean of m/z values of the peak across scans.} + \item{mzmin}{Minimum m/z of the peak.} + \item{mzmax}{Maximum m/z of the peak.} + \item{rt}{Retention time of the peak's midpoint.} + \item{rtmin}{Minimum retention time of the peak.} + \item{rtmax}{Maximum retention time of the peak.} + \item{into}{Integrated (original) intensity of the peak.} + \item{intf}{Integrated intensity of the filtered peak.} + \item{maxo}{Maximum intensity of the peak.} + \item{maxf}{Maximum intensity of the filtered peak.} + \item{i}{Rank of peak in merged 
EIC (\code{<= max}).} + \item{sn}{Signal to noise ratio of the peak} + } } \description{ This function identifies peaks in the chromatographic -time domain as described in [Smith 2006]. The intensity values are -binned by cutting The LC/MS data into slices (bins) of a mass unit -(\code{binSize} m/z) wide. Within each bin the maximal intensity is -selected. The peak detection is then performed in each bin by -extending it based on the \code{steps} parameter to generate slices -comprising bins \code{current_bin - steps +1} to \code{current_bin + steps - 1}. -Each of these slices is then filtered with matched filtration using -a second-derative Gaussian as the model peak shape. After filtration -peaks are detected using a signal-to-ration cut-off. For more details -and illustrations see [Smith 2006]. + time domain as described in [Smith 2006]. The intensity values are + binned by cutting The LC/MS data into slices (bins) of a mass unit + (\code{binSize} m/z) wide. Within each bin the maximal intensity is + selected. The peak detection is then performed in each bin by + extending it based on the \code{steps} parameter to generate slices + comprising bins \code{current_bin - steps +1} to + \code{current_bin + steps - 1}. + Each of these slices is then filtered with matched filtration using + a second-derative Gaussian as the model peak shape. After filtration + peaks are detected using a signal-to-ration cut-off. For more details + and illustrations see [Smith 2006]. } \details{ The intensities are binned by the provided m/z values within each -spectrum (scan). Binning is performed such that the bins are centered around -the m/z values (i.e. the first bin includes all m/z values between -\code{min(mz) - bin_size/2} and \code{min(mz) + bin_size/2}). + spectrum (scan). Binning is performed such that the bins are centered + around the m/z values (i.e. the first bin includes all m/z values between + \code{min(mz) - bin_size/2} and \code{min(mz) + bin_size/2}). 
-For more details on binning and missing value imputation see -\code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods. + For more details on binning and missing value imputation see + \code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods. } \note{ This function exposes core peak detection functionality of -the \emph{matchedFilter} method. While this function can be called directly, -users will generally call the corresponding method for the data object -instead (e.g. the \code{link{findPeaks.matchedFilter}} method). + the \emph{matchedFilter} method. While this function can be called + directly, users will generally call the corresponding method for the + data object instead (e.g. the \code{link{findPeaks.matchedFilter}} + method). } \examples{ ## Load the test file @@ -133,8 +135,8 @@ Profiling Using Nonlinear Peak Alignment, Matching, and Identification" } \seealso{ \code{\link{binYonX}} for a binning function, -\code{\link{imputeLinInterpol}} for the interpolation of missing values. -\code{\link{matchedFilter}} for the standard user interface method. + \code{\link{imputeLinInterpol}} for the interpolation of missing values. + \code{\link{matchedFilter}} for the standard user interface method. Other core peak detection functions: \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}}, \code{\link{do_findChromPeaks_centWave}}, diff --git a/man/do_findPeaks_MSW.Rd b/man/do_findPeaks_MSW.Rd index 601bf0c1e..ada00f4fb 100644 --- a/man/do_findPeaks_MSW.Rd +++ b/man/do_findPeaks_MSW.Rd @@ -3,7 +3,7 @@ \name{do_findPeaks_MSW} \alias{do_findPeaks_MSW} \title{Core API function for single-spectrum non-chromatography MS data -peak detection} + peak detection} \usage{ do_findPeaks_MSW(mz, int, snthresh = 3, verboseColumns = FALSE, ...) 
} @@ -24,35 +24,35 @@ columns should be returned.} } \value{ A matrix, each row representing an identified peak, with columns: -\describe{ -\item{mz}{m/z value of the peak at the centroid position.} -\item{mzmin}{Minimum m/z of the peak.} -\item{mzmax}{Maximum m/z of the peak.} -\item{rt}{Always \code{-1}.} -\item{rtmin}{Always \code{-1}.} -\item{rtmax}{Always \code{-1}.} -\item{into}{Integrated (original) intensity of the peak.} -\item{maxo}{Maximum intensity of the peak.} -\item{intf}{Always \code{NA}.} -\item{maxf}{Maximum MSW-filter response of the peak.} -\item{sn}{Signal to noise ratio.} -} + \describe{ + \item{mz}{m/z value of the peak at the centroid position.} + \item{mzmin}{Minimum m/z of the peak.} + \item{mzmax}{Maximum m/z of the peak.} + \item{rt}{Always \code{-1}.} + \item{rtmin}{Always \code{-1}.} + \item{rtmax}{Always \code{-1}.} + \item{into}{Integrated (original) intensity of the peak.} + \item{maxo}{Maximum intensity of the peak.} + \item{intf}{Always \code{NA}.} + \item{maxf}{Maximum MSW-filter response of the peak.} + \item{sn}{Signal to noise ratio.} + } } \description{ This function performs peak detection in mass spectrometry -direct injection spectrum using a wavelet based algorithm. + direct injection spectrum using a wavelet based algorithm. } \details{ This is a wrapper around the peak picker in Bioconductor's -\code{MassSpecWavelet} package calling -\code{\link[MassSpecWavelet]{peakDetectionCWT}} and -\code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. See the -\emph{xcmsDirect} vignette for more information. + \code{MassSpecWavelet} package calling + \code{\link[MassSpecWavelet]{peakDetectionCWT}} and + \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. See the + \emph{xcmsDirect} vignette for more information. } \seealso{ -##' \code{\link{MSW}} for the standard user interface -method. \code{\link[MassSpecWavelet]{peakDetectionCWT}} from the -\code{MassSpecWavelet} package. 
+\code{\link{MSW}} for the standard user interface + method. \code{\link[MassSpecWavelet]{peakDetectionCWT}} from the + \code{MassSpecWavelet} package. Other core peak detection functions: \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}}, \code{\link{do_findChromPeaks_centWave}}, diff --git a/man/do_groupChromPeaks_density.Rd b/man/do_groupChromPeaks_density.Rd index df69aed12..b06249cd5 100644 --- a/man/do_groupChromPeaks_density.Rd +++ b/man/do_groupChromPeaks_density.Rd @@ -6,7 +6,7 @@ grouping} \usage{ do_groupChromPeaks_density(peaks, sampleGroups, bw = 30, minFraction = 0.5, - minSamples = 1, binSize = 0.25, maxFeatures = 50) + minSamples = 1, binSize = 0.25, maxFeatures = 50, sleep = 0) } \arguments{ \item{peaks}{A \code{matrix} or \code{data.frame} with the mz values and @@ -36,6 +36,9 @@ in mz dimension.} \item{maxFeatures}{\code{numeric(1)} with the maximum number of peak groups to be identified in a single mz slice.} + +\item{sleep}{\code{numeric(1)} defining the time to \emph{sleep} between +iterations and plot the result from the current iteration.} } \value{ A \code{list} with elements \code{"featureDefinitions"} and