diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index 09338ee6..8af61088 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -124,32 +124,7 @@ jobs: - name: Install macOS system dependencies if: matrix.config.os == 'macOS-latest' run: | - ## Enable installing XML from source if needed - brew install libxml2 - echo "XML_CONFIG=/usr/local/opt/libxml2/bin/xml2-config" >> $GITHUB_ENV - - ## Required to install magick as noted at - ## https://github.com/r-lib/usethis/commit/f1f1e0d10c1ebc75fd4c18fa7e2de4551fd9978f#diff-9bfee71065492f63457918efcd912cf2 - brew install imagemagick@6 - - ## For textshaping, required by ragg, and required by pkgdown - brew install harfbuzz fribidi - - ## For installing usethis's dependency gert - brew install libgit2 - - ## required for ncdf4 - ## brew install netcdf ## Does not work as it is compiled with gcc - ## Use pre-compiled libraries from https://mac.r-project.org/libs-4/ - # curl -O https://mac.r-project.org/libs-4/netcdf-4.7.4-darwin.17-x86_64.tar.gz - # tar fvxzm netcdf-4.7.4-darwin.17-x86_64.tar.gz -C / - # rm netcdf-4.7.4-darwin.17-x86_64.tar.gz - # curl -O https://mac.r-project.org/libs-4/hdf5-1.12.0-darwin.17-x86_64.tar.gz - # tar fvxzm hdf5-1.12.0-darwin.17-x86_64.tar.gz -C / - # rm hdf5-1.12.0-darwin.17-x86_64.tar.gz - # curl -O https://mac.r-project.org/libs-4/szip-2.1.1-darwin.17-x86_64.tar.gz - # tar fvxzm szip-2.1.1-darwin.17-x86_64.tar.gz -C / - # rm szip-2.1.1-darwin.17-x86_64.tar.gz + shell: Rscript {0} - name: Install Windows system dependencies if: runner.os == 'Windows' diff --git a/DESCRIPTION b/DESCRIPTION index 9741c4f0..a9c46263 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: xcms -Version: 4.3.2 +Version: 4.3.3 Title: LC-MS and GC-MS Data Analysis Description: Framework for processing and visualization of chromatographically separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF, @@ -54,7 +54,7 @@ Imports: methods, Biobase, BiocGenerics, - ProtGenerics (>= 1.35.4), + ProtGenerics (>= 1.37.1), lattice, MassSpecWavelet (>= 1.66.0), S4Vectors, @@ -63,7 +63,7 @@ Imports: MsCoreUtils (>= 1.15.5), MsFeatures, MsExperiment (>= 1.5.4), - Spectra (>= 1.13.7), + Spectra (>= 1.15.7), progress, RColorBrewer, MetaboCoreUtils (>= 1.11.2) diff --git a/NAMESPACE b/NAMESPACE index 50756901..24fb6605 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,8 @@ import("methods") importMethodsFrom("ProtGenerics", "peaks", "chromatogram", "writeMSData", "polarity<-", "centroided", "isCentroided", "peaks<-", "isolationWindowTargetMz", "quantify", "bin", "spectrapply", - "filterFeatures", "filterMzRange", "filterRt", "filterMz", "filterMsLevel") + "filterFeatures", "filterMzRange", "filterRt", "filterMz", "filterMsLevel", + "estimatePrecursorIntensity") importClassesFrom("ProtGenerics", "Param") importFrom("BiocGenerics", "updateObject", "fileName", "subset", "dirname", "dirname<-") @@ -250,7 +251,6 @@ export( "hasFilledChromPeaks", "plotAdjustedRtime", "groupOverlaps", - "estimatePrecursorIntensity", "featureArea", "loadXcmsData", "matchLamasChromPeaks", @@ -263,6 +263,7 @@ export( exportMethods( "showError", "findChromPeaks", + "estimatePrecursorIntensity", "groupChromPeaks", "adjustRtime", "findChromPeaksIsolationWindow", @@ -568,6 +569,7 @@ importMethodsFrom("Spectra", "precursorMz") importMethodsFrom("Spectra", "$") importMethodsFrom("Spectra", "uniqueMsLevels") importMethodsFrom("Spectra", "backendBpparam") +importMethodsFrom("Spectra", "estimatePrecursorIntensity") importFrom("Spectra", "MsBackendMemory", "filterEmptySpectra") ## MsExperiment things diff --git a/NEWS.md b/NEWS.md index a678609b..1a47aa96 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,15 @@ # xcms 4.3 +## Changes in version 4.3.3 + +- Fix issue #755: `chromatogram()` with `msLevel = 2` fails to extract + chromatographic data if `isolationWindowTargetMz` is not specified or + available (e.g. for MSe data). +- Support coercing from `XcmsExperiment` to `XCMSnExp` with + `as(object, "XCMSnExp")`. +- Change `estimatePrecursorIntensity()` to a method and add an implementation + for `MsExperiment` objects. + ## Changes in version 4.3.2 - Remove data/results import/export functionality as it is being developed in diff --git a/R/MsExperiment-functions.R b/R/MsExperiment-functions.R index b40f547c..cb73888c 100644 --- a/R/MsExperiment-functions.R +++ b/R/MsExperiment-functions.R @@ -462,7 +462,7 @@ if (length(msLevel) != npks) msLevel <- rep(msLevel[1L], npks) if (!length(isolationWindow)) - isolationWindow <- rep(1L, npks) + isolationWindow <- rep(NA_real_, npks) if (length(isolationWindow) && length(isolationWindow) != npks) stop("Length of 'isolationWindow' (if provided) should match the ", "number of chromatograms to extract.") diff --git a/R/MsExperiment.R b/R/MsExperiment.R index ba12ce46..2110fe18 100644 --- a/R/MsExperiment.R +++ b/R/MsExperiment.R @@ -114,3 +114,16 @@ setMethod( msLevel = msLevel, isolationWindow = isolationWindowTargetMz, chunkSize = chunkSize, BPPARAM = BPPARAM) }) + +#' @rdname estimatePrecursorIntensity +setMethod( + "estimatePrecursorIntensity", "MsExperiment", + function(object, ppm = 10, tolerance = 0, + method = c("previous", "interpolation"), + BPPARAM = bpparam()) { + method <- match.arg(method) + estimatePrecursorIntensity(spectra(object), ppm = ppm, + tolerance = tolerance, method = method, + f = spectraSampleIndex(object), + BPPARAM = BPPARAM) +}) diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index 7117e8e3..931bcdb7 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -1136,6 +1136,78 @@ featureArea <- function(object, mzmin = min, mzmax = max, rtmin = min, } #' function to create an empty `XcmsExperiment` object +#' +#' @noRd XcmsExperiment <- function() { as(MsExperiment(), "XcmsExperiment") } + +#' function to convert an XcmsExperiment into an XCMSnExp. +#' +#' @author Johannes Rainer +#' +#' @noRd +.xcms_experiment_to_xcms_n_exp <- function(from) { + requireNamespace("MSnbase", quietly = TRUE) + n <- new("XCMSnExp") + if (!length(from)) + return(n) + m <- new("MsFeatureData") + ## works only if we have a MsBackendMzR + if (!inherits(spectra(from)@backend, "MsBackendMzR")) + stop("Can not convert 'from' to a 'XCMSnExp' object: 'spectra(x)' uses", + "an MS backend other than 'MsBackendMzR'. Currently, only", + " coercion of an object with a 'MsBackendMzR' is supported.") + ## works only if we have an empty processing queue + if (length(spectra(from)@processingQueue) > 0) + stop("Can not convert 'from' to a 'XCMSnExp' object: processing queue", + " of the Spectra object is not empty.") + ## -> OnDiskMSnExp + n@processingData <- new("MSnProcess", + processing = paste0("Data converted [", date(), "]"), + files = fileNames(from), + smoothed = NA) + n@phenoData <- new("NAnnotatedDataFrame", as.data.frame(sampleData(from))) + fd <- as.data.frame(from@spectra@backend@spectraData) + fnames <- unique(fd$dataStorage) + fd$fileIdx <- match(fd$dataStorage, fnames) + fd <- fd[, !colnames(fd) %in% c("dataStorage", "dataOrigin")] + colnames(fd) <- sub("scanIndex", "spIdx", colnames(fd)) + colnames(fd) <- sub("rtime", "retentionTime", colnames(fd)) + colnames(fd) <- sub("precScanNum", "precursorScanNum", colnames(fd)) + colnames(fd) <- sub("precursorMz", "precursorMZ", colnames(fd)) + fd$spectrum <- seq_len(nrow(fd)) + rownames(fd) <- MSnbase:::formatFileSpectrumNames( + fd$fileIdx, fd$spIdx, + max(fd$fileIdx), max(fd$spIdx)) + n@featureData <- new("AnnotatedDataFrame", fd) + nf <- length(fnames) + n@experimentData <- new("MIAPE", + instrumentManufacturer = rep(NA_character_, nf), + instrumentModel = rep(NA_character_, nf), + ionSource = rep(NA_character_, nf), + analyser = rep(NA_character_, nf), + detectorType = rep(NA_character_, nf)) + n@processingData <- new("MSnProcess", + processing = paste0("Coercion from ", + class(from)[1L], + " [", date(), "]"), + files = fnames) + ## -> XCMSnExp + if (hasChromPeaks(from)) { + chromPeaks(m) <- chromPeaks(from) + chromPeakData(m) <- chromPeakData(from) + } + if (hasAdjustedRtime(from)) { + art <- fd$retentionTime_adjusted + names(art) <- rownames(fd) + adjustedRtime(m) <- split(art, fd$fileIdx) + } + if (hasFeatures(from)) + featureDefinitions(m) <- DataFrame(featureDefinitions(from)) + lockEnvironment(m, bindings = TRUE) + n@msFeatureData <- m + n@.processHistory <- from@processHistory + validObject(n) + n +} diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index b540334b..db3efa05 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -124,14 +124,16 @@ #' associated feature definitions will be included in the returned #' `XChromatograms`. By default the function returns chromatograms from MS1 #' data, but by setting parameter `msLevel = 2L` it is possible to e.g. -#' extract also MS2 chromatograms. For `msLevel` other than 1 it is in -#' addition important to also specify the `isolationWindowTargetMz` for which -#' MS2 data should be extracted (e.g. for SWATH data MS2 spectra are created -#' for different m/z isolation windows and the `isolationWindowTargetMz` -#' parameter allows to define from which of these the MS2 chromatogram -#' should be extracted. -#' Note that in future more efficient data structures for chromatographic -#' data will be available as well. +#' extract also MS2 chromatograms. By default, with parameter +#' `isolationWindowTargetMz = NULL` or `isolationWindowTargetMz = NA_real_`, +#' data from **all** MS2 spectra will be considered in the chromatogram +#' extraction. If MS2 data was generated within different m/z isolation +#' windows (such as e.g. with Scies SWATH data), the parameter +#' `isolationWindowTargetMz` should be used to ensure signal is only extracted +#' from the respective isolation window. The `isolationWindowTargetMz()` +#' function on the `Spectra` object can be used to inspect/list available +#' isolation windows of a data set. See also the xcms *LC-MS/MS vignette* for +#' examples and details. #' #' - `chromPeaks`: returns a `numeric` matrix with the identified #' chromatographic peaks. Each row represents a chromatographic peak diff --git a/R/functions-OnDiskMSnExp.R b/R/functions-OnDiskMSnExp.R index f62b2347..70989840 100644 --- a/R/functions-OnDiskMSnExp.R +++ b/R/functions-OnDiskMSnExp.R @@ -722,7 +722,7 @@ setReplaceMethod("dirname", "OnDiskMSnExp", function(path, value) { #' par(mfrow = c(1, 2)) #' plot(res, precursorIntensity(tmt)) #' plot(res_2, precursorIntensity(tmt)) -.estimate_prec_intensity <- function(x, ppm = 10, +.estimate_prec_intensity <- function(x, ppm = 10, tolerance = 0, method = c("previous", "interpolation")) { method <- match.arg(method) pmz <- precursorMz(x) @@ -739,7 +739,8 @@ setReplaceMethod("dirname", "OnDiskMSnExp", function(path, value) { before_int <- numeric() if (length(before_idx)) { sp <- sps[[before_idx[length(before_idx)]]] - before_idx <- closest(pmz[i], sp@mz, ppm = ppm, tolerance = 0, + before_idx <- closest(pmz[i], sp@mz, ppm = ppm, + tolerance = tolerance, duplicates = "closest") if (!is.na(before_idx)) { before_rt <- sp@rt @@ -794,48 +795,6 @@ setReplaceMethod("dirname", "OnDiskMSnExp", function(path, value) { pmi } -#' @title Estimate precursor intensity for MS level 2 spectra -#' -#' @description -#' -#' `estimatePrecursorIntensity` determines the precursor intensity for a MS 2 -#' spectrum based on the intensity of the respective signal from the -#' neighboring MS 1 spectra (i.e. based on the peak with the m/z matching the -#' precursor m/z of the MS 2 spectrum). Based on parameter `method` either the -#' intensity of the peak from the previous MS 1 scan is used -#' (`method = "previous"`) or an interpolation between the intensity from the -#' previous and subsequent MS1 scan is used (`method = "interpolation"`, which -#' considers also the retention times of the two MS1 scans and the retention -#' time of the MS2 spectrum). -#' -#' @param x `OnDiskMSnExp` or `XCMSnExp` object. -#' -#' @param ppm `numeric(1)` defining the maximal acceptable difference (in ppm) -#' of the precursor m/z and the m/z of the corresponding peak in the MS 1 -#' scan. -#' -#' @param method `character(1)` defining the method how the precursor intensity -#' should be determined (see description above for details). Defaults to -#' `method = "previous"`. -#' -#' @param BPPARAM parallel processing setup. See [bpparam()] for details. -#' -#' @return `numeric` with length equal to the number of spectra in `x`. `NA` is -#' returned for MS 1 spectra or if no matching peak in a MS 1 scan can be -#' found for an MS 2 spectrum -#' -#' @author Johannes Rainer -#' -#' @md -estimatePrecursorIntensity <- function(x, ppm = 10, - method = c("previous", "interpolation"), - BPPARAM = bpparam()) { - method <- match.arg(method) - unlist(bplapply(.split_by_file2(x, subsetFeatureData = FALSE), - .estimate_prec_intensity, ppm = ppm, method = method, - BPPARAM = BPPARAM), use.names = FALSE) -} - #' Helper function to convert an OnDiskMSnExp to a Spectra object. This will #' only convert the spectra data, but no sample information. #' diff --git a/R/functions-utils.R b/R/functions-utils.R index 77d236eb..46d4498d 100644 --- a/R/functions-utils.R +++ b/R/functions-utils.R @@ -791,8 +791,11 @@ groupOverlaps <- function(xmin, xmax) { #' chromatograms should be extracted. #' #' @param pks_tmz `numeric` with the isolation window target m/z in which -#' the (MS2) chromatographic peak was detected. Does not need to be -#' provided for MS1 data. +#' the (MS2) chromatographic peak was detected. For `pks_msl > 1L` only +#' spectra with their `isolationWindowTargetMz` being equal to this value +#' are considered for the chromatogram extraction. Set to +#' `pks_tmz = NA_real_` to use **all** spectra with matching MS level and +#' ignore the isolation window. #' #' @param file_idx `integer(1)` allowing to optionally set the index of the #' file the EIC is from (parameter `fromFile`). @@ -803,8 +806,9 @@ groupOverlaps <- function(xmin, xmax) { #' #' @noRd .chromatograms_for_peaks <- function(pd, rt, msl, file_idx = 1L, - tmz = rep(1L, length(pd)), pks, pks_msl, - pks_tmz = rep(1L, nrow(pks)), + tmz = rep(NA_real_, length(pd)), pks, + pks_msl, + pks_tmz = rep(NA_real_, nrow(pks)), aggregationFun = "sum") { nr <- nrow(pks) pks_msl <- as.integer(pks_msl) @@ -826,9 +830,9 @@ groupOverlaps <- function(xmin, xmax) { slot(res[[i]], "msLevel", check = FALSE) <- pks_msl[i] ## if pks_msl > 1: precursor m/z has to match! keep <- between(rt, pks[i, rtc]) & msl == pks_msl[i] - if (pks_msl[i] > 1L) { + if (pks_msl[i] > 1L && !is.na(pks_tmz[i])) { ## for DIA MS2: spectra have to match the isolation window. - keep <- keep & tmz == pks_tmz[i] + keep <- keep & tmz %in% pks_tmz[i] } keep <- which(keep) # the get rid of `NA`. if (length(keep)) { diff --git a/R/methods-OnDiskMSnExp.R b/R/methods-OnDiskMSnExp.R index ecda9f1f..79ca965a 100644 --- a/R/methods-OnDiskMSnExp.R +++ b/R/methods-OnDiskMSnExp.R @@ -626,3 +626,55 @@ setMethod( validObject(object) object }) + +#' @title Estimate precursor intensity for MS level 2 spectra +#' +#' @description +#' +#' `estimatePrecursorIntensity()` determines the precursor intensity for a MS 2 +#' spectrum based on the intensity of the respective signal from the +#' neighboring MS 1 spectra (i.e. based on the peak with the m/z matching the +#' precursor m/z of the MS 2 spectrum). Based on parameter `method` either the +#' intensity of the peak from the previous MS 1 scan is used +#' (`method = "previous"`) or an interpolation between the intensity from the +#' previous and subsequent MS1 scan is used (`method = "interpolation"`, which +#' considers also the retention times of the two MS1 scans and the retention +#' time of the MS2 spectrum). +#' +#' @param object `MsExperiment`, `XcmsExperiment`, `OnDiskMSnExp` or +#' `XCMSnExp` object. +#' +#' @param ppm `numeric(1)` defining the maximal acceptable difference (in ppm) +#' of the precursor m/z and the m/z of the corresponding peak in the MS 1 +#' scan. +#' +#' @param tolerance `numeric(1)` with the maximal allowed difference of m/z +#' values between the precursor m/z of a spectrum and the m/z of the +#' respective ion on the MS1 scan. +#' +#' @param method `character(1)` defining the method how the precursor intensity +#' should be determined (see description above for details). Defaults to +#' `method = "previous"`. +#' +#' @param BPPARAM parallel processing setup. See [bpparam()] for details. +#' +#' @return `numeric` with length equal to the number of spectra in `x`. `NA` is +#' returned for MS 1 spectra or if no matching peak in a MS 1 scan can be +#' found for an MS 2 spectrum +#' +#' @author Johannes Rainer with feedback and suggestions from Corey Broeckling +#' +#' @md +#' +#' @rdname estimatePrecursorIntensity +setMethod( + "estimatePrecursorIntensity", "OnDiskMSnExp", + function(object, ppm = 10, tolerance = 0, + method = c("previous", "interpolation"), + BPPARAM = bpparam()) { + method <- match.arg(method) + unlist(bplapply(.split_by_file2(object, subsetFeatureData = FALSE), + .estimate_prec_intensity, ppm = ppm, + tolerance = tolerance, method = method, + BPPARAM = BPPARAM), use.names = FALSE) + }) diff --git a/R/methods-XCMSnExp.R b/R/methods-XCMSnExp.R index 516e8e3f..ae8a0a71 100644 --- a/R/methods-XCMSnExp.R +++ b/R/methods-XCMSnExp.R @@ -1439,6 +1439,12 @@ setAs(from = "XCMSnExp", to = "xcmsSet", def = .XCMSnExp2xcmsSet) #' @name XcmsExperiment setAs(from = "XcmsExperiment", to = "xcmsSet", def = .XCMSnExp2xcmsSet) +#' @rdname XcmsExperiment +#' +#' @name XcmsExperiment +setAs(from = "XcmsExperiment", to = "XCMSnExp", + def = .xcms_experiment_to_xcms_n_exp) + #' @rdname XCMSnExp-peak-grouping-results setMethod("quantify", "XCMSnExp", function(object, ...) { .XCMSnExp2SummarizedExperiment(object, ...) diff --git a/man/XcmsExperiment.Rd b/man/XcmsExperiment.Rd index f05d094d..9bfc0437 100644 --- a/man/XcmsExperiment.Rd +++ b/man/XcmsExperiment.Rd @@ -511,14 +511,16 @@ If the \code{XcmsExperiment} contains correspondence results, also the associated feature definitions will be included in the returned \code{XChromatograms}. By default the function returns chromatograms from MS1 data, but by setting parameter \code{msLevel = 2L} it is possible to e.g. -extract also MS2 chromatograms. For \code{msLevel} other than 1 it is in -addition important to also specify the \code{isolationWindowTargetMz} for which -MS2 data should be extracted (e.g. for SWATH data MS2 spectra are created -for different m/z isolation windows and the \code{isolationWindowTargetMz} -parameter allows to define from which of these the MS2 chromatogram -should be extracted. -Note that in future more efficient data structures for chromatographic -data will be available as well. +extract also MS2 chromatograms. By default, with parameter +\code{isolationWindowTargetMz = NULL} or \code{isolationWindowTargetMz = NA_real_}, +data from \strong{all} MS2 spectra will be considered in the chromatogram +extraction. If MS2 data was generated within different m/z isolation +windows (such as e.g. with Scies SWATH data), the parameter +\code{isolationWindowTargetMz} should be used to ensure signal is only extracted +from the respective isolation window. The \code{isolationWindowTargetMz()} +function on the \code{Spectra} object can be used to inspect/list available +isolation windows of a data set. See also the xcms \emph{LC-MS/MS vignette} for +examples and details. \item \code{chromPeaks}: returns a \code{numeric} matrix with the identified chromatographic peaks. Each row represents a chromatographic peak identified in one sample (file). The number of columns depends on the diff --git a/man/estimatePrecursorIntensity.Rd b/man/estimatePrecursorIntensity.Rd index ed4ebb59..6dfe1254 100644 --- a/man/estimatePrecursorIntensity.Rd +++ b/man/estimatePrecursorIntensity.Rd @@ -1,23 +1,38 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/functions-OnDiskMSnExp.R -\name{estimatePrecursorIntensity} -\alias{estimatePrecursorIntensity} +% Please edit documentation in R/MsExperiment.R, R/methods-OnDiskMSnExp.R +\name{estimatePrecursorIntensity,MsExperiment-method} +\alias{estimatePrecursorIntensity,MsExperiment-method} +\alias{estimatePrecursorIntensity,OnDiskMSnExp-method} \title{Estimate precursor intensity for MS level 2 spectra} \usage{ -estimatePrecursorIntensity( - x, +\S4method{estimatePrecursorIntensity}{MsExperiment}( + object, ppm = 10, + tolerance = 0, + method = c("previous", "interpolation"), + BPPARAM = bpparam() +) + +\S4method{estimatePrecursorIntensity}{OnDiskMSnExp}( + object, + ppm = 10, + tolerance = 0, method = c("previous", "interpolation"), BPPARAM = bpparam() ) } \arguments{ -\item{x}{\code{OnDiskMSnExp} or \code{XCMSnExp} object.} +\item{object}{\code{MsExperiment}, \code{XcmsExperiment}, \code{OnDiskMSnExp} or +\code{XCMSnExp} object.} \item{ppm}{\code{numeric(1)} defining the maximal acceptable difference (in ppm) of the precursor m/z and the m/z of the corresponding peak in the MS 1 scan.} +\item{tolerance}{\code{numeric(1)} with the maximal allowed difference of m/z +values between the precursor m/z of a spectrum and the m/z of the +respective ion on the MS1 scan.} + \item{method}{\code{character(1)} defining the method how the precursor intensity should be determined (see description above for details). Defaults to \code{method = "previous"}.} @@ -30,7 +45,7 @@ returned for MS 1 spectra or if no matching peak in a MS 1 scan can be found for an MS 2 spectrum } \description{ -\code{estimatePrecursorIntensity} determines the precursor intensity for a MS 2 +\code{estimatePrecursorIntensity()} determines the precursor intensity for a MS 2 spectrum based on the intensity of the respective signal from the neighboring MS 1 spectra (i.e. based on the peak with the m/z matching the precursor m/z of the MS 2 spectrum). Based on parameter \code{method} either the @@ -41,5 +56,5 @@ considers also the retention times of the two MS1 scans and the retention time of the MS2 spectrum). } \author{ -Johannes Rainer +Johannes Rainer with feedback and suggestions from Corey Broeckling } diff --git a/tests/testthat/test_MsExperiment-functions.R b/tests/testthat/test_MsExperiment-functions.R index a8cd065d..773ef380 100644 --- a/tests/testthat/test_MsExperiment-functions.R +++ b/tests/testthat/test_MsExperiment-functions.R @@ -343,9 +343,9 @@ test_that(".mse_chromatogram works", { res <- .mse_chromatogram(mse_dda, rt = rtr, mz = mzr, msLevel = 2L) expect_true(validObject(res)) expect_equal(msLevel(res[[1L]]), 2L) - expect_true(length(intensity(res[[1L]])) == 0) + expect_true(length(intensity(res[[1L]])) > 0) expect_equal(msLevel(res[[2L]]), 2L) - expect_true(length(intensity(res[[2L]])) == 0) + expect_true(length(intensity(res[[2L]])) > 0) ## Set isolationWindowTargetMz. isolationWindowTargetMz(spectra(mse_dda)) <- as.numeric( @@ -354,13 +354,16 @@ test_that(".mse_chromatogram works", { c(81, 83)) rtr <- rbind(c(10, 700), c(10, 700)) - res <- .mse_chromatogram(mse_dda, rt = rtr, mz = mzr, msLevel = 2L, + res <- .mse_chromatogram(mse_dda, rt = rtr, mz = mzr, msLevel = 2L) + res2 <- .mse_chromatogram(mse_dda, rt = rtr, mz = mzr, msLevel = 2L, isolationWindow = c(56, 40)) - expect_true(all(intensity(res[[1L]]) > 0)) - expect_true(length(intensity(res[[2L]])) == 0) - res <- .mse_chromatogram(mse_dda, rt = rtr, mz = mzr, msLevel = 2L, - isolationWindow = c(56, 82)) - expect_true(all(intensity(res[[1L]]) > 0)) + expect_true(all(intensity(res2[[1L]]) > 0)) + expect_true(length(intensity(res2[[2L]])) == 0) + expect_true(length(rtime(res[[1L]])) > length(rtime(res2[[1L]]))) + res2 <- .mse_chromatogram(mse_dda, rt = rtr, mz = mzr, msLevel = 2L, + isolationWindow = c(56, 82)) + expect_true(all(intensity(res2[[1L]]) > 0)) + expect_true(length(intensity(res[[1L]])) > length(intensity(res2[[1L]]))) expect_true(all(intensity(res[[2L]]) > 0, na.rm = TRUE)) ## Can extract chromatograms if providing the correct isolationWindow. diff --git a/tests/testthat/test_MsExperiment.R b/tests/testthat/test_MsExperiment.R index 4bef6221..82412e9c 100644 --- a/tests/testthat/test_MsExperiment.R +++ b/tests/testthat/test_MsExperiment.R @@ -121,3 +121,14 @@ test_that("polarity,MsExperiment works", { res <- polarity(mse) expect_equal(res, polarity(spectra(mse))) }) + +test_that("estimatePrecursorIntensity,MsExperiment works", { + ftmt <- msdata::proteomics(full.names = TRUE)[5] + library(MsExperiment) + tmt <- readMsExperiment(ftmt) + res <- estimatePrecursorIntensity(tmt, ppm = 10, tolerance = 0) + expect_true(is.numeric(res)) + expect_equal(length(res), length(spectra(tmt))) + expect_true(cor(res, precursorIntensity(tmt@spectra), + use = "pairwise.complete.obs") > 0.9) +}) diff --git a/tests/testthat/test_XcmsExperiment-functions.R b/tests/testthat/test_XcmsExperiment-functions.R index 3f161074..dd2128f2 100644 --- a/tests/testthat/test_XcmsExperiment-functions.R +++ b/tests/testthat/test_XcmsExperiment-functions.R @@ -201,3 +201,33 @@ test_that(".features_ms_region works", { xmseg, features = rownames(featureDefinitions(xmseg))) expect_equal(rownames(res), rownames(featureDefinitions(xmseg))) }) + +test_that(".xcms_experiment_to_xcms_n_exp works", { + library(MsExperiment) + a <- XcmsExperiment() + res <- xcms:::.xcms_experiment_to_xcms_n_exp(a) + expect_s4_class(res, "XCMSnExp") + expect_true(length(res) == 0) + + a <- loadXcmsData("xmse") + a1 <- a[1] + a1@spectra <- Spectra::setBackend(spectra(a1), Spectra::MsBackendMemory()) + expect_error(xcms:::.xcms_experiment_to_xcms_n_exp(a1), "MsBackendMzR") + + res <- xcms:::.xcms_experiment_to_xcms_n_exp(a) + expect_s4_class(res, "XCMSnExp") + expect_equal(unname(rtime(res)), rtime(a)) + expect_equal(chromPeaks(res), chromPeaks(a)) + expect_equal(chromPeakData(res), chromPeakData(a)) + expect_equal(featureDefinitions(res), DataFrame(featureDefinitions(a))) + expect_equal(res@.processHistory, a@processHistory) + + ref <- loadXcmsData("xdata") + expect_equal(rtime(res), rtime(ref)) + + expect_true(hasChromPeaks(res)) + expect_true(hasAdjustedRtime(res)) + expect_true(hasFeatures(res)) + + expect_equal(mz(res[1:3]), mz(ref[1:3])) +}) diff --git a/tests/testthat/test_XcmsExperiment.R b/tests/testthat/test_XcmsExperiment.R index a63bf85f..272a58db 100644 --- a/tests/testthat/test_XcmsExperiment.R +++ b/tests/testthat/test_XcmsExperiment.R @@ -1171,11 +1171,25 @@ test_that("chromatogram,XcmsExperiment and .xmse_extract_chromatograms_old", { ## real MS2 data. res <- chromatogram(mse_dia, msLevel = 2L, mz = c(50, 300), rt = c(100, 600)) - expect_true(length(intensity(res[[1L]])) == 0) - res <- chromatogram(mse_dia, msLevel = 2L, mz = c(50, 300), - rt = c(100, 600), isolationWindowTargetMz = 270.85, - aggregationFun = "sum") expect_true(all(intensity(res[[1L]]) > 0)) + res2 <- chromatogram(mse_dia, msLevel = 2L, mz = c(50, 300), + rt = c(100, 600), isolationWindowTargetMz = 270.85, + aggregationFun = "sum") + expect_true(all(intensity(res2[[1L]]) > 0)) + ## have more data points without isolation windows + expect_true(length(intensity(res[[1L]])) > length(intensity(res2[[1L]]))) + + ## fake MS2 data with undefined isolation window. + a <- chromatogram(xmseg, msLevel = 1L, + mz = chromPeaks(xmse)[1:5, c("mzmin", "mzmax")], + rt = chromPeaks(xmse)[1:5, c("rtmin", "rtmax")]) + tmp <- xmseg + tmp@spectra$msLevel <- 2L + expect_true(all(is.na(isolationWindowTargetMz(tmp@spectra)))) + b <- chromatogram(tmp, msLevel = 2L, + mz = chromPeaks(xmse)[1:5, c("mzmin", "mzmax")], + rt = chromPeaks(xmse)[1:5, c("rtmin", "rtmax")]) + expect_equal(intensity(a[[1L]]), intensity(b[[1L]])) ## Defining only mz or rt. rtr <- c(2600, 2700) diff --git a/vignettes/xcms-lcms-ms.Rmd b/vignettes/xcms-lcms-ms.Rmd index 132efa4a..93486237 100644 --- a/vignettes/xcms-lcms-ms.Rmd +++ b/vignettes/xcms-lcms-ms.Rmd @@ -99,7 +99,7 @@ dda_data <- filterRt(dda_data, rt = c(230, 610)) The variable `dda_data` contains now all MS1 and MS2 spectra from the specified mzML file. The number of spectra for each MS level is listed below. Note that we subset the experiment to the first data file (using `[1]`) and then access -directly the spectra within this sample with the `spectra` function (which +directly the spectra within this sample with the `spectra()` function (which returns a `Spectra` object from the `r Biocpkg("Spectra")` package). Note that we use the pipe operator `|>` for better readability. @@ -111,7 +111,7 @@ table() ``` For the MS2 spectra we can get the m/z of the precursor ion with the -`precursorMz` function. Below we first subset the data set again to a single +`precursorMz()` function. Below we first subset the data set again to a single sample and filter to spectra from MS level 2 extracting then their precursor m/z values. @@ -123,7 +123,7 @@ precursorMz() |> head() ``` -With the `precursorIntensity` function it is also possible to extract the +With the `precursorIntensity()` function it is also possible to extract the intensity of the precursor ion. ```{r precursor-intensity} @@ -136,9 +136,9 @@ head() Some manufacturers (like Sciex) don't define/export the precursor intensity and thus either `NA` or `0` is reported. We can however use the -`estimatePrecursorIntensity` function from the `r Biocpkg("Spectra")` package to -determine the precursor intensity for a MS 2 spectrum based on the intensity of -the respective ion in the previous MS1 scan (note that with `method = +`estimatePrecursorIntensity()` function from the `r Biocpkg("Spectra")` package +to determine the precursor intensity for a MS 2 spectrum based on the intensity +of the respective ion in the previous MS1 scan (note that with `method = "interpolation"` the precursor intensity would be defined based on interpolation between the intensity in the previous and subsequent MS1 scan). Below we estimate the precursor intensities, on the full data (for MS1 spectra a `NA` @@ -149,7 +149,7 @@ prec_int <- estimatePrecursorIntensity(spectra(dda_data)) ``` We next set the precursor intensity in the spectrum metadata of `dda_data`. So -that it can be extracted later with the `precursorIntensity` function. +that it can be extracted later with the `precursorIntensity()` function. ```{r set-precursor-intensity} spectra(dda_data)$precursorIntensity <- prec_int @@ -162,8 +162,8 @@ head() ``` Next we perform the chromatographic peak detection on the MS level 1 data with -the `findChromPeaks` method. Below we define the settings for a *centWave*-based -peak detection and perform the analysis. +the `findChromPeaks()` method. Below we define the settings for a +*centWave*-based peak detection and perform the analysis. ```{r dda-find-chrom-peaks-ms1, message = FALSE} cwp <- CentWaveParam(snthresh = 5, noise = 100, ppm = 10, @@ -179,7 +179,7 @@ corresponding MS2 spectra measured. Thus, for some of the ions (identified as MS1 chromatographic peaks) MS2 spectra are available. These can facilitate the annotation of the respective MS1 chromatographic peaks (or MS1 features after a correspondence analysis). Spectra for identified chromatographic peaks can be -extracted with the `chromPeakSpectra` method. MS2 spectra with their precursor +extracted with the `chromPeakSpectra()` method. MS2 spectra with their precursor m/z and retention time within the rt and m/z range of the chromatographic peak are returned. @@ -189,7 +189,7 @@ dda_spectra <- chromPeakSpectra(dda_data, msLevel = 2L) dda_spectra ``` -By default `chromPeakSpectra` returns all spectra associated with a MS1 +By default `chromPeakSpectra()` returns all spectra associated with a MS1 chromatographic peak, but parameter `method` allows to choose and return only one spectrum per peak (have a look at the `?chromPeakSpectra` help page for more details). Also, it would be possible to extract MS1 spectra for each peak by @@ -237,7 +237,7 @@ ex_spectra There are 5 MS2 spectra representing fragmentation of the ion(s) measured in our candidate chromatographic peak. We next reduce this to a single MS2 -spectrum using the `combineSpectra` method employing the `combinePeaks` +spectrum using the `combineSpectra()` method employing the `combinePeaks()` function to determine which peaks to keep in the resulting spectrum (have a look at the `?combinePeaks` help page for details). Parameter `f` allows to specify which spectra in the input object should be combined into one. Note that this @@ -270,7 +270,7 @@ collision energies of 0V, 10V, 20V and 40V are available. Below we import the respective data and plot our candidate spectrum against the MS2 spectra of Flumanezil and Fenamiphos (from a collision energy of 20V). To import files in MGF format we have to load the `r Biocpkg("MsBackendMgf")` Bioconductor package -which adds MGF file support to the `Spectra` package. +which adds MGF file support to the *Spectra* package. Prior plotting we *scale* our experimental spectra to replace all peak intensities with values relative to the maximum peak intensity (which is set to @@ -305,7 +305,7 @@ plotSpectraMirror(ex_spectrum, fenamiphos[3], main = "against Fenamiphos", Our candidate spectrum matches Fenamiphos, thus, our example chromatographic peak represents signal measured for this compound. In addition to plotting the spectra, we can also calculate similarities between them with the -`compareSpectra` method (which uses by default the normalized dot-product to +`compareSpectra()` method (which uses by default the normalized dot-product to calculate the similarity). ```{r dda-ms2-dotproduct} @@ -316,8 +316,8 @@ compareSpectra(ex_spectrum, fenamiphos, ppm = 40) Clearly, the candidate spectrum does not match Flumanezil, while it has a high similarity to Fenamiphos. While we performed here the MS2-based annotation on a single chromatographic peak, this could be easily extended to the full list of -MS2 spectra (returned by `chromPeakSpectra`) for all chromatographic peaks in an -experiment. See also [here](https://jorainer.github.io/SpectraTutorials/) or +MS2 spectra (returned by `chromPeakSpectra()`) for all chromatographic peaks in +an experiment. See also [here](https://jorainer.github.io/SpectraTutorials/) or [here](https://jorainer.github.io/MetaboAnnotationTutorials) for alternative tutorials on matching experimental fragment spectra against a reference. @@ -326,8 +326,8 @@ to perform a sample alignment and correspondence analysis. These tasks could however be performed similarly to *plain* LC-MS data, retention times of recorded MS2 spectra would however also be adjusted during alignment based on the MS1 data. After correspondence analysis (peak grouping) MS2 spectra for -*features* can be extracted with the `featureSpectra` function which returns all -MS2 spectra associated with any chromatographic peak of a feature. +*features* can be extracted with the `featureSpectra()` function which returns +all MS2 spectra associated with any chromatographic peak of a feature. Note also that this workflow can be included into the *Feature-Based Molecular Networking* @@ -342,7 +342,7 @@ for more details and examples. In this section we analyze a small SWATH data set consisting of a single mzML file with data from the same sample analyzed in the previous section but -recorded in SWATH mode. We again read the data with the `readMsExperiment` +recorded in SWATH mode. We again read the data with the `readMsExperiment()` function. The resulting object will contain all recorded MS1 and MS2 spectra in the specified file. Similar to the previous data file, we filter the file to signal between 230 and 610 seconds. @@ -373,7 +373,7 @@ available as additional *spectra variables*. Below we inspect the respective information for the first few spectra. The upper and lower isolation window m/z is available with spectra variables `"isolationWindowLowerMz"` and `"isolationWindowUpperMz"` respectively and the *target* m/z of the isolation -window with `"isolationWindowTargetMz"`. We can use the `spectraData` function +window with `"isolationWindowTargetMz"`. We can use the `spectraData()` function to extract this information from the spectra within our `swath_data` object. ```{r fdata-isolationwindow} @@ -385,7 +385,7 @@ head() ``` We could also access these variables directly with the dedicated -`isolationWindowLowerMz` and `isolationWindowUpperMz` functions. +`isolationWindowLowerMz()` and `isolationWindowUpperMz()` functions. ```{r} head(isolationWindowLowerMz(spectra(swath_data))) @@ -403,10 +403,10 @@ table(isolationWindowTargetMz(spectra(swath_data))) We have thus between 422 and 423 MS2 spectra measured in each isolation window. To inspect the data we can also extract chromatograms from both the measured MS1 -as well as MS2 data. For MS2 data we have to set parameter `msLevel = 2L` and in -addition also specify the isolation window from which we want to extract the -data. Below we extract the TIC of the MS1 data and of one of the isolation -windows (isolation window target m/z of 270.85) and plot these. +as well as MS2 data. For MS2 data we have to set parameter `msLevel = 2L` and, +for SWATH data, in addition also specify the isolation window from which we want +to extract the data. Below we extract the TIC of the MS1 data and of one of the +isolation windows (isolation window target m/z of 270.85) and plot these. ```{r, fig.cap = "TIC for MS1 (upper panel) and MS2 data from the isolation window with target m/z 270.85 (lower panel)."} tic_ms1 <- chromatogram(swath_data, msLevel = 1L, aggregationFun = "sum") @@ -417,12 +417,23 @@ plot(tic_ms1, main = "MS1") plot(tic_ms2, main = "MS2, isolation window m/z 270.85") ``` -Note that also DIA data from other manufacturers (e.g. Waters MSe) are supported -as long as a spectra variable `isolationWindowTargetMz` is available. If that -variable is not provided in the raw data files it can also be manually assigned -to each spectrum in the data set. In the example below the precursor m/z from -the individual spectra would be used as their `"isolationWindowTargetMz"` (note: -the line below is only shown as an example but not evaluated). +Without specifying the `isolationWindowTargetMz` parameter, all MS2 spectra +would be considered in the chromatogram extraction which would result in a +*chimeric* chromatogram such as the one shown below: + +```{r, fig.cap = "TIC considering **all** MS2 spectra (from all isolation windows)."} +tic_all_ms2 <- chromatogram(swath_data, msLevel = 2L, aggregationFun = "sum") +plot(tic_all_ms2, main = "MS2, all isolation windows") +``` + +For MS2 data without specific, **different**, m/z isolation windows (such as +e.g. Waters MSe data) parameter `isolationWindowTargetMz` can be omitted in the +`chromatograms()` call in which case, as already stated above, all MS2 spectra +are considered in the chromatogram calculation. Alternatively, if the isolation +window is not provided or specified in the original data files, it would be +possible to manually define a value for this spectra variable, such as in the +example below (from which the code is however not evaluated) were we assign the +value of the precursor m/z to the spectra's isolation window target m/z. ```{r, eval = FALSE} spectra(swath_data)$isolationWindowTargetMz <- precursorMz(spectra(swath_data)) @@ -432,9 +443,9 @@ spectra(swath_data)$isolationWindowTargetMz <- precursorMz(spectra(swath_data)) ## Chromatographic peak detection in MS1 and MS2 data Similar to a *conventional* LC-MS analysis, we perform first a chromatographic -peak detection (on the MS level 1 data) with the `findChromPeaks` method. Below -we define the settings for a *centWave*-based peak detection and perform the -analysis. +peak detection (on the MS level 1 data) with the `findChromPeaks()` +method. Below we define the settings for a *centWave*-based peak detection and +perform the analysis. ```{r find-chrom-peaks-ms1, message = FALSE} cwp <- CentWaveParam(snthresh = 5, noise = 100, ppm = 10, @@ -444,13 +455,13 @@ swath_data ``` Next we perform a chromatographic peak detection in MS level 2 data separately -for each individual isolation window. We use the `findChromPeaksIsolationWindow` -function employing the same peak detection algorithm reducing however the -required signal-to-noise ratio. The `isolationWindow` parameter allows to -specify which MS2 spectra belong to which isolation window and hence defines in -which set of MS2 spectra chromatographic peak detection should be performed. As -a default the `"isolationWindowTargetMz"` variable of the object's spectra is -used. +for each individual isolation window. We use the +`findChromPeaksIsolationWindow()` function employing the same peak detection +algorithm reducing however the required signal-to-noise ratio. The +`isolationWindow` parameter allows to specify which MS2 spectra belong to which +isolation window and hence defines in which set of MS2 spectra chromatographic +peak detection should be performed. As a default the `"isolationWindowTargetMz"` +variable of the object's spectra is used. ```{r find-chrom-peaks-ms2, message = FALSE} cwp <- CentWaveParam(snthresh = 3, noise = 10, ppm = 10, @@ -459,7 +470,7 @@ swath_data <- findChromPeaksIsolationWindow(swath_data, param = cwp) swath_data ``` -The `findChromPeaksIsolationWindow` function added all peaks identified in the +The `findChromPeaksIsolationWindow()` function added all peaks identified in the individual isolation windows to the `chromPeaks` matrix containing already the MS1 chromatographic peaks. These newly added peaks can be identified through the `"isolationWindow"` column in the object's `chromPeakData`. @@ -495,7 +506,7 @@ similar retention time and peak shape than the precursor's MS1 chromatographic peak. After detection of MS1 and MS2 chromatographic peaks has been performed, we can -reconstruct the MS2 spectra using the `reconstructChromPeakSpectra` +reconstruct the MS2 spectra using the `reconstructChromPeakSpectra()` function. This function defines an MS2 spectrum for each MS1 chromatographic peak based on the following approach: @@ -551,10 +562,10 @@ above conditions. Next we extract the ion chromatogram of the MS1 peak and of all selected candidate MS2 signals. To ensure chromatograms are extracted from spectra in the correct isolation window we need to specify the respective isolation window by passing its isolation window target m/z to the -`chromatogram` function (in addition to setting `msLevel = 2`). This can be done -by either getting the `isolationWindowTargetMz` of the spectra after the data -was subset using `filterIsolationWindow` (as done below) or by selecting the -`isolationWindowTargetMz` closest to the m/z of the compound of interest. +`chromatogram()` function (in addition to setting `msLevel = 2`). This can be +done by either getting the `isolationWindowTargetMz` of the spectra after the +data was subset using `filterIsolationWindow()` (as done below) or by selecting +the `isolationWindowTargetMz` closest to the m/z of the compound of interest. ```{r fena-eic-extract, warning = FALSE} rtr <- fenamiphos_ms1_peak[, c("rtmin", "rtmax")] @@ -573,7 +584,7 @@ table() ``` The target m/z of the isolation window containing the m/z of interest is thus -299.1 and we can use this in the `chromatogram` call below to extract the data +299.1 and we can use this in the `chromatogram()` call below to extract the data from the correct (MS2) spectra. ```{r} @@ -603,7 +614,7 @@ MS2 chromatographic peaks. Note that, because MS1 and MS2 spectra are recorded consecutively, the retention times of the individual data points will differ between the MS2 and MS1 chromatographic data and data points have thus to be matched (aligned) before performing the correlation analysis. This is done -automatically by the `correlate` function. See the help for the `align` method +automatically by the `correlate()` function. See the help for the `align` method for more information on alignment options. ```{r fena-cor} @@ -619,7 +630,7 @@ similarity to the MS1 chromatographic peaks, an MS2 spectrum could be peaks (i.e., using their `"mz"` and `"maxo"` or `"into"` values). Instead of performing this assignment of MS2 signal to MS1 chromatographic peaks -manually as above, we can use the `reconstructChromPeakSpectra` function that +manually as above, we can use the `reconstructChromPeakSpectra()` function that performs the exact same steps for all MS1 chromatographic peaks in a DIA data set. Below we use this function to reconstruct MS2 spectra for our example data requiring a peak shape correlation higher than `0.9` between the candidate MS2 @@ -688,15 +699,15 @@ pk_ids ``` With these peak IDs available we can extract their retention time window and m/z -ranges from the `chromPeaks` matrix and use the `chromatogram` function to +ranges from the `chromPeaks` matrix and use the `chromatogram()` function to extract their EIC. Note however that for SWATH data we have MS2 signal from different isolation windows. Thus we have to first filter the `swath_data` object by the isolation window containing the precursor m/z with the -`filterIsolationWindow` to subset the data to MS2 spectra related to the ion of -interest. In addition, we have to use `msLevel = 2L` in the `chromatogram` call -because `chromatogram` extracts by default only data from MS1 spectra and we -need to specify the target m/z of the isolation window containing the fragment -data from the compound of interest. +`filterIsolationWindow()` to subset the data to MS2 spectra related to the ion +of interest. In addition, we have to use `msLevel = 2L` in the `chromatogram()` +call because `chromatogram()` extracts by default only data from MS1 spectra and +we need to specify the target m/z of the isolation window containing the +fragment data from the compound of interest. ```{r} rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] @@ -807,8 +818,8 @@ plotSpectra(prochloraz_swath_spectrum) These could represent fragments from isotopes of the original compound. DIA MS2 data, since all ions at a given retention time are fragmented, can contain -fragments from isotopes. We thus below use the `isotopologues` function from the -`r Biocpkg("MetaboCoreUtils")` package to check for presence of potential +fragments from isotopes. We thus below use the `isotopologues()` function from +the `r Biocpkg("MetaboCoreUtils")` package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. ```{r}