From dc9ae94a7d07f6f2de6e7a6e5b7485466254b2b5 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Mon, 28 Oct 2024 14:36:04 +0100 Subject: [PATCH 01/16] Update xcms.Rmd --- vignettes/xcms.Rmd | 110 +++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 59 deletions(-) diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd index 3f59a2e9..e2f6c5b7 100644 --- a/vignettes/xcms.Rmd +++ b/vignettes/xcms.Rmd @@ -62,7 +62,10 @@ be applied to the older *MSnbase*-based workflows (xcms version 3). Additional documents and tutorials covering also other topics of untargeted metabolomics analysis are listed at the end of this document. There is also a [xcms tutorial](https://jorainer.github.io/xcmsTutorials) available with more examples -and details. +and details. +To get a complete overview of LCMS-MS analysis, an end-to-end workflow +[Metabonaut website](https://rformassspectrometry.github.io/metabonaut/), which +integrate the *xcms* preprocessing steps with the downstream analysis, is available. # Preprocessing of LC-MS data @@ -1180,55 +1183,6 @@ defined above. The `filter` argument can accommodate various types of input, each determining the specific type of quality assessment and filtering to be performed. -The `RsdFilter` enable users to filter features based on their relative -standard deviation (coefficient of variation) for a specified `threshold`. 
It -is recommended to base the computation on quality control (QC) samples, -as demonstrated below: - -```{r} -# Set up parameters for RsdFilter -rsd_filter <- RsdFilter(threshold = 0.3, - qcIndex = sampleData(faahko)$sample_type == "QC") - -# Apply the filter to faakho object -filtered_faahko <- filterFeatures(object = faahko, filter = rsd_filter) - -# Now apply the same strategy to the res object -rsd_filter <- RsdFilter(threshold = 0.3, qcIndex = res$sample_type == "QC") -filtered_res <- filterFeatures(object = res, filter = rsd_filter, assay = "raw") -``` - -All features with an RSD (CV) strictly larger than 0.3 in QC samples were thus -removed from the data set. - -The `DratioFilter` can be used to filter features based on the D-ratio or -*dispersion ratio*, which compares the standard deviation in QC samples to that -in study samples. - -```{r} -# Set up parameters for DratioFilter -dratio_filter <- DratioFilter( - threshold = 0.5, - qcIndex = sampleData(filtered_faahko)$sample_type == "QC", - studyIndex = sampleData(filtered_faahko)$sample_type == "study") - -# Apply the filter to faahko object -filtered_faakho <- filterFeatures(object = filtered_faahko, - filter = dratio_filter) - -# Now same but for the res object -dratio_filter <- DratioFilter( - threshold = 0.5, - qcIndex = filtered_res$sample_type == "QC", - studyIndex = filtered_res$sample_type == "study") - -filtered_res <- filterFeatures(object = filtered_res, - filter = dratio_filter) -``` - -All features with an D-ratio strictly larger than 0.5 were thus removed from -the data set. - The `PercentMissingFilter` allows to filter features based on the percentage of missing values for each feature. This function takes as an input the parameter `f` which is supposed to be a vector of length equal to the length of the object @@ -1276,16 +1230,54 @@ samples. 
More information can be found in the documentation of the filter: ?BlankFlag ``` -## Normalization +The `RsdFilter` enable users to filter features based on their relative +standard deviation (coefficient of variation) for a specified `threshold`. It +is recommended to base the computation on quality control (QC) samples, +as demonstrated below: + +```{r} +# Set up parameters for RsdFilter +rsd_filter <- RsdFilter(threshold = 0.3, + qcIndex = sampleData(faahko)$sample_type == "QC") + +# Apply the filter to faakho object +filtered_faahko <- filterFeatures(object = faahko, filter = rsd_filter) + +# Now apply the same strategy to the res object +rsd_filter <- RsdFilter(threshold = 0.3, qcIndex = res$sample_type == "QC") +filtered_res <- filterFeatures(object = res, filter = rsd_filter, assay = "raw") +``` + +All features with an RSD (CV) strictly larger than 0.3 in QC samples were thus +removed from the data set. + +The `DratioFilter` can be used to filter features based on the D-ratio or +*dispersion ratio*, which compares the standard deviation in QC samples to that +in study samples. + +```{r} +# Set up parameters for DratioFilter +dratio_filter <- DratioFilter( + threshold = 0.5, + qcIndex = sampleData(filtered_faahko)$sample_type == "QC", + studyIndex = sampleData(filtered_faahko)$sample_type == "study") + +# Apply the filter to faahko object +filtered_faakho <- filterFeatures(object = filtered_faahko, + filter = dratio_filter) + +# Now same but for the res object +dratio_filter <- DratioFilter( + threshold = 0.5, + qcIndex = filtered_res$sample_type == "QC", + studyIndex = filtered_res$sample_type == "study") + +filtered_res <- filterFeatures(object = filtered_res, + filter = dratio_filter) +``` -Normalizing features' signal intensities is required, but at present not (yet) -supported in `xcms` (some methods might be added in near future). 
It is advised -to use the `SummarizedExperiment` returned by the `quantify()` method for any -further data processing, as this type of object stores feature definitions, -sample annotations as well as feature abundances in the same object. For the -identification of e.g. features with significant different -intensities/abundances it is suggested to use functionality provided in other R -packages, such as Bioconductor's excellent *limma* package. +All features with an D-ratio strictly larger than 0.5 were thus removed from +the data set. ## Alignment to an external reference dataset From ac3e209bf6a99775334229983aa92cc63aac1f78 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Mon, 28 Oct 2024 16:17:13 +0100 Subject: [PATCH 02/16] Update xcms.Rmd --- vignettes/xcms.Rmd | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd index e2f6c5b7..02e595a2 100644 --- a/vignettes/xcms.Rmd +++ b/vignettes/xcms.Rmd @@ -1196,24 +1196,24 @@ Both examples are shown below: ```{r} # To set up parameter `f` to filter only based on QC samples -f <- sampleData(filtered_faakho)$sample_type +f <- sampleData(faahko)$sample_type f[f != "QC"] <- NA # To set up parameter `f` to filter per sample type excluding QC samples -f <- sampleData(filtered_faakho)$sample_type +f <- sampleData(faahko)$sample_type f[f == "QC"] <- NA missing_filter <- PercentMissingFilter(threshold = 30, f = f) # Apply the filter to faakho object -filtered_faakho <- filterFeatures(object = filtered_faakho, +filtered_faakho <- filterFeatures(object = faahko, filter = missing_filter) # Apply the filter to res object missing_filter <- PercentMissingFilter(threshold = 30, f = f) -filtered_res <- filterFeatures(object = filtered_res, +filtered_res <- filterFeatures(object = res, filter = missing_filter) ``` @@ -1238,14 +1238,14 @@ as demonstrated below: ```{r} # Set up parameters for RsdFilter rsd_filter <- 
RsdFilter(threshold = 0.3, - qcIndex = sampleData(faahko)$sample_type == "QC") + qcIndex = sampleData(filtered_faahko)$sample_type == "QC") # Apply the filter to faakho object -filtered_faahko <- filterFeatures(object = faahko, filter = rsd_filter) +filtered_faahko <- filterFeatures(object = filtered_faahko, filter = rsd_filter) # Now apply the same strategy to the res object -rsd_filter <- RsdFilter(threshold = 0.3, qcIndex = res$sample_type == "QC") -filtered_res <- filterFeatures(object = res, filter = rsd_filter, assay = "raw") +rsd_filter <- RsdFilter(threshold = 0.3, qcIndex = filtered_res$sample_type == "QC") +filtered_res <- filterFeatures(object = filtered_res, filter = rsd_filter, assay = "raw") ``` All features with an RSD (CV) strictly larger than 0.3 in QC samples were thus From 23dd061485a04091ddb466aa8040ad7fd1b788f9 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Mon, 28 Oct 2024 19:36:32 +0100 Subject: [PATCH 03/16] Update xcms.Rmd --- vignettes/xcms.Rmd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd index 02e595a2..0382314b 100644 --- a/vignettes/xcms.Rmd +++ b/vignettes/xcms.Rmd @@ -1205,9 +1205,8 @@ f[f == "QC"] <- NA missing_filter <- PercentMissingFilter(threshold = 30, f = f) - # Apply the filter to faakho object -filtered_faakho <- filterFeatures(object = faahko, +filtered_faahko <- filterFeatures(object = faahko, filter = missing_filter) # Apply the filter to res object From 7c9e7d390e181445d48a05f2c3aa80d9f94e978b Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 30 Oct 2024 15:32:54 +0100 Subject: [PATCH 04/16] fix: .rt_model only use 2 columns instead of all --- R/do_adjustRtime-functions.R | 2 +- vignettes/xcms.Rmd | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/R/do_adjustRtime-functions.R b/R/do_adjustRtime-functions.R index 
1fe5282c..b432daf2 100644 --- a/R/do_adjustRtime-functions.R +++ b/R/do_adjustRtime-functions.R @@ -874,7 +874,7 @@ NULL resid_ratio = 3, zero_weight = 10, bs = "tp"){ - rt_map <- rt_map[order(rt_map$obs), ] + rt_map <- rt_map[order(rt_map$obs), c("ref", "obs")] # add first row of c(0,0) to set a fix timepoint. rt_map <- rbind(c(0,0), rt_map) weights <- rep(1, nrow(rt_map)) diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd index 0382314b..02abbe13 100644 --- a/vignettes/xcms.Rmd +++ b/vignettes/xcms.Rmd @@ -1203,17 +1203,13 @@ f[f != "QC"] <- NA f <- sampleData(faahko)$sample_type f[f == "QC"] <- NA -missing_filter <- PercentMissingFilter(threshold = 30, - f = f) +missing_filter <- PercentMissingFilter(threshold = 30, f = f) # Apply the filter to faakho object -filtered_faahko <- filterFeatures(object = faahko, - filter = missing_filter) +filtered_faahko <- filterFeatures(object = faahko, filter = missing_filter) # Apply the filter to res object -missing_filter <- PercentMissingFilter(threshold = 30, - f = f) -filtered_res <- filterFeatures(object = res, - filter = missing_filter) +missing_filter <- PercentMissingFilter(threshold = 30, f = f) +filtered_res <- filterFeatures(object = res, filter = missing_filter) ``` Here, no feature was removed, meaning that all the features had less than 30% From cd7e00cdb09c44bd1c2a24ae5154027cfc49f441 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:41:12 +0100 Subject: [PATCH 05/16] add chromPeaks information to the Spectra output of chromPeakSpectra --- R/XcmsExperiment-functions.R | 4 +++- R/XcmsExperiment.R | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index 931bcdb7..fdbed26e 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -793,7 +793,8 @@ "largest_bpi"), msLevel = 2L, expandRt = 0, expandMz = 0, ppm = 0, skipFilled = FALSE, - 
peaks = integer(), BPPARAM = bpparam()) { + peaks = integer(), peaksInfo = c("rt", "mz"), + BPPARAM = bpparam()) { method <- match.arg(method) pks <- .chromPeaks(x)[, c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "maxo", "sample")] @@ -830,6 +831,7 @@ ids <- rep(rownames(pk), lengths(idx)) res <- sp[unlist(idx)] res$peak_id <- ids + res2@backend@spectraData <- cbind(res2@backend@spectraData, info) res }, MoreArgs = list(msLevel = msLevel, method = method), diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index a094a716..6a222aec 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -515,6 +515,10 @@ #' indicating the identified chromatographic peaks. Only a single color #' is supported. Defaults to `peakCol = "#ff000060". #' +#' @param peaksInfo For `chromPeakSpectra`: `character` vector of additional +#' information from `chromPeaks()` to be added to the spectra object. The +#' columns names will be appended with "peaks_". +#' #' @param ppm For `chromPeaks` and `featureDefinitions`: optional `numeric(1)` #' specifying the ppm by which the m/z range (defined by `mz` should be #' extended. 
For a value of `ppm = 10`, all peaks within `mz[1] - ppm / 1e6` @@ -1228,7 +1232,7 @@ setMethod( function(object, method = c("all", "closest_rt", "closest_mz", "largest_tic", "largest_bpi"), msLevel = 2L, expandRt = 0, expandMz = 0, ppm = 0, - skipFilled = FALSE, peaks = character(), + skipFilled = FALSE, peaks = character(), peaksInfo = c("rt", "mz") return.type = c("Spectra", "List"), BPPARAM = bpparam()) { if (hasAdjustedRtime(object)) object <- applyAdjustedRtime(object) @@ -1244,7 +1248,7 @@ setMethod( else pkidx <- integer() res <- .mse_spectra_for_peaks(object, method, msLevel, expandRt, expandMz, ppm, skipFilled, pkidx, - BPPARAM) + peaksInfo, BPPARAM) if (!length(pkidx)) peaks <- rownames(.chromPeaks(object)) else peaks <- rownames(.chromPeaks(object))[pkidx] From 7b82ba44e77b337422f94185479cabd42f46238b Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:41:22 +0100 Subject: [PATCH 06/16] Update XcmsExperiment-functions.R --- R/XcmsExperiment-functions.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index fdbed26e..d0f5c9e3 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -831,6 +831,8 @@ ids <- rep(rownames(pk), lengths(idx)) res <- sp[unlist(idx)] res$peak_id <- ids + info <- pk[res$peak_id, peaksInfo] + colnames(info) <- paste("peak_", peaksInfo, sep = "") res2@backend@spectraData <- cbind(res2@backend@spectraData, info) res }, From 8403d58f06dbb0f2aecebcae2f4524d04d6cab32 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:58:13 +0100 Subject: [PATCH 07/16] lil fix --- DESCRIPTION | 3 ++- R/XcmsExperiment.R | 2 +- man/XcmsExperiment.Rd | 4 ++++ man/chromPeakSpectra.Rd | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 80956294..811e90ab 100644 --- a/DESCRIPTION +++ 
b/DESCRIPTION @@ -158,4 +158,5 @@ Collate: 'writemzdata.R' 'writemztab.R' 'xcmsSource.R' - 'zzz.R' \ No newline at end of file + 'zzz.R' + diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index 6a222aec..c695da70 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -1232,7 +1232,7 @@ setMethod( function(object, method = c("all", "closest_rt", "closest_mz", "largest_tic", "largest_bpi"), msLevel = 2L, expandRt = 0, expandMz = 0, ppm = 0, - skipFilled = FALSE, peaks = character(), peaksInfo = c("rt", "mz") + skipFilled = FALSE, peaks = character(), peaksInfo = c("rt", "mz"), return.type = c("Spectra", "List"), BPPARAM = bpparam()) { if (hasAdjustedRtime(object)) object <- applyAdjustedRtime(object) diff --git a/man/XcmsExperiment.Rd b/man/XcmsExperiment.Rd index 9bfc0437..28516110 100644 --- a/man/XcmsExperiment.Rd +++ b/man/XcmsExperiment.Rd @@ -396,6 +396,10 @@ also parameter \code{type} below for additional information.} \item{keepFeatures}{for most subsetting functions (\code{[}, \code{filterFile}): \code{logical(1)}: wheter eventually present feature definitions should be retained in the returned (filtered) object.} + +\item{peaksInfo}{For \code{chromPeakSpectra}: \code{character} vector of additional +information from \code{chromPeaks()} to be added to the spectra object. The +columns names will be appended with "peaks_".} } \description{ The \code{XcmsExperiment} is a data container for \code{xcms} preprocessing results diff --git a/man/chromPeakSpectra.Rd b/man/chromPeakSpectra.Rd index 40fef183..8cd9439a 100644 --- a/man/chromPeakSpectra.Rd +++ b/man/chromPeakSpectra.Rd @@ -18,6 +18,7 @@ chromPeakSpectra(object, ...) 
ppm = 0, skipFilled = FALSE, peaks = character(), + peaksInfo = c("rt", "mz"), return.type = c("Spectra", "List"), BPPARAM = bpparam() ) From 367aada9b967880ef2fe65e6978fee878c249106 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 20 Nov 2024 18:48:16 +0100 Subject: [PATCH 08/16] Update XcmsExperiment-functions.R --- R/XcmsExperiment-functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index d0f5c9e3..c8366696 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -833,7 +833,7 @@ res$peak_id <- ids info <- pk[res$peak_id, peaksInfo] colnames(info) <- paste("peak_", peaksInfo, sep = "") - res2@backend@spectraData <- cbind(res2@backend@spectraData, info) + res@backend@spectraData <- cbind(res@backend@spectraData, info) res }, MoreArgs = list(msLevel = msLevel, method = method), From 1d3e4824bda0633af3516f2d9cd8f9d976884767 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Thu, 21 Nov 2024 11:04:56 +0100 Subject: [PATCH 09/16] update code chromPeakSpectra() --- R/XcmsExperiment-functions.R | 21 ++++++++++++++------- R/XcmsExperiment.R | 19 +++++++++++++------ 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index c8366696..be67d750 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -793,7 +793,9 @@ "largest_bpi"), msLevel = 2L, expandRt = 0, expandMz = 0, ppm = 0, skipFilled = FALSE, - peaks = integer(), peaksInfo = c("rt", "mz"), + peaks = integer(), + addColumnsChromPeaks = c("rt", "mz"), + addColumnsChrompeaksPrefix = "chrom_peak_", BPPARAM = bpparam()) { method <- match.arg(method) pks <- .chromPeaks(x)[, c("mz", "mzmin", "mzmax", "rt", @@ -819,7 +821,8 @@ res <- bpmapply( split.data.frame(pks, f), split(spectra(x), factor(fromFile(x), levels = 
levels(f))), - FUN = function(pk, sp, msLevel, method) { + FUN = function(pk, sp, msLevel, method, addColumnsChromPeaks, + addColumnsChrompeaksPrefix) { sp <- filterMsLevel(sp, msLevel) idx <- switch( method, @@ -830,13 +833,17 @@ largest_bpi = .spectra_index_list_largest_bpi(sp, pk, msLevel)) ids <- rep(rownames(pk), lengths(idx)) res <- sp[unlist(idx)] - res$peak_id <- ids - info <- pk[res$peak_id, peaksInfo] - colnames(info) <- paste("peak_", peaksInfo, sep = "") - res@backend@spectraData <- cbind(res@backend@spectraData, info) + pk_data <- DataFrame(pk[ids, addColumnsChromPeaks, drop = FALSE]) + pk_data$id <- ids + colnames(pk_data) <- paste0(addColumnsChrompeaksPrefix, + colnames(pk_data)) + pk_data$spectrumId <- res$spectrumId + res <- Spectra::joinSpectraData(res, pk_data) res }, - MoreArgs = list(msLevel = msLevel, method = method), + MoreArgs = list(msLevel = msLevel, method = method, + addColumnsChromPeaks = addColumnsChromPeaks, + addColumnsChrompeaksPrefix = addColumnsChrompeaksPrefix), BPPARAM = BPPARAM) Spectra:::.concatenate_spectra(res) } diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index c695da70..1cbb8545 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -1232,7 +1232,9 @@ setMethod( function(object, method = c("all", "closest_rt", "closest_mz", "largest_tic", "largest_bpi"), msLevel = 2L, expandRt = 0, expandMz = 0, ppm = 0, - skipFilled = FALSE, peaks = character(), peaksInfo = c("rt", "mz"), + skipFilled = FALSE, peaks = character(), + addColumnsChromPeaks = c("rt", "mz"), + addColumnsChromPeaksPrefix = "chrom_peak_", return.type = c("Spectra", "List"), BPPARAM = bpparam()) { if (hasAdjustedRtime(object)) object <- applyAdjustedRtime(object) @@ -1248,14 +1250,19 @@ setMethod( else pkidx <- integer() res <- .mse_spectra_for_peaks(object, method, msLevel, expandRt, expandMz, ppm, skipFilled, pkidx, - peaksInfo, BPPARAM) + addColumnsChromPeaks, + addColumnsChromPeaksPrefix, + BPPARAM) if (!length(pkidx)) peaks <- 
rownames(.chromPeaks(object)) else peaks <- rownames(.chromPeaks(object))[pkidx] - if (return.type == "Spectra") - res <- res[as.matrix(findMatches(peaks, res$peak_id))[, 2L]] - else - as(split(res, factor(res$peak_id, levels = peaks)), "List") + if (return.type == "Spectra") { + col <- paste0(addColumnsChromPeaksPrefix, "id") + res <- res[as.matrix(findMatches(peaks, res[[col]]))[, 2L]] + } else { + col <- paste0(addColumnsChromPeaksPrefix, "id") + as(split(res, factor(res[[col]], levels = peaks)), "List") + } }) #' @rdname reconstructChromPeakSpectra From 971db33d443be730b63130ecfcf132e1a01343c8 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Thu, 21 Nov 2024 18:00:18 +0100 Subject: [PATCH 10/16] addition of addCollmns.. parameters and documentation --- R/AllGenerics.R | 67 ++++++++++++++++++++++------ R/XcmsExperiment-functions.R | 34 ++++++++++---- R/XcmsExperiment.R | 21 +++++---- man/XcmsExperiment.Rd | 4 -- man/chromPeakSpectra.Rd | 38 +++++++++++----- man/featureSpectra.Rd | 39 ++++++++++++++-- tests/testthat/test_XcmsExperiment.R | 35 ++++++++------- vignettes/xcms-lcms-ms.Rmd | 25 +++++++---- 8 files changed, 192 insertions(+), 71 deletions(-) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index b8fb0a76..600934e1 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -411,14 +411,21 @@ setGeneric("chromPeakData<-", function(object, value) #' #' Parameter `return.type` allows to specify the *type* of the result object. #' With `return.type = "Spectra"` (the default) a [Spectra] object with all -#' matching spectra is returned. The spectra variable `"peak_id"` of the -#' returned `Spectra` contains the ID of the chromatographic peak (i.e., the -#' rowname of the peak in the `chromPeaks` matrix) for each spectrum. -#' With `return.type = "Spectra"` a `List` of `Spectra` is returned. The -#' length of the list is equal to the number of rows of `chromPeaks`. 
Each -#' element of the list contains thus a `Spectra` with all spectra for one -#' chromatographic peak (or a `Spectra` of length 0 if no spectrum was found -#' for the respective chromatographic peak). +#' matching spectra is returned. With `return.type = "Spectra"` a `List` of +#' `Spectra` is returned. The length of the list is equal to the number of rows +#' of `chromPeaks`. Each element of the list contains thus a `Spectra` with all +#' spectra for one chromatographic peak (or a `Spectra` of length 0 if no +#' spectrum was found for the respective chromatographic peak). +#' +#' Parameters `addColumnsChromPeaks` allow the user to add specific metadata +#' columns from the chromatographic peaks (`chromPeaks`) to the returned +#' spectra object. This can be useful to retain information such as retention +#' time (`rt`), m/z (`mz`). The columns will be named as they is written in the +#' `chromPeaks` object with a prefix that is defined by the parameter +#' `addColumnsChromPeaksPrefix`. The *peak ID* (i.e., the row name of the +#' peak in the `chromPeaks` matrix) is always added to the spectra object as +#' metadata column `paste0(addColumnsChromPeaksPrefix,id)`, by default it will +#' be `"chrom_peak_id"`. #' #' See also the *LC-MS/MS data analysis* vignette for more details and examples. #' @@ -453,6 +460,16 @@ setGeneric("chromPeakData<-", function(object, value) #' @param return.type `character(1)` defining the type of result object that #' should be returned. #' +#' @param addColumnsChromPeaks `character` vector with the names of the columns +#' from `chromPeaks` that should be added to the returned spectra object. +#' The columns will be named as they are written in the `chromPeaks` object +#' with a prefix that is defined by the parameter +#' `addColumnsChromPeaksPrefix`. Defaults to `c("mz", "rt")`. 
+#' +#' @param addColumnsChromPeaksPrefix `character(1)` defining the prefix that +#' should be used for the columns from `chromPeaks` that are added to the +#' returned spectra object. Defaults to `"chrom_peak_"`. +#' #' @param BPPARAM parallel processing setup. Defaults to [bpparam()]. #' #' @param ... ignored. @@ -503,7 +520,7 @@ setGeneric("chromPeakData<-", function(object, value) #' ## spectra variable *peak_id* contain the row names of the peaks in the #' ## chromPeak matrix and allow thus to map chromatographic peaks to the #' ## returned MS2 spectra -#' ms2_sps$peak_id +#' ms2_sps$chrom_peak_id #' chromPeaks(dda) #' #' ## Alternatively, return the result as a List of Spectra objects. This list @@ -799,10 +816,24 @@ setGeneric("featureDefinitions<-", function(object, value) #' spectrum **per chromatographic peak** will be returned (hence multiple #' spectra per feature). #' -#' The ID of each chromatographic peak (i.e. its row name in `chromPeaks`) -#' and each feature (i.e., its row name in `featureDefinitions`) are -#' available in the returned [Spectra()] with spectra variables `"peak_id"` -#' and `"feature_id"`, respectively. +#' The information from `featureDefinitions` for each feature can be included +#' in the returned [Spectra()] object using the `addColumnsFeatures` parameter. +#' This is useful for retaining details such as the median retention time (`rtmed`) +#' or median m/z (`mzmed`). The columns will retain their names as specified +#' in the `featureDefinitions` object, prefixed by the value of the +#' `addColumnsFeaturesPrefix` parameter. Additionally, the *feature ID* +#' (i.e., the row name of the feature in the `featureDefinitions` data.frame) +#' is always added as a metadata column with the name +#' `paste0(addColumnsFeaturesPrefix, "id")`, which defaults to `"feature_id"`. +#' +#' See also [chromPeakSpectra()], as it supports a similar parameter for +#' including columns from the chromatographic peaks in the returned spectra object. 
+#' These parameters can be used in combination to include information from both +#' the chromatographic peaks and the features in the returned [Spectra()]. +#' The *peak ID* (i.e., the row name of the peak in the `chromPeaks` matrix) +#' is added as a metadata column with the name +#' `paste0(addColumnsChromPeaksPrefix, "id")`, which defaults to +#' `"chrom_peak_id"`. #' #' @param object [XcmsExperiment] or [XCMSnExp] object with feature defitions. #' @@ -815,6 +846,16 @@ setGeneric("featureDefinitions<-", function(object, value) #' `featureDefinitions(x)`). This parameter overrides `skipFilled` and is #' only supported for `return.type` being either `"Spectra"` or `"List"`. #' +#' @param addColumnsFeatures `character` vector with the names of the columns +#' from `featureDefinitions` that should be added to the returned spectra +#' object. The columns will be named as they are written in the +#' `featureDefinitions` object with a prefix that is defined by the parameter +#' `addColumnsFeaturesPrefix`. Defaults to `c("mzmed", "rtmed")`. +#' +#' @param addColumnsFeaturesPrefix `character(1)` defining the prefix that +#' should be used for the columns from `featureDefinitions` that are added +#' to the returned spectra object. Defaults to `"feature_"`. +#' #' @param ... additional arguments to be passed along to [chromPeakSpectra()], #' such as `method`. 
#' diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index be67d750..5ffe1766 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -795,7 +795,7 @@ ppm = 0, skipFilled = FALSE, peaks = integer(), addColumnsChromPeaks = c("rt", "mz"), - addColumnsChrompeaksPrefix = "chrom_peak_", + addColumnsChromPeaksPrefix = "chrom_peak_", BPPARAM = bpparam()) { method <- match.arg(method) pks <- .chromPeaks(x)[, c("mz", "mzmin", "mzmax", "rt", @@ -822,8 +822,8 @@ split.data.frame(pks, f), split(spectra(x), factor(fromFile(x), levels = levels(f))), FUN = function(pk, sp, msLevel, method, addColumnsChromPeaks, - addColumnsChrompeaksPrefix) { - sp <- filterMsLevel(sp, msLevel) + addColumnsChromPeaksPrefix) { + sp <- Spectra::filterMsLevel(sp, msLevel) idx <- switch( method, all = .spectra_index_list(sp, pk, msLevel), @@ -833,21 +833,37 @@ largest_bpi = .spectra_index_list_largest_bpi(sp, pk, msLevel)) ids <- rep(rownames(pk), lengths(idx)) res <- sp[unlist(idx)] - pk_data <- DataFrame(pk[ids, addColumnsChromPeaks, drop = FALSE]) - pk_data$id <- ids - colnames(pk_data) <- paste0(addColumnsChrompeaksPrefix, + pk_data <- pk[ids, addColumnsChromPeaks, drop = FALSE] + pk_data <- cbind(pk_data, id = ids) + colnames(pk_data) <- paste0(addColumnsChromPeaksPrefix, colnames(pk_data)) - pk_data$spectrumId <- res$spectrumId - res <- Spectra::joinSpectraData(res, pk_data) + res <- .add_spectra_data(res, pk_data) res }, MoreArgs = list(msLevel = msLevel, method = method, addColumnsChromPeaks = addColumnsChromPeaks, - addColumnsChrompeaksPrefix = addColumnsChrompeaksPrefix), + addColumnsChromPeaksPrefix = addColumnsChromPeaksPrefix), BPPARAM = BPPARAM) Spectra:::.concatenate_spectra(res) } +#' @param x `Spectra` object. +#' +#' @param data `data.frame` or `matrix` with the data to be added to the +#' spectra object. 
+#' +#' @noRd +.add_spectra_data <- function(x, data) { + if (is(data, "matrix")) + data <- as.data.frame(data) + if (nrow(data) != length(x)) + stop("Length of 'data' does not match the number of spectra in 'x'") + for (i in colnames(data)) { + x[[i]] <- data[, i] + } + x +} + #' @param peaks `matrix` with chrom peaks. #' #' @param peakIdx `list` of `integer` indices defining which chromatographic diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index 1cbb8545..3607bcf4 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -515,10 +515,6 @@ #' indicating the identified chromatographic peaks. Only a single color #' is supported. Defaults to `peakCol = "#ff000060". #' -#' @param peaksInfo For `chromPeakSpectra`: `character` vector of additional -#' information from `chromPeaks()` to be added to the spectra object. The -#' columns names will be appended with "peaks_". -#' #' @param ppm For `chromPeaks` and `featureDefinitions`: optional `numeric(1)` #' specifying the ppm by which the m/z range (defined by `mz` should be #' extended. For a value of `ppm = 10`, all peaks within `mz[1] - ppm / 1e6` @@ -1784,7 +1780,11 @@ setMethod( "featureSpectra", "XcmsExperiment", function(object, msLevel = 2L, expandRt = 0, expandMz = 0, ppm = 0, skipFilled = FALSE, return.type = c("Spectra", "List"), - features = character(), ...) { + features = character(), + addColumnsFeatures = c("rtmed", "mzmed"), + addColumnsFeaturesPrefix = "feature_", + addColumnsChromPeaksPrefix = "chrom_peak_", + ...) { return.type <- match.arg(return.type) if (!hasFeatures(object)) stop("No feature definitions present. Please run ", @@ -1803,13 +1803,18 @@ setMethod( sps <- .mse_spectra_for_peaks( object, msLevel = msLevel, expandRt = expandRt, expandMz = expandMz, ppm = ppm, skipFilled = skipFilled, - peaks = unique(pindex), ...) 
+ peaks = unique(pindex), + addColumnsChromPeaksPrefix = addColumnsChromPeaksPrefix) + col <- paste0(addColumnsChromPeaksPrefix, "id") mtch <- as.matrix( - findMatches(sps$peak_id, rownames(.chromPeaks(object))[pindex])) + findMatches(sps[[col]], rownames(.chromPeaks(object))[pindex])) sps <- sps[mtch[, 1L]] fid <- rep( ufeatures, lengths(featureDefinitions(object)$peakidx[findex])) - sps$feature_id <- fid[mtch[, 2L]] + f_data <- featureDefinitions(object)[fid[mtch[, 2L]], addColumnsFeatures] + f_data$id <- fid[mtch[, 2L]] + colnames(f_data) <- paste0(addColumnsFeaturesPrefix, colnames(f_data)) + sps <- .add_spectra_data(sps, f_data) if (return.type == "List") { sps <- List(split(sps, f = factor(sps$feature_id, levels = ufeatures))) diff --git a/man/XcmsExperiment.Rd b/man/XcmsExperiment.Rd index 28516110..9bfc0437 100644 --- a/man/XcmsExperiment.Rd +++ b/man/XcmsExperiment.Rd @@ -396,10 +396,6 @@ also parameter \code{type} below for additional information.} \item{keepFeatures}{for most subsetting functions (\code{[}, \code{filterFile}): \code{logical(1)}: wheter eventually present feature definitions should be retained in the returned (filtered) object.} - -\item{peaksInfo}{For \code{chromPeakSpectra}: \code{character} vector of additional -information from \code{chromPeaks()} to be added to the spectra object. The -columns names will be appended with "peaks_".} } \description{ The \code{XcmsExperiment} is a data container for \code{xcms} preprocessing results diff --git a/man/chromPeakSpectra.Rd b/man/chromPeakSpectra.Rd index 8cd9439a..832addc8 100644 --- a/man/chromPeakSpectra.Rd +++ b/man/chromPeakSpectra.Rd @@ -18,7 +18,8 @@ chromPeakSpectra(object, ...) 
ppm = 0, skipFilled = FALSE, peaks = character(), - peaksInfo = c("rt", "mz"), + addColumnsChromPeaks = c("rt", "mz"), + addColumnsChromPeaksPrefix = "chrom_peak_", return.type = c("Spectra", "List"), BPPARAM = bpparam() ) @@ -66,6 +67,16 @@ be returned (providing either their ID, a logical vector same length than \code{nrow(chromPeaks(x))} or their index in \code{chromPeaks(x)}). This parameter overrides \code{skipFilled}.} +\item{addColumnsChromPeaks}{\code{character} vector with the names of the columns +from \code{chromPeaks} that should be added to the returned spectra object. +The columns will be named as they are written in the \code{chromPeaks} object +with a prefix that is defined by the parameter +\code{addColumnsChromPeaksPrefix}. Defaults to \code{c("mz", "rt")}.} + +\item{addColumnsChromPeaksPrefix}{\code{character(1)} defining the prefix that +should be used for the columns from \code{chromPeaks} that are added to the +returned spectra object. Defaults to \code{"chrom_peak_"}.} + \item{return.type}{\code{character(1)} defining the type of result object that should be returned.} @@ -125,14 +136,21 @@ signal (\code{"maxo"}); only supported for \code{msLevel = 2L}. Parameter \code{return.type} allows to specify the \emph{type} of the result object. With \code{return.type = "Spectra"} (the default) a \link{Spectra} object with all -matching spectra is returned. The spectra variable \code{"peak_id"} of the -returned \code{Spectra} contains the ID of the chromatographic peak (i.e., the -rowname of the peak in the \code{chromPeaks} matrix) for each spectrum. -With \code{return.type = "Spectra"} a \code{List} of \code{Spectra} is returned. The -length of the list is equal to the number of rows of \code{chromPeaks}. Each -element of the list contains thus a \code{Spectra} with all spectra for one -chromatographic peak (or a \code{Spectra} of length 0 if no spectrum was found -for the respective chromatographic peak). +matching spectra is returned. 
With \code{return.type = "Spectra"} a \code{List} of +\code{Spectra} is returned. The length of the list is equal to the number of rows +of \code{chromPeaks}. Each element of the list contains thus a \code{Spectra} with all +spectra for one chromatographic peak (or a \code{Spectra} of length 0 if no +spectrum was found for the respective chromatographic peak). + +Parameters \code{addColumnsChromPeaks} allow the user to add specific metadata +columns from the chromatographic peaks (\code{chromPeaks}) to the returned +spectra object. This can be useful to retain information such as retention +time (\code{rt}), m/z (\code{mz}). The columns will be named as they is written in the +\code{chromPeaks} object with a prefix that is defined by the parameter +\code{addColumnsChromPeaksPrefix}. The \emph{peak ID} (i.e., the row name of the +peak in the \code{chromPeaks} matrix) is always added to the spectra object as +metadata column \code{paste0(addColumnsChromPeaksPrefix,id)}, by default it will +be \code{"chrom_peak_id"}. See also the \emph{LC-MS/MS data analysis} vignette for more details and examples. } @@ -155,7 +173,7 @@ ms2_sps ## spectra variable *peak_id* contain the row names of the peaks in the ## chromPeak matrix and allow thus to map chromatographic peaks to the ## returned MS2 spectra -ms2_sps$peak_id +ms2_sps$chrom_peak_id chromPeaks(dda) ## Alternatively, return the result as a List of Spectra objects. This list diff --git a/man/featureSpectra.Rd b/man/featureSpectra.Rd index 047f06ba..d5cfec9f 100644 --- a/man/featureSpectra.Rd +++ b/man/featureSpectra.Rd @@ -18,6 +18,9 @@ featureSpectra(object, ...) skipFilled = FALSE, return.type = c("Spectra", "List"), features = character(), + addColumnsFeatures = c("rtmed", "mzmed"), + addColumnsFeaturesPrefix = "feature_", + addColumnsChromPeaksPrefix = "chrom_peak_", ... 
) @@ -63,6 +66,20 @@ be returned (providing either their ID, a logical vector same length than \code{nrow(featureDefinitions(x))} or their index in \code{featureDefinitions(x)}). This parameter overrides \code{skipFilled} and is only supported for \code{return.type} being either \code{"Spectra"} or \code{"List"}.} + +\item{addColumnsFeatures}{\code{character} vector with the names of the columns +from \code{featureDefinitions} that should be added to the returned spectra +object. The columns will be named as they are written in the +\code{featureDefinitions} object with a prefix that is defined by the parameter +\code{addColumnsFeaturesPrefix}. Defaults to \code{c("mzmed", "rtmed")}.} + +\item{addColumnsFeaturesPrefix}{\code{character(1)} defining the prefix that +should be used for the columns from \code{featureDefinitions} that are added +to the returned spectra object. Defaults to \code{"feature_"}.} + +\item{addColumnsChromPeaksPrefix}{\code{character(1)} defining the prefix that +should be used for the columns from \code{chromPeaks} that are added to the +returned spectra object. Defaults to \code{"chrom_peak_"}.} } \value{ The function returns either a \code{\link[=Spectra]{Spectra()}} (for \code{return.type = "Spectra"}) @@ -96,10 +113,24 @@ feature are returned. With any other option for \code{method}, a single spectrum \strong{per chromatographic peak} will be returned (hence multiple spectra per feature). -The ID of each chromatographic peak (i.e. its row name in \code{chromPeaks}) -and each feature (i.e., its row name in \code{featureDefinitions}) are -available in the returned \code{\link[=Spectra]{Spectra()}} with spectra variables \code{"peak_id"} -and \code{"feature_id"}, respectively. +The information from \code{featureDefinitions} for each feature can be included +in the returned \code{\link[=Spectra]{Spectra()}} object using the \code{addColumnsFeatures} parameter. 
+This is useful for retaining details such as the median retention time (\code{rtmed}) +or median m/z (\code{mzmed}). The columns will retain their names as specified +in the \code{featureDefinitions} object, prefixed by the value of the +\code{addColumnsFeaturesPrefix} parameter. Additionally, the \emph{feature ID} +(i.e., the row name of the feature in the \code{featureDefinitions} data.frame) +is always added as a metadata column with the name +\code{paste0(addColumnsFeaturesPrefix, "id")}, which defaults to \code{"feature_id"}. + +See also \code{\link[=chromPeakSpectra]{chromPeakSpectra()}}, as it supports a similar parameter for +including columns from the chromatographic peaks in the returned spectra object. +These parameters can be used in combination to include information from both +the chromatographic peaks and the features in the returned \code{\link[=Spectra]{Spectra()}}. +The \emph{peak ID} (i.e., the row name of the peak in the \code{chromPeaks} matrix) +is added as a metadata column with the name +\code{paste0(addColumnsChromPeaksPrefix, "id")}, which defaults to +\code{"chrom_peak_id"}. 
} \author{ Johannes Rainer diff --git a/tests/testthat/test_XcmsExperiment.R b/tests/testthat/test_XcmsExperiment.R index 272a58db..87d3a959 100644 --- a/tests/testthat/test_XcmsExperiment.R +++ b/tests/testthat/test_XcmsExperiment.R @@ -912,22 +912,22 @@ test_that(".spectra_index_list_closest_mz works", { test_that(".mse_spectra_for_peaks works", { res <- .mse_spectra_for_peaks(xmse) expect_s4_class(res, "Spectra") - expect_true(any(spectraVariables(res) == "peak_id")) + expect_true(any(spectraVariables(res) == "chrom_peak_id")) expect_true(length(res) == 0) res <- .mse_spectra_for_peaks(xmse, msLevel = 1L, method = "closest_rt") expect_s4_class(res, "Spectra") - expect_true(any(spectraVariables(res) == "peak_id")) + expect_true(any(spectraVariables(res) == "chrom_peak_id")) expect_true(length(res) == nrow(chromPeaks(xmse))) res <- .mse_spectra_for_peaks(xmse, msLevel = 1L, method = "all", peaks = 220) - expect_true(all(res$peak_id == "CP220")) + expect_true(all(res$chrom_peak_id == "CP220")) ## Duplicates index? 
res <- .mse_spectra_for_peaks(xmse, msLevel = 1L, method = "closest_rt", peaks = c(3, 2, 3, 3, 1)) - expect_equal(res$peak_id, c("CP003", "CP002", "CP003", "CP003", "CP001")) + expect_equal(res$chrom_peak_id, c("CP003", "CP002", "CP003", "CP003", "CP001")) expect_equal(rtime(res)[1], rtime(res)[3]) expect_equal(mz(res)[1], mz(res)[3]) }) @@ -948,32 +948,34 @@ test_that("chromPeakSpectra works", { expect_equal(names(res), pks) res <- chromPeakSpectra(xmse, peaks = pks, msLevel = 1L) expect_s4_class(res, "Spectra") - expect_equal(unique(res$peak_id), pks) + expect_equal(unique(res$chrom_peak_id), pks) res2 <- chromPeakSpectra(xmse, msLevel = 1L, method = "closest_rt") expect_equal(length(res2), nrow(chromPeaks(xmse))) - expect_equal(res2$peak_id, rownames(chromPeaks(xmse))) + expect_equal(res2$chrom_peak_id, rownames(chromPeaks(xmse))) res2 <- chromPeakSpectra(xmse, msLevel = 1L, method = "largest_tic", peaks = pks) expect_equal(length(res2), length(pks)) - expect_equal(res2$peak_id, pks) - ic <- split(ionCount(res), factor(res$peak_id, levels = pks)) + expect_equal(res2$chrom_peak_id, pks) + ic <- split(ionCount(res), factor(res$chrom_peak_id, levels = pks)) idx <- vapply(ic, which.max, integer(1)) - expect_equal(rtime(res2[1L]), rtime(res[res$peak_id == pks[1L]])[idx[1L]]) - expect_equal(rtime(res2[2L]), rtime(res[res$peak_id == pks[2L]])[idx[2L]]) - expect_equal(rtime(res2[3L]), rtime(res[res$peak_id == pks[3L]])[idx[3L]]) + expect_equal(rtime(res2[1L]), rtime(res[res$chrom_peak_id == pks[1L]])[idx[1L]]) + expect_equal(rtime(res2[2L]), rtime(res[res$chrom_peak_id == pks[2L]])[idx[2L]]) + expect_equal(rtime(res2[3L]), rtime(res[res$chrom_peak_id == pks[3L]])[idx[3L]]) res2 <- chromPeakSpectra(xmse, msLevel = 1L, method = "largest_bpi", peaks = pks, return.type = "List") expect_equal(length(res2), length(pks)) expect_equal(names(res2), pks) expect_true(all(lengths(res2) == 1L)) - bpi <- split(max(intensity(res)), factor(res$peak_id, levels = pks)) + bpi <- 
split(max(intensity(res)), factor(res$chrom_peak_id, levels = pks)) idx <- vapply(bpi, which.max, integer(1)) - expect_equal(rtime(res2[[1L]]), rtime(res[res$peak_id == pks[1L]])[idx[1L]]) - expect_equal(rtime(res2[[2L]]), rtime(res[res$peak_id == pks[2L]])[idx[2L]]) - expect_equal(rtime(res2[[3L]]), rtime(res[res$peak_id == pks[3L]])[idx[3L]]) + expect_equal(rtime(res2[[1L]]), rtime(res[res$chrom_peak_id == pks[1L]])[idx[1L]]) + expect_equal(rtime(res2[[2L]]), rtime(res[res$chrom_peak_id == pks[2L]])[idx[2L]]) + expect_equal(rtime(res2[[3L]]), rtime(res[res$chrom_peak_id == pks[3L]])[idx[3L]]) + expect(all(c("chrom_peak_id", "chrom_peak_mz", "chrom_peak_rt") %in% + spectraVariables(res2))) ## DDA data fl <- system.file("TripleTOF-SWATH/PestMix1_DDA.mzML", package = "msdata") @@ -1083,6 +1085,9 @@ test_that("featureSpectra works", { expect_s4_class(res_all, "Spectra") expect_true(all(rownames(featureDefinitions(xmseg)) %in% res_all$feature_id)) + expect_true(all(c("chrom_peak_rt", "chrom_peak_mz", "chrom_peak_id", + "feature_rtmed", "feature_mzmed", "feature_id") %in% + spectraVariables(res_all))) res_all <- featureSpectra(xmseg, msLevel = 1L, method = "closest_rt", return.type = "List") diff --git a/vignettes/xcms-lcms-ms.Rmd b/vignettes/xcms-lcms-ms.Rmd index 93486237..febad8e7 100644 --- a/vignettes/xcms-lcms-ms.Rmd +++ b/vignettes/xcms-lcms-ms.Rmd @@ -197,12 +197,21 @@ specifying `msLevel = 1L` in the call above (e.g. to evaluate the full MS1 signal at the peak's apex position). The returned `Spectra` contains also the reference to the respective -chromatographic peak as additional *spectra variable* `"peak_id"` that contains +chromatographic peak as additional *spectra variable* `"chrom_peak_id"` that contains the identifier for the chromatographic peak (i.e. its row name in the -`chromPeaks` matrix). +`chromPeaks` matrix). 
-```{r peak_id} -dda_spectra$peak_id +```{r chrom_peak_id} +dda_spectra$chrom_peak_id +``` + +Some information about the chromatographic peak can also be added to the +returned `Spectra` object using the `addChromPeaksColumns` parameter in +`chromPeakSpectra()`. By default, the m/z and retention time of the +chromatographic peak are added to the spectra metadata. + +```{r chrom_peak_mz} +dda_spectra$chrom_peak_mz ``` Note also that with `return.type = "List"` a list parallel to the `chromPeaks` @@ -231,7 +240,7 @@ chromatographic peak using the ID of the peak in the present data set. ```{r dda-ms2-get-ms2, message = FALSE} ex_id <- rownames(chromPeaks(dda_data, mz = ex_mz, ppm = 20)) -ex_spectra <- dda_spectra[dda_spectra$peak_id == ex_id] +ex_spectra <- dda_spectra[dda_spectra$chrom_peak_id == ex_id] ex_spectra ``` @@ -248,7 +257,7 @@ generally the best approach or suggested for all types of data. ex_spectrum <- combineSpectra(ex_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, - f = ex_spectra$peak_id) + f = ex_spectra$chrom_peak_id) ex_spectrum ``` @@ -769,7 +778,7 @@ match almost perfectly. Next we get the MS2 spectra for this peak. ```{r pro-dda-ms2} prochloraz_dda_spectra <- dda_spectra[ - dda_spectra$peak_id == rownames(prochloraz_dda_peak)] + dda_spectra$chrom_peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra ``` @@ -780,7 +789,7 @@ peaks. Next we combine them into a consensus spectrum. 
prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, - f = prochloraz_dda_spectra$peak_id) + f = prochloraz_dda_spectra$chrom_peak_id) ``` At last we load also the Prochloraz MS2 spectra (for different collision From d117184e91ab94760df85e79e35422879aff0ffb Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Thu, 21 Nov 2024 20:23:24 +0100 Subject: [PATCH 11/16] fix dots handling --- R/XcmsExperiment.R | 2 +- tests/testthat/test_XcmsExperiment.R | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index 3607bcf4..06a13f1f 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -1804,7 +1804,7 @@ setMethod( object, msLevel = msLevel, expandRt = expandRt, expandMz = expandMz, ppm = ppm, skipFilled = skipFilled, peaks = unique(pindex), - addColumnsChromPeaksPrefix = addColumnsChromPeaksPrefix) + addColumnsChromPeaksPrefix = addColumnsChromPeaksPrefix, ...) 
col <- paste0(addColumnsChromPeaksPrefix, "id") mtch <- as.matrix( findMatches(sps[[col]], rownames(.chromPeaks(object))[pindex])) diff --git a/tests/testthat/test_XcmsExperiment.R b/tests/testthat/test_XcmsExperiment.R index 87d3a959..57703518 100644 --- a/tests/testthat/test_XcmsExperiment.R +++ b/tests/testthat/test_XcmsExperiment.R @@ -963,6 +963,8 @@ test_that("chromPeakSpectra works", { expect_equal(rtime(res2[1L]), rtime(res[res$chrom_peak_id == pks[1L]])[idx[1L]]) expect_equal(rtime(res2[2L]), rtime(res[res$chrom_peak_id == pks[2L]])[idx[2L]]) expect_equal(rtime(res2[3L]), rtime(res[res$chrom_peak_id == pks[3L]])[idx[3L]]) + expect_true(all(c("chrom_peak_id", "chrom_peak_mz", "chrom_peak_rt") %in% + spectraVariables(res2))) res2 <- chromPeakSpectra(xmse, msLevel = 1L, method = "largest_bpi", peaks = pks, return.type = "List") @@ -974,8 +976,6 @@ test_that("chromPeakSpectra works", { expect_equal(rtime(res2[[1L]]), rtime(res[res$chrom_peak_id == pks[1L]])[idx[1L]]) expect_equal(rtime(res2[[2L]]), rtime(res[res$chrom_peak_id == pks[2L]])[idx[2L]]) expect_equal(rtime(res2[[3L]]), rtime(res[res$chrom_peak_id == pks[3L]])[idx[3L]]) - expect(all(c("chrom_peak_id", "chrom_peak_mz", "chrom_peak_rt") %in% - spectraVariables(res2))) ## DDA data fl <- system.file("TripleTOF-SWATH/PestMix1_DDA.mzML", package = "msdata") @@ -1107,10 +1107,10 @@ test_that("featureSpectra works", { res_2 <- featureSpectra(xmseg, msLevel = 1L, features = c("FT03", "FT01"), return.type = "List") expect_true(length(res[[1L]]) < length(res_2[[1L]])) - expect_true(all(res[[1L]]$peak_id %in% res_2[[1L]]$peak_id)) + expect_true(all(res[[1L]]$chrom_peak_id %in% res_2[[1L]]$chrom_peak_id)) expect_equal(unique(res[[1L]]$feature_id), unique(res_2[[1L]]$feature_id)) expect_true(length(res[[2L]]) < length(res_2[[2L]])) - expect_true(all(res[[2L]]$peak_id %in% res_2[[2L]]$peak_id)) + expect_true(all(res[[2L]]$chrom_peak_id %in% res_2[[2L]]$chrom_peak_id)) expect_equal(unique(res[[2L]]$feature_id), 
unique(res_2[[2L]]$feature_id)) }) From f0faca9f40cedecec3c0f009ed065e2a55c8223c Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Thu, 21 Nov 2024 21:04:05 +0100 Subject: [PATCH 12/16] version bump --- DESCRIPTION | 2 +- NEWS.md | 15 ++++++++++++--- R/XcmsExperiment-functions.R | 7 ++++--- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 811e90ab..9cba15a2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: xcms -Version: 4.5.0 +Version: 4.5.1 Title: LC-MS and GC-MS Data Analysis Description: Framework for processing and visualization of chromatographically separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF, diff --git a/NEWS.md b/NEWS.md index 536bc58e..af48a5a1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,15 @@ -# xcms 4.3 +# xcms 4.5 + +## Changes in version 4.5.1 + +- Small update to `featureSpectra()` and `chromPeakSpectra()` to allow columns + from `chromPeaks()` and `featureDefinitions()` to be added to the + `Spectra` output. +- Tidied the `xcms` vignette, to order the filtering of features and remove + the outdated normalisation paragraph. In-depth discussion on this subject can + be found on `metabonaut`. + +# xcms 4.3 ## Changes in version 4.3.4 @@ -7,8 +18,6 @@ - Small fix to the .yml file for the github actions, so they do not crash on warnings. 
- - ## Changes in version 4.3.3 - Fix issue #755: `chromatogram()` with `msLevel = 2` fails to extract diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index 5ffe1766..0fbf9adc 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -823,7 +823,7 @@ split(spectra(x), factor(fromFile(x), levels = levels(f))), FUN = function(pk, sp, msLevel, method, addColumnsChromPeaks, addColumnsChromPeaksPrefix) { - sp <- Spectra::filterMsLevel(sp, msLevel) + sp <- filterMsLevel(sp, msLevel) idx <- switch( method, all = .spectra_index_list(sp, pk, msLevel), @@ -833,8 +833,9 @@ largest_bpi = .spectra_index_list_largest_bpi(sp, pk, msLevel)) ids <- rep(rownames(pk), lengths(idx)) res <- sp[unlist(idx)] - pk_data <- pk[ids, addColumnsChromPeaks, drop = FALSE] - pk_data <- cbind(pk_data, id = ids) + pk_data <- as.data.frame(pk[ids, addColumnsChromPeaks, + drop = FALSE]) + pk_data$id <- ids colnames(pk_data) <- paste0(addColumnsChromPeaksPrefix, colnames(pk_data)) res <- .add_spectra_data(res, pk_data) From 5f8a4f2e28c50182866cb6ade7b1fce1abd343ca Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Fri, 22 Nov 2024 11:49:59 +0100 Subject: [PATCH 13/16] jo comments --- R/AllGenerics.R | 48 +++++++++++++----------------------- R/XcmsExperiment-functions.R | 15 ++++------- R/XcmsExperiment.R | 32 +++++++++--------------- man/chromPeakSpectra.Rd | 24 ++++++------------ man/featureSpectra.Rd | 35 +++++++++----------------- vignettes/xcms-lcms-ms.Rmd | 2 +- 6 files changed, 54 insertions(+), 102 deletions(-) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 600934e1..ff86fd99 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -417,15 +417,13 @@ setGeneric("chromPeakData<-", function(object, value) #' spectra for one chromatographic peak (or a `Spectra` of length 0 if no #' spectrum was found for the respective chromatographic peak). 
#' -#' Parameters `addColumnsChromPeaks` allow the user to add specific metadata +#' Parameter `chromPeakColumns` allows the user to add specific metadata #' columns from the chromatographic peaks (`chromPeaks`) to the returned #' spectra object. This can be useful to retain information such as retention #' time (`rt`), m/z (`mz`). The columns will be named as they is written in the -#' `chromPeaks` object with a prefix that is defined by the parameter -#' `addColumnsChromPeaksPrefix`. The *peak ID* (i.e., the row name of the -#' peak in the `chromPeaks` matrix) is always added to the spectra object as -#' metadata column `paste0(addColumnsChromPeaksPrefix,id)`, by default it will -#' be `"chrom_peak_id"`. +#' `chromPeaks` object with the prefix `"chrom_peak_"`. The *peak ID* +#' (i.e., the row name of the peak in the `chromPeaks` matrix) is always added +#' to the spectra object as a metadata column named `"chrom_peak_id"`. #' #' See also the *LC-MS/MS data analysis* vignette for more details and examples. #' @@ -460,15 +458,10 @@ setGeneric("chromPeakData<-", function(object, value) #' @param return.type `character(1)` defining the type of result object that #' should be returned. #' -#' @param addColumnsChromPeaks `character` vector with the names of the columns +#' @param chromPeakColumns `character` vector with the names of the columns #' from `chromPeaks` that should be added to the returned spectra object. #' The columns will be named as they are written in the `chromPeaks` object -#' with a prefix that is defined by the parameter -#' `addColumnsChromPeaksPrefix`. Defaults to `c("mz", "rt")`. -#' -#' @param addColumnsChromPeaksPrefix `character(1)` defining the prefix that -#' should be used for the columns from `chromPeaks` that are added to the -#' returned spectra object. Defaults to `"chrom_peak_"`. +#' with the prefix `"chrom_peak_"`. Defaults to `c("rt", "mz")`. #' #' @param BPPARAM parallel processing setup. Defaults to [bpparam()]. 
#' @@ -517,7 +510,7 @@ setGeneric("chromPeakData<-", function(object, value) #' ms2_sps <- chromPeakSpectra(dda) #' ms2_sps #' -#' ## spectra variable *peak_id* contain the row names of the peaks in the +#' ## spectra variable *chrom_peak_id* contain the row names of the peaks in the #' ## chromPeak matrix and allow thus to map chromatographic peaks to the #' ## returned MS2 spectra #' ms2_sps$chrom_peak_id @@ -817,23 +810,20 @@ setGeneric("featureDefinitions<-", function(object, value) #' spectra per feature). #' #' The information from `featureDefinitions` for each feature can be included -#' in the returned [Spectra()] object using the `addColumnsFeatures` parameter. +#' in the returned [Spectra()] object using the `featureColumns` parameter. #' This is useful for retaining details such as the median retention time (`rtmed`) #' or median m/z (`mzmed`). The columns will retain their names as specified -#' in the `featureDefinitions` object, prefixed by the value of the -#' `addColumnsFeaturesPrefix` parameter. Additionally, the *feature ID* -#' (i.e., the row name of the feature in the `featureDefinitions` data.frame) -#' is always added as a metadata column with the name -#' `paste0(addColumnsFeaturesPrefix, "id")`, which defaults to `"feature_id"`. +#' in the `featureDefinitions` object, prefixed by `"feature_"` +#' (e.g., `"feature_mzmed"`). Additionally, the *feature ID* (i.e., the row +#' name of the feature in the `featureDefinitions` data.frame) is always added +#' as a metadata column named `"feature_id"`. #' #' See also [chromPeakSpectra()], as it supports a similar parameter for #' including columns from the chromatographic peaks in the returned spectra object. #' These parameters can be used in combination to include information from both #' the chromatographic peaks and the features in the returned [Spectra()]. 
#' The *peak ID* (i.e., the row name of the peak in the `chromPeaks` matrix) -#' is added as a metadata column with the name -#' `paste0(addColumnsChromPeaksPrefix, "id")`, which defaults to -#' `"chrom_peak_id"`. +#' is added as a metadata column named `"chrom_peak_id"`. #' #' @param object [XcmsExperiment] or [XCMSnExp] object with feature defitions. #' @@ -846,15 +836,11 @@ setGeneric("featureDefinitions<-", function(object, value) #' `featureDefinitions(x)`). This parameter overrides `skipFilled` and is #' only supported for `return.type` being either `"Spectra"` or `"List"`. #' -#' @param addColumnsFeatures `character` vector with the names of the columns +#' @param featureColumns `character` vector with the names of the columns #' from `featureDefinitions` that should be added to the returned spectra #' object. The columns will be named as they are written in the -#' `featureDefinitions` object with a prefix that is defined by the parameter -#' `addColumnsFeaturesPrefix`. Defaults to `c("mzmed", "rtmed")`. -#' -#' @param addColumnsFeaturesPrefix `character(1)` defining the prefix that -#' should be used for the columns from `featureDefinitions` that are added -#' to the returned spectra object. Defaults to `"feature_"`. +#' `featureDefinitions` object with the prefix `"feature_"`. +#' Defaults to `c("rtmed", "mzmed")`. #' #' @param ... additional arguments to be passed along to [chromPeakSpectra()], #' such as `method`. @@ -866,7 +852,7 @@ setGeneric("featureDefinitions<-", function(object, value) #' the order and the length matches parameter `features` (or if no `features` #' is defined the order of the features in `featureDefinitions(object)`). #' -#' Spectra variables `"peak_id"` and `"feature_id"` define to which +#' Spectra variables `"chrom_peak_id"` and `"feature_id"` define to which #' chromatographic peak or feature each individual spectrum is associated #' with. 
#' diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index 0fbf9adc..1b4ea2d1 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -794,8 +794,7 @@ msLevel = 2L, expandRt = 0, expandMz = 0, ppm = 0, skipFilled = FALSE, peaks = integer(), - addColumnsChromPeaks = c("rt", "mz"), - addColumnsChromPeaksPrefix = "chrom_peak_", + chromPeakColumns = c("rt", "mz"), BPPARAM = bpparam()) { method <- match.arg(method) pks <- .chromPeaks(x)[, c("mz", "mzmin", "mzmax", "rt", @@ -821,8 +820,7 @@ res <- bpmapply( split.data.frame(pks, f), split(spectra(x), factor(fromFile(x), levels = levels(f))), - FUN = function(pk, sp, msLevel, method, addColumnsChromPeaks, - addColumnsChromPeaksPrefix) { + FUN = function(pk, sp, msLevel, method, chromPeakColumns) { sp <- filterMsLevel(sp, msLevel) idx <- switch( method, @@ -833,17 +831,14 @@ largest_bpi = .spectra_index_list_largest_bpi(sp, pk, msLevel)) ids <- rep(rownames(pk), lengths(idx)) res <- sp[unlist(idx)] - pk_data <- as.data.frame(pk[ids, addColumnsChromPeaks, - drop = FALSE]) + pk_data <- as.data.frame(pk[ids, chromPeakColumns, drop = FALSE]) pk_data$id <- ids - colnames(pk_data) <- paste0(addColumnsChromPeaksPrefix, - colnames(pk_data)) + colnames(pk_data) <- paste0("chrom_peak_", colnames(pk_data)) res <- .add_spectra_data(res, pk_data) res }, MoreArgs = list(msLevel = msLevel, method = method, - addColumnsChromPeaks = addColumnsChromPeaks, - addColumnsChromPeaksPrefix = addColumnsChromPeaksPrefix), + chromPeakColumns = chromPeakColumns), BPPARAM = BPPARAM) Spectra:::.concatenate_spectra(res) } diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index 06a13f1f..e5952975 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -1229,8 +1229,7 @@ setMethod( "largest_tic", "largest_bpi"), msLevel = 2L, expandRt = 0, expandMz = 0, ppm = 0, skipFilled = FALSE, peaks = character(), - addColumnsChromPeaks = c("rt", "mz"), - addColumnsChromPeaksPrefix = "chrom_peak_", + 
chromPeakColumns = c("rt", "mz"), return.type = c("Spectra", "List"), BPPARAM = bpparam()) { if (hasAdjustedRtime(object)) object <- applyAdjustedRtime(object) @@ -1246,19 +1245,15 @@ setMethod( else pkidx <- integer() res <- .mse_spectra_for_peaks(object, method, msLevel, expandRt, expandMz, ppm, skipFilled, pkidx, - addColumnsChromPeaks, - addColumnsChromPeaksPrefix, + chromPeakColumns, BPPARAM) if (!length(pkidx)) peaks <- rownames(.chromPeaks(object)) else peaks <- rownames(.chromPeaks(object))[pkidx] - if (return.type == "Spectra") { - col <- paste0(addColumnsChromPeaksPrefix, "id") - res <- res[as.matrix(findMatches(peaks, res[[col]]))[, 2L]] - } else { - col <- paste0(addColumnsChromPeaksPrefix, "id") - as(split(res, factor(res[[col]], levels = peaks)), "List") - } + if (return.type == "Spectra") + res <- res[as.matrix(findMatches(peaks, res$chrom_peak_id))[, 2L]] + else + as(split(res, factor(res$chrom_peak_id, levels = peaks)), "List") }) #' @rdname reconstructChromPeakSpectra @@ -1781,9 +1776,7 @@ setMethod( function(object, msLevel = 2L, expandRt = 0, expandMz = 0, ppm = 0, skipFilled = FALSE, return.type = c("Spectra", "List"), features = character(), - addColumnsFeatures = c("rtmed", "mzmed"), - addColumnsFeaturesPrefix = "feature_", - addColumnsChromPeaksPrefix = "chrom_peak_", + featureColumns = c("rtmed", "mzmed"), ...) { return.type <- match.arg(return.type) if (!hasFeatures(object)) @@ -1803,17 +1796,16 @@ setMethod( sps <- .mse_spectra_for_peaks( object, msLevel = msLevel, expandRt = expandRt, expandMz = expandMz, ppm = ppm, skipFilled = skipFilled, - peaks = unique(pindex), - addColumnsChromPeaksPrefix = addColumnsChromPeaksPrefix, ...) - col <- paste0(addColumnsChromPeaksPrefix, "id") + peaks = unique(pindex), ...) 
mtch <- as.matrix( - findMatches(sps[[col]], rownames(.chromPeaks(object))[pindex])) + findMatches(sps$chrom_peak_id, + rownames(.chromPeaks(object))[pindex])) sps <- sps[mtch[, 1L]] fid <- rep( ufeatures, lengths(featureDefinitions(object)$peakidx[findex])) - f_data <- featureDefinitions(object)[fid[mtch[, 2L]], addColumnsFeatures] + f_data <- featureDefinitions(object)[fid[mtch[, 2L]], featureColumns] f_data$id <- fid[mtch[, 2L]] - colnames(f_data) <- paste0(addColumnsFeaturesPrefix, colnames(f_data)) + colnames(f_data) <- paste0("feature_", colnames(f_data)) sps <- .add_spectra_data(sps, f_data) if (return.type == "List") { sps <- List(split(sps, f = factor(sps$feature_id, diff --git a/man/chromPeakSpectra.Rd b/man/chromPeakSpectra.Rd index 832addc8..753f2cfd 100644 --- a/man/chromPeakSpectra.Rd +++ b/man/chromPeakSpectra.Rd @@ -18,8 +18,7 @@ chromPeakSpectra(object, ...) ppm = 0, skipFilled = FALSE, peaks = character(), - addColumnsChromPeaks = c("rt", "mz"), - addColumnsChromPeaksPrefix = "chrom_peak_", + chromPeakColumns = c("rt", "mz"), return.type = c("Spectra", "List"), BPPARAM = bpparam() ) @@ -67,15 +66,10 @@ be returned (providing either their ID, a logical vector same length than \code{nrow(chromPeaks(x))} or their index in \code{chromPeaks(x)}). This parameter overrides \code{skipFilled}.} -\item{addColumnsChromPeaks}{\code{character} vector with the names of the columns +\item{chromPeakColumns}{\code{character} vector with the names of the columns from \code{chromPeaks} that should be added to the returned spectra object. The columns will be named as they are written in the \code{chromPeaks} object -with a prefix that is defined by the parameter -\code{addColumnsChromPeaksPrefix}. Defaults to \code{c("mz", "rt")}.} - -\item{addColumnsChromPeaksPrefix}{\code{character(1)} defining the prefix that -should be used for the columns from \code{chromPeaks} that are added to the -returned spectra object. 
Defaults to \code{"chrom_peak_"}.} +with the prefix \code{"chrom_peak_"}. Defaults to \code{c("rt", "mz")}.} \item{return.type}{\code{character(1)} defining the type of result object that should be returned.} @@ -142,15 +136,13 @@ of \code{chromPeaks}. Each element of the list contains thus a \code{Spectra} wi spectra for one chromatographic peak (or a \code{Spectra} of length 0 if no spectrum was found for the respective chromatographic peak). -Parameters \code{addColumnsChromPeaks} allow the user to add specific metadata +Parameter \code{chromPeakColumns} allows the user to add specific metadata columns from the chromatographic peaks (\code{chromPeaks}) to the returned spectra object. This can be useful to retain information such as retention time (\code{rt}), m/z (\code{mz}). The columns will be named as they is written in the -\code{chromPeaks} object with a prefix that is defined by the parameter -\code{addColumnsChromPeaksPrefix}. The \emph{peak ID} (i.e., the row name of the -peak in the \code{chromPeaks} matrix) is always added to the spectra object as -metadata column \code{paste0(addColumnsChromPeaksPrefix,id)}, by default it will -be \code{"chrom_peak_id"}. +\code{chromPeaks} object with the prefix \code{"chrom_peak_"}. The \emph{peak ID} +(i.e., the row name of the peak in the \code{chromPeaks} matrix) is always added +to the spectra object as a metadata column named \code{"chrom_peak_id"}. See also the \emph{LC-MS/MS data analysis} vignette for more details and examples. 
} @@ -170,7 +162,7 @@ dda <- findChromPeaks(dda, CentWaveParam(peakwidth = c(5, 15), ms2_sps <- chromPeakSpectra(dda) ms2_sps -## spectra variable *peak_id* contain the row names of the peaks in the +## spectra variable *chrom_peak_id* contain the row names of the peaks in the ## chromPeak matrix and allow thus to map chromatographic peaks to the ## returned MS2 spectra ms2_sps$chrom_peak_id diff --git a/man/featureSpectra.Rd b/man/featureSpectra.Rd index d5cfec9f..59753b5e 100644 --- a/man/featureSpectra.Rd +++ b/man/featureSpectra.Rd @@ -18,9 +18,7 @@ featureSpectra(object, ...) skipFilled = FALSE, return.type = c("Spectra", "List"), features = character(), - addColumnsFeatures = c("rtmed", "mzmed"), - addColumnsFeaturesPrefix = "feature_", - addColumnsChromPeaksPrefix = "chrom_peak_", + featureColumns = c("rtmed", "mzmed"), ... ) @@ -67,19 +65,11 @@ than \code{nrow(featureDefinitions(x))} or their index in \code{featureDefinitions(x)}). This parameter overrides \code{skipFilled} and is only supported for \code{return.type} being either \code{"Spectra"} or \code{"List"}.} -\item{addColumnsFeatures}{\code{character} vector with the names of the columns +\item{featureColumns}{\code{character} vector with the names of the columns from \code{featureDefinitions} that should be added to the returned spectra object. The columns will be named as they are written in the -\code{featureDefinitions} object with a prefix that is defined by the parameter -\code{addColumnsFeaturesPrefix}. Defaults to \code{c("mzmed", "rtmed")}.} - -\item{addColumnsFeaturesPrefix}{\code{character(1)} defining the prefix that -should be used for the columns from \code{featureDefinitions} that are added -to the returned spectra object. Defaults to \code{"feature_"}.} - -\item{addColumnsChromPeaksPrefix}{\code{character(1)} defining the prefix that -should be used for the columns from \code{chromPeaks} that are added to the -returned spectra object. 
Defaults to \code{"chrom_peak_"}.} +\code{featureDefinitions} object with the prefix \code{"feature_"}. +Defaults to \code{c("mzmed", "rtmed")}.} } \value{ The function returns either a \code{\link[=Spectra]{Spectra()}} (for \code{return.type = "Spectra"}) @@ -87,7 +77,7 @@ or a \code{List} of \code{Spectra} (for \code{return.type = "List"}). For the la the order and the length matches parameter \code{features} (or if no \code{features} is defined the order of the features in \code{featureDefinitions(object)}). -Spectra variables \code{"peak_id"} and \code{"feature_id"} define to which +Spectra variables \code{"chrom_peak_id"} and \code{"feature_id"} define to which chromatographic peak or feature each individual spectrum is associated with. } @@ -114,23 +104,20 @@ spectrum \strong{per chromatographic peak} will be returned (hence multiple spectra per feature). The information from \code{featureDefinitions} for each feature can be included -in the returned \code{\link[=Spectra]{Spectra()}} object using the \code{addColumnsFeatures} parameter. +in the returned \code{\link[=Spectra]{Spectra()}} object using the \code{featureColumns} parameter. This is useful for retaining details such as the median retention time (\code{rtmed}) or median m/z (\code{mzmed}). The columns will retain their names as specified -in the \code{featureDefinitions} object, prefixed by the value of the -\code{addColumnsFeaturesPrefix} parameter. Additionally, the \emph{feature ID} -(i.e., the row name of the feature in the \code{featureDefinitions} data.frame) -is always added as a metadata column with the name -\code{paste0(addColumnsFeaturesPrefix, "id")}, which defaults to \code{"feature_id"}. +in the \code{featureDefinitions} object, prefixed by \code{"feature_"} +(e.g., \code{"feature_mzmed"}). Additionally, the \emph{feature ID} (i.e., the row +name of the feature in the \code{featureDefinitions} data.frame) is always added +as a metadata column named \code{"feature_id"}.
See also \code{\link[=chromPeakSpectra]{chromPeakSpectra()}}, as it supports a similar parameter for including columns from the chromatographic peaks in the returned spectra object. These parameters can be used in combination to include information from both the chromatographic peaks and the features in the returned \code{\link[=Spectra]{Spectra()}}. The \emph{peak ID} (i.e., the row name of the peak in the \code{chromPeaks} matrix) -is added as a metadata column with the name -\code{paste0(addColumnsChromPeaksPrefix, "id")}, which defaults to -\code{"chrom_peak_id"}. +is added as a metadata column named \code{"chrom_peak_id"}. } \author{ Johannes Rainer diff --git a/vignettes/xcms-lcms-ms.Rmd b/vignettes/xcms-lcms-ms.Rmd index febad8e7..a1eb2c4e 100644 --- a/vignettes/xcms-lcms-ms.Rmd +++ b/vignettes/xcms-lcms-ms.Rmd @@ -206,7 +206,7 @@ dda_spectra$chrom_peak_id ``` Some information about the chromatographic peak can also be added to the -returned `Spectra` object using the `addChromPeaksColumns` parameter in +returned `Spectra` object using the `chromPeakColumns` parameter in `chromPeakSpectra()`. By default, the m/z and retention time of the chromatographic peak are added to the spectra metadata.
From 7340287c84f630dbab2ec06fb9316c6b2f3eb1ec Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:06:14 +0100 Subject: [PATCH 14/16] fix sanity check --- R/AllGenerics.R | 8 ++++---- R/XcmsExperiment-functions.R | 8 ++++++-- R/XcmsExperiment.R | 3 +++ man/chromPeakSpectra.Rd | 6 +++--- man/featureSpectra.Rd | 2 +- tests/testthat/test_XcmsExperiment.R | 5 +++++ 6 files changed, 22 insertions(+), 10 deletions(-) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index ff86fd99..a53f91bc 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -417,10 +417,10 @@ setGeneric("chromPeakData<-", function(object, value) #' spectra for one chromatographic peak (or a `Spectra` of length 0 if no #' spectrum was found for the respective chromatographic peak). #' -#' Parameters `chromPeakColumns` allow the user to add specific metadata +#' Parameter `chromPeakColumns` allows the user to add specific metadata #' columns from the chromatographic peaks (`chromPeaks`) to the returned -#' spectra object. This can be useful to retain information such as retention -#' time (`rt`), m/z (`mz`). The columns will be named as they is written in the +#' spectra object. This can be useful to keep information such as retention +#' time (`rt`), m/z (`mz`). The columns will be named as they are written in the #' `chromPeaks` object with the prefix `"chrom_peak_"`. The *peak ID* #' (i.e., the row name of the peak in the `chromPeaks` matrix) is always added #' to the spectra object as a metadata column named `"chrom_peak_id"`. @@ -811,7 +811,7 @@ setGeneric("featureDefinitions<-", function(object, value) #' #' The information from `featureDefinitions` for each feature can be included #' in the returned [Spectra()] object using the `featureColumns` parameter. 
-#' This is useful for retaining details such as the median retention time (`rtmed`) +#' This is useful for keeping details such as the median retention time (`rtmed`) #' or median m/z (`mzmed`). The columns will retain their names as specified #' in the `featureDefinitions` object, prefixed by `"feature_"` #' (e.g., `"feature_mzmed"`). Additionally, the *feature ID* (i.e., the row diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index 1b4ea2d1..cb6be689 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -797,8 +797,12 @@ chromPeakColumns = c("rt", "mz"), BPPARAM = bpparam()) { method <- match.arg(method) - pks <- .chromPeaks(x)[, c("mz", "mzmin", "mzmax", "rt", - "rtmin", "rtmax", "maxo", "sample")] + if (!chromPeakColumns %in% colnames(.chromPeaks(x))) + stop("One or more of the columns in 'chromPeakColumns' are not ", + "available in the 'chromPeaks' data.") + pks <- .chromPeaks(x)[, union(c("mz", "mzmin", "mzmax", "rt", + "rtmin", "rtmax", "maxo", "sample"), + chromPeakColumns)] if (ppm != 0) expandMz <- expandMz + pks[, "mz"] * ppm / 1e6 if (expandMz[1L] != 0) { diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index e5952975..69588eb6 100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -1782,6 +1782,9 @@ setMethod( if (!hasFeatures(object)) stop("No feature definitions present. Please run ", "'groupChromPeaks' first.") + if (!featureColumns %in% colnames(featureDefinitions(object))) + stop("One or more of the requested 'featureColumns' are not ", + "present in the feature definitions.") if (hasAdjustedRtime(object)) object <- applyAdjustedRtime(object) features_all <- rownames(featureDefinitions(object)) diff --git a/man/chromPeakSpectra.Rd b/man/chromPeakSpectra.Rd index 753f2cfd..8899c15a 100644 --- a/man/chromPeakSpectra.Rd +++ b/man/chromPeakSpectra.Rd @@ -136,10 +136,10 @@ of \code{chromPeaks}. 
Each element of the list contains thus a \code{Spectra} wi spectra for one chromatographic peak (or a \code{Spectra} of length 0 if no spectrum was found for the respective chromatographic peak). -Parameters \code{chromPeakColumns} allow the user to add specific metadata +Parameter \code{chromPeakColumns} allows the user to add specific metadata columns from the chromatographic peaks (\code{chromPeaks}) to the returned -spectra object. This can be useful to retain information such as retention -time (\code{rt}), m/z (\code{mz}). The columns will be named as they is written in the +spectra object. This can be useful to keep information such as retention +time (\code{rt}), m/z (\code{mz}). The columns will be named as they are written in the \code{chromPeaks} object with the prefix \code{"chrom_peak_"}. The \emph{peak ID} (i.e., the row name of the peak in the \code{chromPeaks} matrix) is always added to the spectra object as a metadata column named \code{"chrom_peak_id"}. diff --git a/man/featureSpectra.Rd b/man/featureSpectra.Rd index 59753b5e..c669649a 100644 --- a/man/featureSpectra.Rd +++ b/man/featureSpectra.Rd @@ -105,7 +105,7 @@ spectra per feature). The information from \code{featureDefinitions} for each feature can be included in the returned \code{\link[=Spectra]{Spectra()}} object using the \code{featureColumns} parameter. -This is useful for retaining details such as the median retention time (\code{rtmed}) +This is useful for keeping details such as the median retention time (\code{rtmed}) or median m/z (\code{mzmed}). The columns will retain their names as specified in the \code{featureDefinitions} object, prefixed by \code{"feature_"} (e.g., \code{"feature_mzmed"}). 
Additionally, the \emph{feature ID} (i.e., the row diff --git a/tests/testthat/test_XcmsExperiment.R b/tests/testthat/test_XcmsExperiment.R index 57703518..10d0699e 100644 --- a/tests/testthat/test_XcmsExperiment.R +++ b/tests/testthat/test_XcmsExperiment.R @@ -939,6 +939,9 @@ test_that("chromPeakSpectra works", { expect_error(chromPeakSpectra(xmse, peaks = "other"), "out of bounds") pks <- c("CP242", "CP007", "CP123") + + expect_error(chromPeakSpectra(xmse, peaks = pks, + chromPeakColumns = "other"), "not available") res <- chromPeakSpectra(xmse, peaks = pks) expect_s4_class(res, "Spectra") expect_equal(length(res), 0) @@ -1081,6 +1084,8 @@ test_that("filterFeatureDefinitions works", { test_that("featureSpectra works", { expect_error(featureSpectra(xmse), "No feature definitions") + expect_error(featureSpectra(xmseg, + featureColumns = "other"), "not present") res_all <- featureSpectra(xmseg, msLevel = 1L) expect_s4_class(res_all, "Spectra") expect_true(all(rownames(featureDefinitions(xmseg)) %in% From b8525f08f936a2d61a3e1ac37c57d9983ed86a77 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:01:45 +0100 Subject: [PATCH 15/16] fix sanity check part 2 --- R/XcmsExperiment-functions.R | 2 +- R/XcmsExperiment.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R index cb6be689..b0906bb8 100644 --- a/R/XcmsExperiment-functions.R +++ b/R/XcmsExperiment-functions.R @@ -797,7 +797,7 @@ chromPeakColumns = c("rt", "mz"), BPPARAM = bpparam()) { method <- match.arg(method) - if (!chromPeakColumns %in% colnames(.chromPeaks(x))) + if (!all(chromPeakColumns %in% colnames(.chromPeaks(x)))) stop("One or more of the columns in 'chromPeakColumns' are not ", "available in the 'chromPeaks' data.") pks <- .chromPeaks(x)[, union(c("mz", "mzmin", "mzmax", "rt", diff --git a/R/XcmsExperiment.R b/R/XcmsExperiment.R index 69588eb6..487b868c 
100644 --- a/R/XcmsExperiment.R +++ b/R/XcmsExperiment.R @@ -1782,7 +1782,7 @@ setMethod( if (!hasFeatures(object)) stop("No feature definitions present. Please run ", "'groupChromPeaks' first.") - if (!featureColumns %in% colnames(featureDefinitions(object))) + if (!all(featureColumns %in% colnames(featureDefinitions(object)))) stop("One or more of the requested 'featureColumns' are not ", "present in the feature definitions.") if (hasAdjustedRtime(object)) From b1ca93caa46f8426016d1bec708127b3ea5097d1 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Mon, 25 Nov 2024 09:54:49 +0100 Subject: [PATCH 16/16] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9cba15a2..d3f7cca9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: xcms -Version: 4.5.1 +Version: 4.5.2 Title: LC-MS and GC-MS Data Analysis Description: Framework for processing and visualization of chromatographically separated and single-spectra mass spectral data. Imports from AIA/ANDI NetCDF,