Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add new Ion-mobility peak picking algorithm #647

Draft
wants to merge 8 commits into
base: devel
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,7 @@ export("CentWaveParam",
"MassifquantParam",
"MSWParam",
"CentWavePredIsoParam",
"IMCentWaveParam",
"PeakDensityParam",
"MzClustParam",
"NearestPeaksParam",
Expand Down
4 changes: 4 additions & 0 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ setGeneric("baseValue", function(object, ...) standardGeneric("baseValue"))
setGeneric("baseValue<-", function(object, value) standardGeneric("baseValue<-"))
setGeneric("binSize", function(object, ...) standardGeneric("binSize"))
setGeneric("binSize<-", function(object, value) standardGeneric("binSize<-"))
## Getter and replacement generics named after the `binWidthIM` setting
## (the `IMCentWaveParam` class declares a numeric slot of that name).
## NOTE(review): the corresponding methods are not visible here -- confirm
## they are defined for `IMCentWaveParam`.
setGeneric("binWidthIM", function(object, ...) standardGeneric("binWidthIM"))
setGeneric("binWidthIM<-", function(object, value) standardGeneric("binWidthIM<-"))
setGeneric("bw", function(object) standardGeneric("bw"))
setGeneric("bw<-", function(object, value) standardGeneric("bw<-"))

Expand Down Expand Up @@ -736,6 +738,8 @@ setGeneric("plotTIC", function(object, ...) standardGeneric("plotTIC"))
setGeneric("plotTree", function(object, ...) standardGeneric("plotTree"))
setGeneric("ppm", function(object, ...) standardGeneric("ppm"))
setGeneric("ppm<-", function(object, value) standardGeneric("ppm<-"))
## Getter and replacement generics named after the `ppmMerging` setting
## (the `IMCentWaveParam` class declares a numeric slot of that name).
## NOTE(review): the corresponding methods are not visible here -- confirm
## they are defined for `IMCentWaveParam`.
setGeneric("ppmMerging", function(object, ...) standardGeneric("ppmMerging"))
setGeneric("ppmMerging<-", function(object, value) standardGeneric("ppmMerging<-"))
setGeneric("prefilter", function(object, ...) standardGeneric("prefilter"))
setGeneric("prefilter<-", function(object, value) standardGeneric("prefilter<-"))
setGeneric("present", function(object, class, minfrac) standardGeneric("present"))
Expand Down
76 changes: 76 additions & 0 deletions R/DataClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -1298,6 +1298,82 @@ setClass("CentWavePredIsoParam",
else TRUE
})

#### Ion mobility peak-picking classes ####

## Virtual base class for ion-mobility peak-picking parameter objects. It
## declares no slots; the peak detection code tests
## `inherits(param, "IMParam")` to route a parameter object to the
## ion-mobility code path.
setClass("IMParam", contains = "VIRTUAL")

#' @title Centwave-based ion-mobility peak picking
#'
#' @aliases centWaveIonMobility
#'
#' @description Performs an extension of CentWave peak-picking on LC-IM-MS MS1
#' data: first it joins all mobility scans into frames and performs .centWave_orig on
#' the summarized LC-MS-like data; then, from each peak, it calculates its mobilogram and
#' performs a second peak-picking on the IM dimension, resolving the peaks.
#'
#' @inheritParams findChromPeaks-centWave
#'
#' @param ppmMerging The maximum mass deviation allowed when grouping individual
#' IM scans into frames. Data points within \code{ppmMerging} ppm will be
#' summed up into a single value and the reported mz will be their weighted
#' average.
#'
#' @param binWidthIM The bin size used when calculating the mobilograms to resolve
#' the peaks into the ion-mobility dimension. Lower values will give better resolution
#' if the data allows it, but can also generate spurious peaks.
#'
#' @details See \code{\link{centWave}} for details on the centWave method.
#'
#' @family peak detection methods
#'
#' @author Roger Gine, Johannes Rainer
#'
#' @seealso The \code{\link{do_findChromPeaks_IM_centWave}} core
#' API function and \code{\link{CentWaveParam}} for the class the
#' \code{IMCentWaveParam} extends.
#'
#' @name findChromPeaks-centWaveIonMobility
NULL

#' @description The \code{IMCentWaveParam} class allows to specify all
#'     settings for a centWave-based chromatographic peak detection on
#'     ion-mobility (LC-IM-MS) data.
#'     Instances should be created with the \code{IMCentWaveParam}
#'     constructor. See also the documentation of the
#'     \code{\link{CentWaveParam}} for all methods and arguments this class
#'     inherits.
#'
#' @slot ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,ppmMerging,binWidthIM
#'     See corresponding parameter above.
#'
#' @rdname findChromPeaks-centWaveIonMobility
setClass("IMCentWaveParam",
         contains = c("IMParam", "CentWaveParam"),
         slots = c(
             ppmMerging = "numeric",
             binWidthIM = "numeric"
         ),
         prototype = prototype(
             ppmMerging = 10,
             binWidthIM = 0.02
         ),
         validity = function(object) {
             msg <- character()
             ## `anyNA()` is evaluated before the comparison: an NA inside
             ## `if` would abort validation with "missing value where
             ## TRUE/FALSE needed" instead of reporting a validity message.
             ## A tolerance of 0 ppm is accepted for 'ppmMerging'.
             if (length(object@ppmMerging) != 1 ||
                 anyNA(object@ppmMerging) ||
                 object@ppmMerging < 0) {
                 msg <- c(msg,
                          "'ppmMerging' should be a positive numeric of length 1")
             }
             ## The bin width has to be strictly positive: a width of 0
             ## cannot be used to bin the ion-mobility dimension.
             if (length(object@binWidthIM) != 1 ||
                 anyNA(object@binWidthIM) ||
                 object@binWidthIM <= 0) {
                 msg <- c(msg,
                          "'binWidthIM' should be a positive numeric of length 1")
             }
             if (length(msg))
                 msg
             else TRUE
         })

setClass("PeakDensityParam",
slots = c(sampleGroups = "ANY",
bw = "numeric",
Expand Down
27 changes: 23 additions & 4 deletions R/MsExperiment-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
MatchedFilterParam = "do_findChromPeaks_matchedFilter",
MassifquantParam = "do_findChromPeaks_massifquant",
MSWParam = "do_findPeaks_MSW",
CentWavePredIsoParam = "do_findChromPeaks_centWaveWithPredIsoROIs")
CentWavePredIsoParam = "do_findChromPeaks_centWaveWithPredIsoROIs",
IMCentWaveParam = "do_findChromPeaks_IM_centWave")
fun <- p2f[class(x)[1L]]
if (is.na(fun))
stop("No peak detection function for parameter class ", class(x)[1L])
Expand Down Expand Up @@ -62,6 +63,11 @@
#' @noRd
.mse_find_chrom_peaks_sample <- function(x, msLevel = 1L, param, ...) {
x <- filterMsLevel(x, msLevel)
if(inherits(param, "IMParam")){
if(!any(c("inv_ion_mobility") %in% Spectra::spectraVariables(x))) # Add any other column name needed
stop("Your Spectra object doesn't contain ion-mobility data")
return(do.call(.param_to_fun(param), args = append(list(x), as(param, "list")))) #Append to avoid concatenating spectra
}
pkd <- Spectra::peaksData(x, columns = c("mz", "intensity"),
BPPARAM = SerialParam())
vals_per_spect <- vapply(pkd, nrow, integer(1), USE.NAMES = FALSE)
Expand All @@ -74,6 +80,9 @@
pkd <- do.call(rbind, pkd)
if (!length(pkd))
return(NULL) # not returning matrix because of rbind
## Abort early if the spectra are not ordered by retention time.
## The argument of `stop()` is spelled `call.`; a misspelled `.call = FALSE`
## is treated as part of the message and appends "FALSE" to the error text
## instead of suppressing the call.
rts <- rtime(x)
if (is.unsorted(rts))
    stop("Spectra are not ordered by retention time", call. = FALSE)
if (inherits(param, "CentWaveParam")) {
centroided <- all(centroided(x))
if (is.na(centroided)) {
Expand All @@ -83,9 +92,6 @@
" works best on data in centroid mode.")
}
}
rts <- rtime(x)
if (is.unsorted(rts))
stop("Spectra are not ordered by retention time", .call = FALSE)
do.call(.param_to_fun(param),
args = c(list(mz = pkd[, 1L], int = pkd[, 2L], scantime = rts,
valsPerSpect = vals_per_spect), as(param, "list")))
Expand Down Expand Up @@ -219,6 +225,19 @@
if (lx)
f <- factor(x$.SAMPLE_IDX, levels = sidx)
else f <- factor(integer(), levels = sidx)

## Ion mobility peak-picking dispatch point
if (inherits(param, "IMParam")){
if (!any(c("inv_ion_mobility") %in% Spectra::spectraVariables(x))) # Add any other column name needed
stop("Spectra object does not seem to have ion-mobility data")
return (
bplapply(split(x, f), function(spec){
do.call(.param_to_fun(param),
args = append(list(spec), as(param, "list"))) #Append to avoid concatenating spectra
}, BPPARAM = BPPARAM)
)
}

## Check for random number of spectra if they are centroided. NOT all.
if (inherits(param, "CentWaveParam")) {
cntr <- all(centroided(x[sort(sample(seq_along(x), min(c(100, lx))))]))
Expand Down
210 changes: 210 additions & 0 deletions R/do_findChromPeaks-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -2220,8 +2220,218 @@ do_findPeaks_MSW <- function(mz, int, snthresh = 3,
peaklist
}

############################################################
## Ion-mobility peak picking
##
#' @title Core API for Centwave-based ion-mobility peak picking
#' @name do_findChromPeaks_IM_centWave
#'
#' @description Performs an extension of CentWave peak-picking on LC-IM-MS MS1
#' data. First it joins all scans into frames and performs .centWave_orig on
#' the summarized LC-MS-like data. From each peak, it calculates its mobilogram and
#' performs a second peak-picking on the IM dimension, resolving the peaks.
#'
#' @inheritParams do_findChromPeaks_centWave
#' @inheritParams findChromPeaks-centWaveIonMobility
#'
#' @return A matrix, each row representing an identified peak, with columns:
#' \describe{
#' \item{mz}{m/z value of the peak at the apex position.}
#' \item{mzmin}{Minimum m/z of the peak.}
#' \item{mzmax}{Maximum m/z of the peak.}
#' \item{rt}{Retention time value of the peak at the apex position.}
#' \item{rtmin}{Minimum retention time of the peak.}
#' \item{rtmax}{Maximum retention time of the peak.}
#' \item{im}{Ion mobility value of the peak at the apex position.}
#' \item{immin}{Minimum ion mobility value of the peak.}
#' \item{immax}{Maximum ion mobility value of the peak.}
#' \item{maxo}{Maximum intensity of the peak.}
#' \item{into}{Integrated (original) intensity of the peak.}
#' \item{intb}{Always \code{NA}.}
#' \item{sn}{Always \code{NA}}
#' }
#'
#' @family core peak detection functions
#'
#' @author Roger Gine, Johannes Rainer
#'
#' @importFrom Spectra peaksData rtime combineSpectra mz
do_findChromPeaks_IM_centWave <- function(spec,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there other ways/algorithms to perform peak detection on IM data that do not first collapse the data and then expand it again like you do here?

If not I would suggest to split the functionality into 3 functions:

  1. collapse peaksData by frame
  2. do peak detection using the original detection algorithm.
  3. have a function that takes the original peaksData and the detected chrom peaks matrix from 2) as input and post processes the data.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In principle, yes, there are other ways to perform peak-picking that directly use the "full" data without collapsing (for instance, you could do a 2D-CWT where you change the scales in both RT and IM dimensions and find local maxima; or any peak-picking algorithm such as those used for GCxGC-MS, where you have a similar situation). I haven't looked deeply into such methods, but we should accommodate them too, just in case.

Still, splitting the functionality in do_findChromPeaks_IM_centWave seems good, since those functions would be reusable for other algorithms, etc. Specifically, if you agree, I'll do the following:

  • Call .mse_find_chrom_peaks_sample with the IM-collapsed Spectra object and the param as(param, "CentWaveParam") (since IMCentWaveParam inherits from it) -> That function will take care of extracting the peaksData, rt, valsPerSpect, etc., run do_findChromPeaks_centWave, and return the peak matrix.
  • Encapsulate the post processing (ie. resolving across IM dimension) in another function

Sounds good? 👍

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry for my late reply!

splitting functionality is always good, as you say, enables reuse - and makes the code easier to read. so, yes, it sounds good.

so, if I understand:

  • if param inherits IMCentWaveParam collapse the peaksData by frame.
  • pass the data to do_findChromPeaks_centWave for peak detection (since IMCentWaveParam inherits CentWaveParam)
  • if param inherits IMCentWaveParam post process the detected peaks (resolve across IM dimension) and return results.

if the functions get too large, you could also consider implementing a .im_mse_find_chrom_peaks_sample that is called instead of .mse_find_chrom_peaks_sample if param inherits from an IM param object... not sure if that would simplify integration of additional/other IM peak detection methods.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now I've encapsulated all this into do_findChromPeaks_IM_centWave, so it's called after dispatching the function corresponding to the param:

    ## Merging all frames from the same scans to summarize across IM dimension
    scans_summarized <-
        Spectra::combineSpectra(
            spec,
            f = as.factor(spec$frameId),
            intensityFun = base::sum,
            weighted = TRUE,
            ppm = ppmMerging
        )
    Spectra::centroided(scans_summarized) <- TRUE
    
    ## 1D Peak-picking on summarized data
    peaks <- .mse_find_chrom_peaks_sample(scans_summarized,
                                          msLevel = 1L,
                                          param = CentWaveParam(ppm = ppm, peakwidth = peakwidth,
                                                        snthresh = snthresh, prefilter = prefilter,
                                                        mzCenterFun = mzCenterFun, integrate = integrate,
                                                        mzdiff = mzdiff, fitgauss = fitgauss, noise = noise,
                                                        verboseColumns = verboseColumns, roiList = roiList,
                                                        firstBaselineCheck = firstBaselineCheck,
                                                        roiScales = roiScales,
                                                        extendLengthMSW = extendLengthMSW))
    
    ## 1D Peak-picking, for each individual peak, to resolve across the IM dimension
    .do_resolve_IM_peaks_CWT(spec, peaks, binWidthIM)

It's basically all the steps you mentioned (collapse, CentWave and resolve), but called from a "lower" function call level, so everything upstream is more tidy. What I like about this is that all Centwave-specific checks and data extraction (mz, int, valsPerSpect, etc.) are handled by .mse_find_chrom_peaks_sample, so we are reusing already-existing code.

I'll commit the proposed refactor so you can take a closer look by yourself

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excellent. Let me know when you're OK/ready from your side. I would then like to try the code in action and tinker myself a bit to see if/where we could improve/optimize.

For that I would create yet another branch to play with the code and ask for your feedback on the merge.

Related to that: could you please provide a short code snippet with an example of how to perform the analysis (I guess I already got a file from you on which I can test...)?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely! If you agree, perhaps we should use (for testing) the MsBackendMemory or MsBackendDataFrame to create a manageable toy example from the current read-only MsBackendTimsTof (for instance, subsetting the RT)

I'm still figuring out how to properly centroid the data (the format has some problems that make the current pickPeaks function in Spectra ineffective, see below), but still, the analysis would go somewhat like this:

library(opentimsr)
library(MsBackendTimsTof)
library(MsExperiment)
library(Spectra)
library(xcms)
library(magrittr)

## Setting up Bruker SDK to read the raw data directly (if you have it already, just point to the corresponding file)
so_folder <- tempdir()
so_file <- download_bruker_proprietary_code(so_folder, method = "wget")
setup_bruker_so(so_file)

## Set up valid MsExperiment with the subsetted spectra, then detect IM peaks
be <- backendInitialize(MsBackendTimsTof(), "./path_to_your_file_folder.d")
spec <- Spectra(be) %>%
    filterRt(., c(350, 365)) %>% 
    filterMsLevel(., 1) %>% 
    setBackend(., MsBackendMemory())

exp <- MsExperiment()
spectra(exp) <- spec
sampleData(exp) <- DataFrame(
    raw_file = normalizePath("./path_to_your_file_folder.d")
)
exp <- linkSampleData(exp, with = "sampleData.raw_file = spectra.dataOrigin")

exp <- findChromPeaks(exp, IMCentWaveParam())

You can use the TIMS-TOF data file I sent you a while back, just bear in mind it's in a zero-less profile mode (working on fixing that) and the chromatographic peaks are usually very short (<6-10s)

ppm = 25,
peakwidth = c(20, 50),
snthresh = 10,
prefilter = c(3, 100),
mzCenterFun = "wMean",
integrate = 1,
mzdiff = -0.001,
fitgauss = FALSE,
noise = 0,
verboseColumns = FALSE,
roiList = list(),
firstBaselineCheck = TRUE,
roiScales = numeric(),
sleep = 0,
extendLengthMSW = FALSE,
ppmMerging = 10,
binWidthIM = 0.01
){

## Merge all mobility scans of the same frame (grouped by `spec$frameId`)
## into one spectrum per frame: intensities of data points within
## `ppmMerging` ppm are summed (`intensityFun = base::sum`).
scans_summarized <- Spectra::combineSpectra(
    spec,
    f = as.factor(spec$frameId),
    intensityFun = base::sum,
    weighted = TRUE,
    ppm = ppmMerging
)
## Flag the merged spectra as centroided.
## NOTE(review): presumably needed so the centroid-mode check of the
## centWave code path accepts the summarized data -- confirm.
Spectra::centroided(scans_summarized) <- TRUE

## 1D peak picking (centWave) on the summarized, LC-MS-like data
cw_param <- CentWaveParam(
    ppm = ppm, peakwidth = peakwidth,
    snthresh = snthresh, prefilter = prefilter,
    mzCenterFun = mzCenterFun, integrate = integrate,
    mzdiff = mzdiff, fitgauss = fitgauss, noise = noise,
    verboseColumns = verboseColumns, roiList = roiList,
    firstBaselineCheck = firstBaselineCheck,
    roiScales = roiScales,
    extendLengthMSW = extendLengthMSW
)
peaks <- .mse_find_chrom_peaks_sample(scans_summarized,
                                      msLevel = 1L,
                                      param = cw_param)
## `.mse_find_chrom_peaks_sample` returns NULL when the spectra contain no
## data points. `nrow(NULL)` is NULL, so testing `!nrow(peaks)` alone would
## fail with "argument is of length zero"; check for NULL first.
if (is.null(peaks) || !nrow(peaks))
    return(NULL)

## Widen the m/z range of each peak by `ppmMerging` to account for
## combineSpectra having merged close m/z values.
mz_tolerance <- ppmMerging * 1e-6
peaks[, "mzmin"] <- peaks[, "mzmin"] * (1 - mz_tolerance)
peaks[, "mzmax"] <- peaks[, "mzmax"] * (1 + mz_tolerance)

## 1D peak picking, for each individual peak, to resolve across the
## ion-mobility dimension (uses the original, non-merged spectra)
.do_resolve_IM_peaks_CWT(spec, peaks, binWidthIM)

}


.do_resolve_IM_peaks_CWT <- function(spec, peaks, binWidthIM){
## Extract frame information
pdata <- peaksData(spec, columns = c("mz", "intensity"))
rt <- rtime(spec)
im <- spec$inv_ion_mobility

## Resolving peaks across IM dimension
resolved_peaks <- vector("list", nrow(peaks))
for (i in seq_len(nrow(peaks))) {
current_peak <- peaks[i,]
mobilogram <- .extract_mobilogram(pdata, current_peak, rt, im, binWidthIM)
if (length(mobilogram) == 0) {
# warning(i, " mobilogram is empty")
next
}

bounds <- .split_mobilogram(mobilogram)
## No apex could be resolved in the mobilogram: skip this peak. Without
## this guard the `data.frame()` call below would combine length-one m/z
## and rt values with zero-length ion-mobility columns and fail with
## "arguments imply differing number of rows".
if (!length(bounds))
    next
new_peaks <- data.frame(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using data.frame here might have an impact on performance. I think matrix might be faster, but that's something we can check later.

mz = current_peak["mz"],
mzmin = current_peak["mzmin"],
mzmax = current_peak["mzmax"],
rt = current_peak["rt"],
rtmin = current_peak["rtmin"],
rtmax = current_peak["rtmax"],
im = vapply(bounds, function(x) x[[2]], numeric(1)),
immin = vapply(bounds, function(x) x[[1]], numeric(1)),
immax = vapply(bounds, function(x) x[[3]], numeric(1)),
row.names = NULL
)
resolved_peaks[[i]] <- new_peaks
}

resolved_peaks <- do.call(rbind, resolved_peaks)
if (is.null(resolved_peaks) || !nrow(resolved_peaks))
    return(NULL)

## Refine and calculate peak parameters, one candidate peak per row
vals <- vector("list", nrow(resolved_peaks))
for (i in seq_len(nrow(resolved_peaks))) {
    ## Named numeric vector with the values of row i
    peak <- unlist(resolved_peaks[i, , drop = TRUE])

    ## Create an EIC restricted to the m/z, rt and IM ranges of the peak
    eic <- .extract_EIC_IM(peak, pdata, rt, im)

    ## Scalar condition: use the short-circuiting `||` so the second test
    ## is not evaluated for an empty EIC.
    if (nrow(eic) == 0 || all(eic[, 2] == 0))
        next

    ## Refine RT bounds: restrict the EIC to the range returned by
    ## `descendMin()` when started from the apex (most intense point).
    apx <- which.max(eic[, 2])
    apx_rt <- eic[apx, 1]
    range <- xcms:::descendMin(eic[, 2], apx)

    eic <- eic[range[1]:range[2], , drop = FALSE]

    ## Calculate peak stats
    vals[[i]] <- data.frame(
        mz = peak["mz"],
        mzmin = peak["mzmin"],
        mzmax = peak["mzmax"],
        rt = apx_rt,
        rtmin = min(eic[, 1]),
        rtmax = max(eic[, 1]),
        im = peak["im"],
        immin = peak["immin"],
        immax = peak["immax"],
        maxo = max(eic[, 2]),
        into = sum(eic[, 2]),
        intb = NA,
        sn = NA
    )
}
resolved_peaks <- do.call(rbind, vals)
## Every candidate was skipped above: `do.call(rbind, ...)` yields NULL and
## `as.matrix(NULL)` would raise an error, so report "no peaks" instead.
if (is.null(resolved_peaks))
    return(NULL)
resolved_peaks <-
    resolved_peaks[resolved_peaks$into > 0, ] #Remove empty peaks

as.matrix(resolved_peaks)
}

#' @description Build the mobilogram of one chromatographic peak: the summed
#'     intensity within the peak's m/z range, binned along ion mobility, for
#'     all spectra whose retention time lies within the peak's rt range.
#'
#' @param pdata `list` with one peaks element per spectrum (the caller passes
#'     the result of `peaksData()`).
#'
#' @param peak named `numeric` with elements `"rtmin"`, `"rtmax"`, `"mzmin"`
#'     and `"mzmax"`.
#'
#' @param rt `numeric` with the retention time of each spectrum.
#'
#' @param im `numeric` with the ion-mobility value of each spectrum.
#'
#' @param binWidthIM `numeric(1)` passed as bin `size` to
#'     `MsCoreUtils::bin`.
#'
#' @return The value returned by `MsCoreUtils::bin`, or `NULL` if no spectrum
#'     falls into the rt range or all aggregated intensities are 0.
#'
#' @importFrom MsCoreUtils between bin
#'
#' @noRd
.extract_mobilogram <- function(pdata, peak, rt, im, binWidthIM = 0.01) {
    rtr <- c(peak[["rtmin"]], peak[["rtmax"]])
    mzr <- c(peak[["mzmin"]], peak[["mzmax"]])
    keep <- MsCoreUtils::between(rt, rtr)
    ## `keep` has one element per spectrum, so its length says nothing
    ## about whether any spectrum matched; test its content instead.
    if (!any(keep))
        return(NULL)
    ims <- im[keep]
    ints <- vapply(pdata[keep], xcms:::.aggregate_intensities,
                   FUN.VALUE = numeric(1),
                   mzr = mzr, INTFUN = sum, na.rm = TRUE)
    if (all(ints == 0))
        return(NULL)
    ## Order intensities and mobilities together, by increasing mobility
    idx <- order(ims)
    MsCoreUtils::bin(x = ints[idx], y = ims[idx],
                     size = binWidthIM, FUN = sum)
}


## Split a binned mobilogram into individual ion-mobility peaks.
##
## `mob` is a list with elements `x` (binned intensities) and `mids` (bin
## mid points). Local maxima of `x` are located with
## `MsCoreUtils::localMaxima(hws = 4)`; for each of them `descendMinTol()`
## is started at the apex and its two returned positions are used as the
## lower and upper bound.
##
## Returns NULL for an empty mobilogram, otherwise a list with one numeric
## vector `c(lower, apex, upper)` of mid-point values per detected apex.
## Entries containing NA are dropped.
.split_mobilogram <- function(mob) {
    intensities <- mob$x
    if (length(intensities) == 0)
        return()
    apex <- which(MsCoreUtils::localMaxima(intensities, hws = 4))
    limits <- lapply(seq_along(apex), function(k) {
        ranges <- descendMinTol(intensities,
                                startpos = c(apex[k], apex[k]),
                                maxDescOutlier = 2)
        mob$mids[c(ranges[1], apex[k], ranges[2])]
    })
    is_complete <- vapply(limits, function(z) !anyNA(z), logical(1))
    limits[is_complete]
}


#' @description Extract the ion chromatogram of one peak, restricted to the
#'     peak's retention time, m/z and ion-mobility ranges. Intensities of
#'     spectra sharing the same retention time are summed.
#'
#' @param peak named `numeric` with elements `"rtmin"`, `"rtmax"`,
#'     `"mzmin"`, `"mzmax"`, `"immin"` and `"immax"`.
#'
#' @param pdata `list` with one peaks element per spectrum (the caller passes
#'     the result of `peaksData()`).
#'
#' @param rt `numeric` with the retention time of each spectrum.
#'
#' @param im `numeric` with the ion-mobility value of each spectrum.
#'
#' @return A two-column `matrix`: unique retention times (in order of first
#'     occurrence) and the summed intensity for each of them. The matrix has
#'     0 rows if no spectrum matches.
#'
#' @importFrom MsCoreUtils between
#'
#' @noRd
.extract_EIC_IM <- function(peak, pdata, rt, im) {
    rtr <- c(peak[["rtmin"]], peak[["rtmax"]])
    mzr <- c(peak[["mzmin"]], peak[["mzmax"]])
    imr <- c(peak[["immin"]], peak[["immax"]])

    ## `MsCoreUtils::between` is used (as in `.extract_mobilogram`) instead
    ## of `dplyr::between`: importing a second `between` into the same
    ## namespace conflicts with the MsCoreUtils import and adds a dependency.
    keep <- MsCoreUtils::between(rt, rtr) & MsCoreUtils::between(im, imr)
    rts <- rt[keep]
    ints <- vapply(pdata[keep], xcms:::.aggregate_intensities,
                   FUN.VALUE = numeric(1),
                   mzr = mzr, INTFUN = sum, na.rm = TRUE)
    ## Sum the intensities of all mobility scans with the same retention time
    unique_rts <- unique(rts)
    summed <- vapply(unique_rts, function(z) sum(ints[rts == z]), numeric(1))

    cbind(rt = unique_rts, ints = summed)
}


############################################################
Expand Down
Loading