Skip to content

Commit

Permalink
feat: add chromatogram,XcmsExperimentHdf5
Browse files Browse the repository at this point in the history
  • Loading branch information
jorainer committed Nov 14, 2024
1 parent 4ef9c2b commit 7bd035d
Show file tree
Hide file tree
Showing 11 changed files with 294 additions and 89 deletions.
6 changes: 4 additions & 2 deletions R/XcmsExperiment-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,7 @@
msLevel, isolationWindow = NULL,
chunkSize, chromPeaks,
return.type, BPPARAM) {
message("Extracting chromatographic data")
chrs <- as(.mse_chromatogram(
as(object, "MsExperiment"), rt = rt, mz = mz,
aggregationFun = aggregationFun, msLevel = msLevel,
Expand Down Expand Up @@ -956,7 +957,8 @@
}
}
pb$tick()
## Process features - that is not perfect.
## Process features - that is not perfect: features are selected based on
## mz and rt, not based on the selected chrom peaks.
if (hasFeatures(object)) {
message("Processing features")
pb <- progress_bar$new(format = paste0("[:bar] :current/:",
Expand Down Expand Up @@ -1264,4 +1266,4 @@ XcmsExperiment <- function() {
n@.processHistory <- from@processHistory
validObject(n)
n
}
}
101 changes: 74 additions & 27 deletions R/XcmsExperimentHdf5-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,10 @@ NULL
##
################################################################################
#' Read chromatograms for a set of samples (chunk) and adds chromatographic
#' peaks.
#' peaks. Chromatographic peaks are added/processed by sample reading the
#' respective information from the hdf5 file. Features are added by processing
#' the feature and chrom peak IDs of the selected chrom peaks.
#'
#'
#' @param x `XcmsExperimentHdf5` for one subset/chunk of data from which the
#' data should be extracted
Expand All @@ -588,49 +591,90 @@ NULL
#'
#' @param ms_level `integer(1)` with the MS level.
#' @noRd
.h5_x_chromatogram <- function(x, index = seq_along(x), ms_level = 1L,
mz, rt, ppm = 0, chromPeaks = "any",
.h5_x_chromatograms <- function(x, index = seq_along(x), aggregationFun = "sum",
ms_level = 1L, mz, rt, isolationWindow = NULL,
chromPeaks = "any", chunkSize = 2L,
return.type = "XChromatograms",
BPPARAM = bpparam()) {
## Get the chromatograms in parallel.
chr <- as(chromatogram(as(x, "MsExperiment"), mz = mz,
rt = rt, BPPARAM = BPPARAM), "XChromatograms")
js <- seq_len(nrow(chr))
message("Extracting chromatographic data")
chr <- as(.mse_chromatogram(
as(x, "MsExperiment"), rt = rt, mz = mz, msLevel = ms_level,
aggregationFun = aggregationFun, isolationWindow = isolationWindow,
chunkSize = chunkSize, BPPARAM = BPPARAM), return.type)
if (return.type == "MChromatograms" || chromPeaks == "none")
return(chr)
message("Processing chromatographic peaks")
js <- seq_len(nrow(chr))
pb <- progress_bar$new(format = paste0("[:bar] :current/:",
"total (:percent) in ",
":elapsed"),
total = ncol(chr) + 1L, clear = FALSE)
for (i in seq_along(x)) {
total = length(x), clear = FALSE)
has_features <- hasFeatures(x, ms_level)
if (has_features) {
fd <- .h5_read_data(x@hdf5_file, "features", "feature_definitions",
read_rownames = TRUE, ms_level = ms_level)[[1L]]
f2p <- vector("list", 1000) # initialize with an educated guess;
cnt <- 1L
}
for (i in seq_along(x)) { # iterate over samples
cp <- .h5_read_data(x@hdf5_file, x@sample_id[i], "chrom_peaks",
ms_level = ms_level, read_colnames = TRUE,
read_rownames = TRUE)[[1L]]
for (j in js) {
cd <- .h5_read_data(x@hdf5_file, x@sample_id[i], "chrom_peak_data",
ms_level = ms_level, read_colnames = TRUE,
read_rownames = FALSE)[[1L]]
if (has_features)
fidx <- .h5_read_data(x@hdf5_file, x@sample_id[i],
"feature_to_chrom_peaks",
ms_level = ms_level)[[1L]]
for (j in js) { # iterate over ranges/EICs
idx <- which(.is_chrom_peaks_within_mz_rt(
cp, rt[j, ], mz[j, ], ppm, chromPeaks), useNames = FALSE)
a <- cbind(cp[idx, , drop = FALSE], sample = rep(i, length(idx)))
b <- .h5_read_data(
x@hdf5_file, x@sample_id[i], "chrom_peak_data",
ms_level = ms_level, read_colnames = TRUE, i = idx,
read_rownames = FALSE)[[1L]]
b$ms_level <- rep(ms_level, length(idx))
cp, rt[j, ], mz[j, ], type = chromPeaks), useNames = FALSE)
li <- length(idx)
a <- cbind(cp[idx, , drop = FALSE], sample = rep(i, li))
b <- extractROWS(cd, idx)
b$ms_level <- rep(ms_level, li)
attr(b, "row.names") <- rownames(a)
tmp <- chr@.Data[j, i][[1L]]
slot(tmp, "chromPeaks", check = FALSE) <- a
slot(tmp, "chromPeakData", check = FALSE) <- as(b, "DataFrame")
chr@.Data[j, i][[1L]] <- tmp
chr@.Data[j, i][[1L]] <- tmp # this does not seem to cause copying
## Add mapping of features and chrom peaks for that sample/EIC
if (li && has_features) {
is_feature <- fidx[, 2L] %in% idx
if (any(is_feature)) {
f2p[[cnt]] <- cbind(rownames(fd)[fidx[is_feature, 1L]],
rownames(cp)[fidx[is_feature, 2L]],
rep(as.character(j), sum(is_feature)))
cnt <- cnt + 1L
}
}
}
pb$tick()
}
pb$tick()
if (hasFeatures(x, ms_level)) {
stop("Not yet implemented")
## Somehow add features.
if (has_features) {
message("Processing features")
## Define the featureDefinitions and assign chrom peaks to each,
## matching the same `"row"` (!).
f2p <- do.call(base::rbind, f2p) # mapping of features and chrom peaks
cp <- chromPeaks(chr) # chrom peaks to calculate index
cp_row_id <- paste0(rownames(cp), ".", cp[, "row"])
ft_cp_row_id <- paste0(f2p[, 2], ".", f2p[, 3])
ft_row_id <- paste0(f2p[, 1], ".", f2p[, 3])
peakidx <- lapply(split(ft_cp_row_id, ft_row_id),
base::match, table = cp_row_id)
id <- strsplit(names(peakidx), ".", fixed = TRUE)
fts <- fd[vapply(id, `[`, i = 1L, FUN.VALUE = NA_character_), ]
fts$peakidx <- unname(peakidx)
fts$row <- vapply(id, function(z) as.integer(z[2L]), NA_integer_)
fts$ms_level <- ms_level
slot(chr, "featureDefinitions", check = FALSE) <-
DataFrame(extractROWS(fts, order(fts$row)))
}
chr@.processHistory <- x@processHistory
slot(chr, ".processHistory", check = FALSE) <- x@processHistory
chr
}


################################################################################
##
## HDF5 FUNCTIONALITY
Expand Down Expand Up @@ -805,7 +849,6 @@ NULL
rhdf5::h5ls(g, recursive = recursive, datasetinfo = FALSE)$name
}


.h5_ms_levels <- function(h5, sample_id) {
nms <- .h5_dataset_names(paste0("/", sample_id), h5)
as.integer(unique(sub("ms_", "", grep("^ms", nms, value = TRUE))))
Expand Down Expand Up @@ -909,7 +952,7 @@ NULL
#'
#' @noRd
.h5_check_mod_count <- function(h5, mod_count = 0L) {
mc <- rhdf5::h5read(h5, "/header/modcount")[1L]
mc <- .h5_mod_count(h5)
if (mc != mod_count)
stop("The HDF5 file was changed by a different process. This xcms ",
"result object/variable is no longer valid.")
Expand Down Expand Up @@ -965,13 +1008,17 @@ NULL
level = comp_level)
}

.h5_mod_count <- function(h5) {
rhdf5::h5read(h5, "/header/modcount")[1L]
}

#' Every writing operation should increate the "mod count", i.e. the
#' count of data modifications. This function increases the mod count by +1
#' and returns this value.
#'
#' @noRd
.h5_increment_mod_count <- function(h5) {
mc <- rhdf5::h5read(h5, "/header/modcount")[1L] + 1L
mc <- .h5_mod_count(h5) + 1L
rhdf5::h5write(mc, h5, "/header/modcount",
level = .h5_compression_level())
mc
Expand Down
40 changes: 13 additions & 27 deletions R/XcmsExperimentHdf5.R
Original file line number Diff line number Diff line change
Expand Up @@ -410,14 +410,14 @@ setMethod(
## })

#' TODO:
#' - add chunkSize to chromPeaks?
#' - chromPeakData,XcmsExperimentHdf5
#' - adjustRtime,XcmsExperimentHdf5
#' - fillChromPeaks,XcmsExperimentHdf5
#' - filterMsLevel
#' - filterRt
#' - filterMz
#'
#' @noRd
NULL


################################################################################
##
## RETENTION TIME ALIGNMENT
Expand Down Expand Up @@ -594,7 +594,7 @@ setMethod(
unique(c(object@features_ms_level, msLevel)))
cpk_idx <- res$peakidx
res$peakidx <- NULL
rownames(res) <- .featureIDs(
attr(res, "row.names") <- .featureIDs(
nrow(res), prefix = paste0("FT", msLevel), min_len = 6)
## Save features to "/features/ms_<msLevel>/feature_definitions"
.h5_write_data(object@hdf5_file, list(features = res),
Expand Down Expand Up @@ -705,15 +705,6 @@ setMethod(
##
################################################################################

#' While previously we were first extracting the chromatograms and then adding
#' the chrom peaks later for `XcmsExperimentHdf5` it might be more efficient to
#' also extract the chrom peaks in the loop/chunk processing. So, essentially:
#' - process `object` chunk-wise
#' - for each chunk:
#' - extract chromatograms (in parallel?)
#' - get chrom peaks for each sample/chrom peak.
#'
#' @noRd
#' @rdname hidden_aliases
setMethod(
"chromatogram", "XcmsExperimentHdf5",
Expand All @@ -737,19 +728,14 @@ setMethod(
mz <- cbind(rep(-Inf, nrow(rt)), rep(Inf, nrow(rt)))
return.type <- match.arg(return.type)
chromPeaks <- match.arg(chromPeaks)
if (!hasChromPeaks(object, msLevel))
chromPeaks <- "none"
if (hasAdjustedRtime(object))
object <- applyAdjustedRtime(object)
## process the data in chunks.
## in each chunk: get chromatograms, load chrom peaks and process those.
## ? how to get/define the features too? get the feature indices?
## Implementation notes:
## XChromatogram has slots @chromPeaks (matrix) @chromPeakData (DataFrame)
## XChromatograms has slot @featureDefinitions (DataFrame)


.xmse_extract_chromatograms_old(
object, rt = rt, mz = mz, aggregationFun = aggregationFun,
msLevel = msLevel, isolationWindow = isolationWindowTargetMz,
chunkSize = chunkSize, chromPeaks = chromPeaks,
return.type = return.type, BPPARAM = BPPARAM)
.h5_x_chromatograms(
object, ms_level = msLevel, chromPeaks = chromPeaks,
mz = mz, rt = rt, aggregationFun = aggregationFun,
chunkSize = chunkSize, return.type = return.type,
isolationWindow = isolationWindowTargetMz,
BPPARAM = BPPARAM)
})
2 changes: 1 addition & 1 deletion R/functions-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -922,4 +922,4 @@ groupOverlaps <- function(xmin, xmax) {
PACKAGE = "xcms")
cbind(rtime = scantime[scns],
intensity = res$intensity)
}
}
10 changes: 5 additions & 5 deletions R/methods-XChromatograms.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@

setAs("MChromatograms", "XChromatograms", function(from) {
res <- new("XChromatograms")
res@.Data <- matrix(lapply(from, function(z) {
slot(res, ".Data", check = FALSE) <- matrix(lapply(from@.Data, function(z) {
if (is(z, "Chromatogram"))
as(z, "XChromatogram")
else z
}), nrow = nrow(from), ncol = ncol(from), dimnames = dimnames(from))
res@phenoData <- from@phenoData
res@featureData <- from@featureData
if (validObject(resetClass)) res
slot(res, "phenoData", check = FALSE) <- from@phenoData
slot(res, "featureData", check = FALSE) <- from@featureData
res
})

#' @rdname XChromatogram
Expand Down Expand Up @@ -676,4 +676,4 @@ setMethod("transformIntensity", "XChromatograms", function(object,
nrow = nrow(object),
dimnames = dimnames(object))
object
})
})
5 changes: 1 addition & 4 deletions man/XcmsExperiment.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions man/XcmsExperimentHdf5.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 7bd035d

Please sign in to comment.