diff --git a/DESCRIPTION b/DESCRIPTION index e5830fa16..9002911c1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: xcms -Version: 1.51.4 -Date: 2017-01-16 +Version: 2.99.1 +Date: 2017-05-10 Title: LC/MS and GC/MS Data Analysis Author: Colin A. Smith , Ralf Tautenhahn , @@ -12,19 +12,19 @@ Maintainer: Steffen Neumann Depends: R (>= 2.14.0), methods, + Biobase, + BiocParallel (>= 1.8.0), + MSnbase (>= 2.1.10) +Imports: mzR (>= 1.1.6), BiocGenerics, ProtGenerics, - Biobase, - MSnbase (>= 2.1.4) -Imports: lattice, RColorBrewer, plyr, RANN, multtest, MassSpecWavelet (>= 1.5.2), - BiocParallel, S4Vectors Suggests: BiocStyle, @@ -45,8 +45,8 @@ URL: http://metlin.scripps.edu/download/ and https://github.com/sneumann/xcms VignetteBuilder: knitr BugReports: https://github.com/sneumann/xcms/issues/new biocViews: MassSpectrometry, Metabolomics -RoxygenNote: 5.0.1 -Collate: +RoxygenNote: 6.0.1 +Collate: 'AllGenerics.R' 'DataClasses.R' 'Deprecated.R' @@ -54,14 +54,17 @@ Collate: 'c.R' 'cwTools.R' 'databases.R' + 'functions-MsFeatureData.R' + 'do_adjustRtime-functions.R' 'functions-binning.R' - 'do_detectFeatures-functions.R' + 'do_findChromPeaks-functions.R' + 'functions-Params.R' + 'do_groupChromPeaks-functions.R' 'fastMatch.R' + 'functions-Chromatogram.R' 'functions-utils.R' 'functions-IO.R' - 'functions-MsFeatureData.R' 'functions-OnDiskMSnExp.R' - 'functions-Params.R' 'functions-ProcessHistory.R' 'functions-XCMSnExp.R' 'functions-xcmsEIC.R' @@ -70,6 +73,7 @@ Collate: 'functions-xcmsSet.R' 'init.R' 'matchpeaks.R' + 'methods-Chromatogram.R' 'methods-IO.R' 'methods-MsFeatureData.R' 'methods-OnDiskMSnExp.R' diff --git a/NAMESPACE b/NAMESPACE index 8b6f724f0..0ac1d5175 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,16 +4,23 @@ importFrom("utils", "capture.output") import("methods") importFrom("ProtGenerics", "peaks") importFrom("BiocGenerics", "updateObject", "fileName") -import("Biobase") -importFrom("graphics", "plot", "image", "boxplot") +## import("Biobase") 
+importFrom("Biobase", "AnnotatedDataFrame") +importClassesFrom("Biobase", "AnnotatedDataFrame", "Versioned") +importMethodsFrom("Biobase", "classVersion", "classVersion<-", "phenoData", + "phenoData<-", "pData", "rowMedians") + +importFrom("graphics", "plot", "image", "boxplot", "matplot", "rect", "axis") importFrom("mzR", "peaks", "close", "openMSfile", "header") importFrom("lattice", "levelplot", "panel.rect", "panel.levelplot") importFrom("plyr", "rbind.fill") import("RColorBrewer") import("BiocParallel") +## import("S4Vectors") importClassesFrom("S4Vectors", "Rle", "DataFrame") importFrom("S4Vectors", "split", "Rle", "DataFrame") +importMethodsFrom("S4Vectors", "as.matrix") ## Additional imports proposed by R CMD check: importFrom("graphics", "abline", "barplot", "close.screen", "hist", @@ -43,7 +50,8 @@ importMethodsFrom("MSnbase", "intensity", "mz", "rtime", "fileNames", "fromFile" "spectra", "impute", "isCentroided", "polarity", "[", "bin", "clean", "featureNames", "filterAcquisitionNum", "filterMz", "filterRt", "normalize", "pickPeaks", "removePeaks", - "removeReporters", "smooth", "trimMz") + "removeReporters", "smooth", "trimMz", "splitByFile", "[[", + "spectrapply", "peaksCount", "precursorMz") importFrom("MSnbase", "as.data.frame.Spectrum") export( @@ -182,21 +190,34 @@ export( "binYonX", "breaks_on_binSize", "breaks_on_nBins", - "do_detectFeatures_centWave", - "do_detectFeatures_massifquant", - "do_detectFeatures_matchedFilter", - "do_detectFeatures_MSW", - "do_detectFeatures_centWaveWithPredIsoROIs", - "do_detectFeatures_addPredIsoROIs", + "do_findChromPeaks_centWave", + "do_findChromPeaks_massifquant", + "do_findChromPeaks_matchedFilter", + "do_findPeaks_MSW", + "do_findChromPeaks_centWaveWithPredIsoROIs", + "do_findChromPeaks_addPredIsoROIs", "imputeLinInterpol", "useOriginalCode", - "setAs" + "setAs", + "do_groupChromPeaks_density", + "do_groupPeaks_mzClust", + "do_groupChromPeaks_nearest", + "Chromatogram", + "do_adjustRtime_peakGroups", + 
"processHistoryTypes", + "adjustRtimePeakGroups", + "plotAdjustedRtime", + "plotChromatogram", + "highlightChromPeaks", + "plotChromPeakDensity" ) ## New analysis methods exportMethods( "showError", - "detectFeatures" + "findChromPeaks", + "groupChromPeaks", + "adjustRtime" ) ## New Param classes @@ -205,7 +226,14 @@ exportClasses( "MatchedFilterParam", "MassifquantParam", "MSWParam", - "CentWavePredIsoParam" + "CentWavePredIsoParam", + "PeakDensityParam", + "MzClustParam", + "NearestPeaksParam", + "PeakGroupsParam", + "ObiwarpParam", + "GenericParam", + "FillChromPeaksParam" ) ## Param methods exportMethods( @@ -294,27 +322,86 @@ exportMethods( "mzIntervalExtension<-", "polarity", "polarity<-", - "rtime", - "mz", - "intensity" + ## PeakDensityParam + "sampleGroups", + "sampleGroups<-", + "bw", + "bw<-", + "minFraction", + "minFraction<-", + "minSamples", + "minSamples<-", + "maxFeatures", + "maxFeatures<-", + ## MzClustParam + "absMz", + "absMz<-", + ## NearestPeaksParam + "absRt", + "absRt<-", + "mzVsRtBalance", + "mzVsRtBalance<-", + "kNN", + "kNN<-", + "extraPeaks", + "extraPeaks<-", + "smooth", + "smooth<-", + "span", + "span<-", + "family", + "family<-", + ## PeakGroupsParam + "peakGroupsMatrix", + "peakGroupsMatrix<-", + ## ObiwarpParam + "centerSample", + "centerSample<-", + "response", + "response<-", + "distFun", + "distFun<-", + "gapInit", + "gapInit<-", + "gapExtend", + "gapExtend<-", + "factorDiag", + "factorDiag<-", + "factorGap", + "factorGap<-", + "localAlignment", + "localAlignment<-", + "initPenalty", + "initPenalty<-", + ## FillChromPeaksParam + "expandMz", + "expandMz<-", + "expandRt", + "expandRt<-" ) ## Param class functions export("CentWaveParam", "MatchedFilterParam", "MassifquantParam", "MSWParam", - "CentWavePredIsoParam") + "CentWavePredIsoParam", "PeakDensityParam", "MzClustParam", + "NearestPeaksParam", "PeakGroupsParam", "ObiwarpParam", "GenericParam", + "FillChromPeaksParam") ## Param class methods. 
## New Classes -exportClasses("XCMSnExp", "MsFeatureData", "ProcessHistory", "XProcessHistory") +exportClasses("XCMSnExp", "MsFeatureData", "ProcessHistory", + "Chromatogram", + "XProcessHistory" + ) ## New methods for these classes -exportMethods("hasDetectedFeatures", - "hasAlignedFeatures", +exportMethods("hasChromPeaks", + "hasFeatures", "hasAdjustedRtime", "adjustedRtime", "adjustedRtime<-", - "featureGroups", - "featureGroups<-", - "features", - "features<-", + "featureDefinitions", + "featureDefinitions<-", + "featureValues", + "chromPeaks", + "chromPeaks<-", "processHistory", "fileIndex", "processDate", @@ -333,7 +420,18 @@ exportMethods("hasDetectedFeatures", "removePeaks", "smooth", "dropAdjustedRtime", - "dropFeatureGroups", - "dropFeatures", - "spectra" + "dropFeatureDefinitions", + "dropChromPeaks", + "spectra", + "rtime", + "mz", + "intensity", + "aggregationFun", + "extractChromatograms", + "precursorMz", + "productMz", + "fillChromPeaks", + "as.data.frame", + "dropFilledChromPeaks", + "extractMsData" ) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 642a7bb42..46380832b 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -3,13 +3,21 @@ ## A setGeneric("absent", function(object, class, minfrac) standardGeneric("absent")) +setGeneric("absMz", function(object, ...) standardGeneric("absMz")) +setGeneric("absMz<-", function(object, value) standardGeneric("absMz<-")) +setGeneric("absRt", function(object, ...) standardGeneric("absRt")) +setGeneric("absRt<-", function(object, value) standardGeneric("absRt<-")) setGeneric("addParams", function(object, ...) standardGeneric("addParams")) setGeneric("addParams<-", function(object, value) standardGeneric("addParams<-")) setGeneric("addProcessHistory", function(object, ...) standardGeneric("addProcessHistory")) +setGeneric("adjustRtime", function(object, param, ...) + standardGeneric("adjustRtime")) setGeneric("adjustedRtime", function(object, ...) 
standardGeneric("adjustedRtime")) setGeneric("adjustedRtime<-", function(object, value) standardGeneric("adjustedRtime<-")) +setGeneric("aggregationFun", function(object, ...) + standardGeneric("aggregationFun")) setGeneric("ampTh", function(object, ...) standardGeneric("ampTh")) setGeneric("ampTh<-", function(object, value) standardGeneric("ampTh<-")) setGeneric("AutoLockMass", function(object) standardGeneric("AutoLockMass")) @@ -19,11 +27,19 @@ setGeneric("baseValue", function(object, ...) standardGeneric("baseValue")) setGeneric("baseValue<-", function(object, value) standardGeneric("baseValue<-")) setGeneric("binSize", function(object, ...) standardGeneric("binSize")) setGeneric("binSize<-", function(object, value) standardGeneric("binSize<-")) +setGeneric("bw", function(object) standardGeneric("bw")) +setGeneric("bw<-", function(object, value) standardGeneric("bw<-")) ## C setGeneric("calibrate", function(object, ...) standardGeneric("calibrate")) setGeneric("checkBack", function(object, ...) standardGeneric("checkBack")) +setGeneric("centerSample", function(object) standardGeneric("centerSample")) +setGeneric("centerSample<-", function(object, value) + standardGeneric("centerSample<-")) setGeneric("checkBack<-", function(object, value) standardGeneric("checkBack<-")) +setGeneric("chromPeaks", function(object, ...) standardGeneric("chromPeaks")) +setGeneric("chromPeaks<-", function(object, value) + standardGeneric("chromPeaks<-")) setGeneric("collect", function(object, ...) standardGeneric("collect")) setGeneric("consecMissedLimit", function(object, ...) standardGeneric("consecMissedLimit")) @@ -36,34 +52,64 @@ setGeneric("criticalValue<-", function(object, value) ## D setGeneric("deepCopy", function(object) standardGeneric("deepCopy")) -setGeneric("detectFeatures", function(object, param, ...) - standardGeneric("detectFeatures")) setGeneric("diffreport", function(object, ...) standardGeneric("diffreport")) setGeneric("distance", function(object, ...) 
standardGeneric("distance")) setGeneric("distance<-", function(object, value) standardGeneric("distance<-")) +setGeneric("distFun", function(object) standardGeneric("distFun")) +setGeneric("distFun<-", function(object, value) standardGeneric("distFun<-")) setGeneric("dropAdjustedRtime", function(object, ...) standardGeneric("dropAdjustedRtime")) -setGeneric("dropFeatureGroups", function(object, ...) - standardGeneric("dropFeatureGroups")) -setGeneric("dropFeatures", function(object, ...) - standardGeneric("dropFeatures")) +setGeneric("dropChromPeaks", function(object, ...) + standardGeneric("dropChromPeaks")) +setGeneric("dropFeatureDefinitions", function(object, ...) + standardGeneric("dropFeatureDefinitions")) +setGeneric("dropFilledChromPeaks", function(object, ...) + standardGeneric("dropFilledChromPeaks")) + +## E +setGeneric("expandMz", function(object, ...) + standardGeneric("expandMz")) +setGeneric("expandMz<-", function(object, value) + standardGeneric("expandMz<-")) +setGeneric("expandRt", function(object, ...) + standardGeneric("expandRt")) +setGeneric("expandRt<-", function(object, value) + standardGeneric("expandRt<-")) +setGeneric("extraPeaks", function(object, ...) + standardGeneric("extraPeaks")) +setGeneric("extraPeaks<-", function(object, value) + standardGeneric("extraPeaks<-")) +setGeneric("extractChromatograms", function(object, ...) + standardGeneric("extractChromatograms")) +setGeneric("extractMsData", function(object, ...) + standardGeneric("extractMsData")) ## F -setGeneric("features", function(object, ...) standardGeneric("features")) -setGeneric("features<-", function(object, value) - standardGeneric("features<-")) -setGeneric("featureGroups", function(object, ...) 
standardGeneric("featureGroups")) -setGeneric("featureGroups<-", function(object, value) - standardGeneric("featureGroups<-")) +setGeneric("factorDiag", function(object) standardGeneric("factorDiag")) +setGeneric("factorDiag<-", function(object, value) standardGeneric("factorDiag<-")) +setGeneric("factorGap", function(object) standardGeneric("factorGap")) +setGeneric("factorGap<-", function(object, value) standardGeneric("factorGap<-")) +setGeneric("family", function(object, ...) standardGeneric("family")) +setGeneric("family<-", function(object, value) standardGeneric("family<-")) +setGeneric("featureDefinitions", function(object, ...) + standardGeneric("featureDefinitions")) +setGeneric("featureDefinitions<-", function(object, value) + standardGeneric("featureDefinitions<-")) +setGeneric("featureValues", function(object, ...) + standardGeneric("featureValues")) setGeneric("fileIndex", function(object) standardGeneric("fileIndex")) setGeneric("fileIndex<-", function(object, value) standardGeneric("fileIndex<-")) setGeneric("filepaths", function(object) standardGeneric("filepaths")) setGeneric("filepaths<-", function(object, value) standardGeneric("filepaths<-")) +setGeneric("fillChromPeaks", function(object, param, ...) + standardGeneric("fillChromPeaks")) setGeneric("fillPeaks.chrom", function(object, ...) standardGeneric("fillPeaks.chrom")) setGeneric("fillPeaks.MSW", function(object, ...) standardGeneric("fillPeaks.MSW")) setGeneric("fillPeaks", function(object, ...) standardGeneric("fillPeaks")) +setGeneric("findChromPeaks", function(object, param, ...) + standardGeneric("findChromPeaks")) setGeneric("findMZ", function(object, find, ppmE=25, print=TRUE) standardGeneric("findMZ")) setGeneric("findmzROI", function(object, ...) standardGeneric("findmzROI")) @@ -96,6 +142,10 @@ setGeneric("fwhm", function(object, ...) 
standardGeneric("fwhm")) setGeneric("fwhm<-", function(object, value) standardGeneric("fwhm<-")) ## G +setGeneric("gapExtend", function(object) standardGeneric("gapExtend")) +setGeneric("gapExtend<-", function(object, value) standardGeneric("gapExtend<-")) +setGeneric("gapInit", function(object) standardGeneric("gapInit")) +setGeneric("gapInit<-", function(object, value) standardGeneric("gapInit<-")) setGeneric("getEIC", function(object, ...) standardGeneric("getEIC")) setGeneric("getMsnScan", function(object, ...) standardGeneric("getMsnScan")) setGeneric("getPeaks", function(object, ...) standardGeneric("getPeaks")) @@ -107,6 +157,8 @@ setGeneric("group.density", function(object, ...) standardGeneric("group.density setGeneric("group.mzClust", function(object, ...) standardGeneric("group.mzClust")) setGeneric("group.nearest", function(object, ...) standardGeneric("group.nearest")) setGeneric("group", function(object, ...) standardGeneric("group")) +setGeneric("groupChromPeaks", function(object, param, ...) + standardGeneric("groupChromPeaks")) setGeneric("groupidx", function(object) standardGeneric("groupidx")) setGeneric("groupidx<-", function(object, value) standardGeneric("groupidx<-")) setGeneric("groupnames", function(object, ...) standardGeneric("groupnames")) @@ -118,10 +170,10 @@ setGeneric("groupval", function(object, ...) standardGeneric("groupval")) setGeneric("hasMSn", function(object, ...) standardGeneric("hasMSn")) setGeneric("hasAdjustedRtime", function(object, ...) standardGeneric("hasAdjustedRtime")) -setGeneric("hasAlignedFeatures", function(object, ...) - standardGeneric("hasAlignedFeatures")) -setGeneric("hasDetectedFeatures", function(object, ...) - standardGeneric("hasDetectedFeatures")) +setGeneric("hasFeatures", function(object, ...) + standardGeneric("hasFeatures")) +setGeneric("hasChromPeaks", function(object, ...) 
+ standardGeneric("hasChromPeaks")) ## I @@ -132,10 +184,19 @@ setGeneric("index<-", function(object, value) standardGeneric("index<-")) setGeneric("integrate") ##setGeneric("integrate", function(object, ...) standardGeneric("integrate")) setGeneric("integrate<-", function(object, value) standardGeneric("integrate<-")) +setGeneric("initPenalty", function(object) standardGeneric("initPenalty")) +setGeneric("initPenalty<-", function(object, value) standardGeneric("initPenalty<-")) setGeneric("isCentroided", function(object, ...) standardGeneric("isCentroided")) +## K +setGeneric("kNN", function(object, ...) standardGeneric("kNN")) +setGeneric("kNN<-", function(object, value) standardGeneric("kNN<-")) + + ## L setGeneric("levelplot", function(x, data, ...) standardGeneric("levelplot")) +setGeneric("localAlignment", function(object) standardGeneric("localAlignment")) +setGeneric("localAlignment<-", function(object, value) standardGeneric("localAlignment<-")) setGeneric("loadRaw", function(object, ...) standardGeneric("loadRaw")) ## M @@ -144,19 +205,25 @@ setGeneric("max") setGeneric("max<-", function(object, value) standardGeneric("max<-")) setGeneric("maxCharge", function(object) standardGeneric("maxCharge")) setGeneric("maxCharge<-", function(object, value) standardGeneric("maxCharge<-")) +setGeneric("maxFeatures", function(object) standardGeneric("maxFeatures")) +setGeneric("maxFeatures<-", function(object, value) standardGeneric("maxFeatures<-")) setGeneric("maxIso", function(object) standardGeneric("maxIso")) setGeneric("maxIso<-", function(object, value) standardGeneric("maxIso<-")) setGeneric("makeacqNum", function(object, freq, start=1) standardGeneric("makeacqNum")) +setGeneric("minFraction", function(object) standardGeneric("minFraction")) +setGeneric("minFraction<-", function(object, value) standardGeneric("minFraction<-")) setGeneric("minNoiseLevel", function(object, ...) 
standardGeneric("minNoiseLevel")) setGeneric("minNoiseLevel<-", function(object, value) standardGeneric("minNoiseLevel<-")) +setGeneric("minSamples", function(object) standardGeneric("minSamples")) +setGeneric("minSamples<-", function(object, value) standardGeneric("minSamples<-")) setGeneric("mslevel", function(object, ...) standardGeneric("mslevel")) setGeneric("mslevel<-", function(object, value) standardGeneric("mslevel<-")) setGeneric("msnparent2ms", function(object, ...) standardGeneric("msnparent2ms")) setGeneric("msn2ms", function(object, ...) standardGeneric("msn2ms")) setGeneric("mzdiff", function(object, ...) standardGeneric("mzdiff")) setGeneric("mzdiff<-", function(object, value) standardGeneric("mzdiff<-")) -setGeneric("mzrange", function(object) standardGeneric("mzrange")) +setGeneric("mzrange", function(object, ...) standardGeneric("mzrange")) setGeneric("mzCenterFun", function(object, ...) standardGeneric("mzCenterFun")) setGeneric("mzCenterFun<-", function(object, value) standardGeneric("mzCenterFun<-")) @@ -164,6 +231,10 @@ setGeneric("mzIntervalExtension", function(object, ...) standardGeneric("mzIntervalExtension")) setGeneric("mzIntervalExtension<-", function(object, value) standardGeneric("mzIntervalExtension<-")) +setGeneric("mzVsRtBalance", function(object, ...) + standardGeneric("mzVsRtBalance")) +setGeneric("mzVsRtBalance<-", function(object, value) + standardGeneric("mzVsRtBalance<-")) ## N setGeneric("nearbyPeak", function(object, ...) standardGeneric("nearbyPeak")) @@ -172,6 +243,10 @@ setGeneric("noise", function(object, ...) standardGeneric("noise")) setGeneric("noise<-", function(object, value) standardGeneric("noise<-")) ## P +setGeneric("peakGroupsMatrix", function(object, ...) 
+ standardGeneric("peakGroupsMatrix")) +setGeneric("peakGroupsMatrix<-", function(object, value) + standardGeneric("peakGroupsMatrix<-")) setGeneric("peaks<-", function(object, value) standardGeneric("peaks<-")) setGeneric("peakScaleRange", function(object, ...) standardGeneric("peakScaleRange")) @@ -208,6 +283,7 @@ setGeneric("processParam<-", function(object, value) setGeneric("processType", function(object, ...) standardGeneric("processType")) setGeneric("processType<-", function(object, value) standardGeneric("processType<-")) setGeneric("processHistory", function(object, ...) standardGeneric("processHistory")) +setGeneric("productMz", function(object, value) standardGeneric("productMz")) setGeneric("profinfo", function(object) standardGeneric("profinfo")) setGeneric("profinfo<-", function(object, value) standardGeneric("profinfo<-")) setGeneric("profMat", function(object, ...) standardGeneric("profMat")) @@ -227,6 +303,8 @@ setGeneric("progressInfoUpdate", function(object) standardGeneric("progressInfoU setGeneric("rawEIC", function(object, ...) standardGeneric("rawEIC")) setGeneric("rawMat", function(object, ...) standardGeneric("rawMat")) setGeneric("rawMZ", function(object, ...) standardGeneric("rawMZ")) +setGeneric("response", function(object) standardGeneric("response")) +setGeneric("response<-", function(object, value) standardGeneric("response<-")) setGeneric("retcor", function(object, ...) standardGeneric("retcor")) setGeneric("retcor.peakgroups", function(object, ...) standardGeneric("retcor.peakgroups")) setGeneric("retcor.obiwarp", function(object, ...) 
standardGeneric("retcor.obiwarp")) @@ -242,15 +320,19 @@ setGeneric("rtrange", function(object) standardGeneric("rtrange")) ## S setGeneric("sampclass", function(object) standardGeneric("sampclass")) setGeneric("sampclass<-", function(object, value) standardGeneric("sampclass<-")) +setGeneric("sampleGroups", function(object) standardGeneric("sampleGroups")) +setGeneric("sampleGroups<-", function(object, value) + standardGeneric("sampleGroups<-")) setGeneric("sampnames", function(object) standardGeneric("sampnames")) setGeneric("sampnames<-", function(object, value) standardGeneric("sampnames<-")) setGeneric("scales", function(object, ...) standardGeneric("scales")) setGeneric("scales<-", function(object, value) standardGeneric("scales<-")) setGeneric("scanrange", function(object, ...) standardGeneric("scanrange")) setGeneric("scanrange<-", function(object, value) standardGeneric("scanrange<-")) -setGeneric("sigma") +setGeneric("sigma", function(object, value) standardGeneric("sigma")) setGeneric("sigma<-", function(object, value) standardGeneric("sigma<-")) setGeneric("showError", function(object, ...) standardGeneric("showError")) +setGeneric("smooth<-", function(object, value) standardGeneric("smooth<-")) setGeneric("snthresh", function(object, ...) standardGeneric("snthresh")) setGeneric("snthresh<-", function(object, value) standardGeneric("snthresh<-")) setGeneric("snthreshIsoROIs", function(object, ...) @@ -258,6 +340,8 @@ setGeneric("snthreshIsoROIs", function(object, ...) setGeneric("snthreshIsoROIs<-", function(object, value) standardGeneric("snthreshIsoROIs<-")) setGeneric("sortMz", function(object, ...) standardGeneric("sortMz")) +setGeneric("span", function(object, ...) standardGeneric("span")) +setGeneric("span<-", function(object, value) standardGeneric("span<-")) setGeneric("specDist", function(object, ...) 
standardGeneric("specDist")) setGeneric("specDist.meanMZmatch", function(peakTable1, peakTable2, matchdist=1, matchrate=1, diff --git a/R/DataClasses.R b/R/DataClasses.R index ca1f34747..9bde64946 100644 --- a/R/DataClasses.R +++ b/R/DataClasses.R @@ -43,7 +43,7 @@ setClass("xcmsSet", progressCallback = function(progress) NULL, .processHistory = list()), validity = function(object) { - msg <- validMsg(NULL, NULL) + msg <- character() ## Check if all slots are present. slNames <- slotNames(object) missingSlots <- character() @@ -52,10 +52,10 @@ setClass("xcmsSet", missingSlots <- c(missingSlots, slNames[i]) } if (length(missingSlots) > 0) - msg <- validMsg(msg, paste0("This xcmsSet lacks slot(s): ", - paste(missingSlots, collapse = ","), - ". Please update the object using", - " the 'updateObject' method.")) + msg <- c(msg, paste0("This xcmsSet lacks slot(s): ", + paste(missingSlots, collapse = ","), + ". Please update the object using", + " the 'updateObject' method.")) ## Check the .processHistory slot. 
if (!any(missingSlots == ".processHistory")) { inh <- unlist(lapply(object@.processHistory, @@ -63,16 +63,15 @@ setClass("xcmsSet", return(inherits(z, "ProcessHistory")) })) if (!all(inh)) - msg <- validMsg(msg, - paste0("Slot '.processHistory' should", - " only contain 'ProcessHistory'", - " objects!")) + msg <- c(msg, + paste0("Slot '.processHistory' should", + " only contain 'ProcessHistory'", + " objects!")) } - if (!is.null(msg)) + if (length(msg)) return(msg) return(TRUE) - } - ) + }) ############################################################ ## xcmsEIC @@ -167,29 +166,15 @@ setClass("xcmsRaw", representation(env = "environment", ############################################################ ## netCdfSource -setClass("netCdfSource", - ## representation(cdf="integer"), - contains="xcmsFileSource", - ## validity=function(object) { - ## if (!is.null(attr(object@cdf, "errortext"))) { - ## mzR:::netCDFClose(object@cdf) - ## attr(object@cdf, "errortext") - ## } else TRUE - ## } - ) +setClass("netCdfSource", contains="xcmsFileSource") ############################################################ ## rampSource -setClass("rampSource", - ## representation(rampid="integer"), - contains="xcmsFileSource", - ## validity=function(object) { - ## if (object@rampid < 0) { - ## mzR:::rampClose(object@rampid) - ## paste("Could not open mzML/mzXML/mzData file:", object) - ## } else TRUE - ## } - ) +setClass("rampSource", contains="xcmsFileSource") + +############################################################ +## pwizSource +setClass("pwizSource", contains="xcmsFileSource") ############################################################ ## xcmsPeaks @@ -198,37 +183,44 @@ setClass("xcmsPeaks", contains = "matrix") ############################################################ ## Processing history type statics .PROCSTEP.UNKNOWN <- "Unknown" -.PROCSTEP.FEATURE.DETECTION <- "Feature detection" -.PROCSTEP.FEATURE.ALIGNMENT <- "Feature alignment" +.PROCSTEP.PEAK.DETECTION <- "Peak 
detection" +.PROCSTEP.PEAK.GROUPING <- "Peak grouping" .PROCSTEP.RTIME.CORRECTION <- "Retention time correction" +.PROCSTEP.PEAK.FILLING <- "Missing peak filling" .PROCSTEPS <- c( .PROCSTEP.UNKNOWN, - .PROCSTEP.FEATURE.DETECTION, - .PROCSTEP.FEATURE.ALIGNMENT, - .PROCSTEP.RTIME.CORRECTION + .PROCSTEP.PEAK.DETECTION, + .PROCSTEP.PEAK.GROUPING, + .PROCSTEP.RTIME.CORRECTION, + .PROCSTEP.PEAK.FILLING ) ############################################################ ## ProcessHistory -##' @aliases ProcessHistory -##' @title Tracking data processing -##' -##' @description Objects of the type \code{ProcessHistory} allow to keep track -##' of any data processing step in an metabolomics experiment. They are created -##' by the data processing methods, such as \code{\link{detectFeatures}} and -##' added to the corresponding results objects. Thus, usually, users don't need -##' to create them. -##' -##' @slot type character(1): string defining the type of the processing step. -##' This string has to match predefined values defined in the internal variable -##' \code{.PROCSTEPS}. -##' -##' @slot date character(1): date time stamp when the processing step was started. -##' @slot info character(1): optional additional information. -##' @slot fileIndex integer of length 1 or > 1 to specify on which -##' samples of the object the processing was performed. -##' @slot error (ANY): used to store eventual calculation errors. -##' @rdname ProcessHistory-class +#' @aliases ProcessHistory +#' +#' @title Tracking data processing +#' +#' @description Objects of the type \code{ProcessHistory} allow to keep track +#' of any data processing step in an metabolomics experiment. They are +#' created by the data processing methods, such as +#' \code{\link{findChromPeaks}} and added to the corresponding results +#' objects. Thus, usually, users don't need to create them. +#' +#' @slot type character(1): string defining the type of the processing step. +#' This string has to match predefined values. 
Use +#' \code{\link{processHistoryTypes}} to list them. +#' +#' @slot date character(1): date time stamp when the processing step was started. +#' +#' @slot info character(1): optional additional information. +#' +#' @slot fileIndex integer of length 1 or > 1 to specify on which +#' samples of the object the processing was performed. +#' +#' @slot error (ANY): used to store eventual calculation errors. +#' +#' @rdname ProcessHistory-class setClass("ProcessHistory", slots = c( type = "character", @@ -244,27 +236,28 @@ setClass("ProcessHistory", info = character(), fileIndex = integer(), ## This can be of length 1 or > 1. error = NULL -## new("Versioned", versions = c(ProcessHistory = "0.0.2")) ), validity = function(object) { - msg <- validMsg(NULL, NULL) + msg <- character() ## check type: if (!any(object@type == .PROCSTEPS)) - msg <- validMsg(msg, paste0("Got invalid type '", object@type, - "'! Allowd are: ", - paste0("\"", .PROCSTEPS, "\"", - collapse = ", "))) + msg <- c(msg, paste0("Got invalid type '", object@type, + "'! 
Allowd are: ", + paste0("\"", .PROCSTEPS, "\"", + collapse = ", "))) if (length(object@type) > 1) - msg <- validMsg(msg, paste0("length of 'type' should not be ", - "larger than 1!")) + msg <- c(msg, paste0("length of 'type' should not be ", + "larger than 1!")) if (length(object@date) > 1) - msg <- validMsg(msg, paste0("length of 'date' should not be ", - "larger than 1!")) + msg <- c(msg, paste0("length of 'date' should not be ", + "larger than 1!")) if (length(object@info) > 1) - msg <- validMsg(msg, paste0("length of 'info' should not be ", - "larger than 1!")) - if (is.null(msg)) TRUE - else msg + msg <- c(msg, paste0("length of 'info' should not be ", + "larger than 1!")) + if (length(msg)) + msg + else + TRUE } ) @@ -275,16 +268,70 @@ setClass("Param", contains = c("Versioned")) setClassUnion("ParamOrNULL", c("Param", "NULL")) -##' @aliases XProcessHistory -##' @title Tracking data processing -##' -##' @description The \code{XProcessHistory} extends the \code{ProcessHistory} by -##' adding a slot \code{param} that allows to store the actual parameter class -##' of the processing step. -##' -##' @slot param (Param): an object of type \code{Param} (e.g. -##' \code{\link{CentWaveParam}}) specifying the settings of the processing step. -##' @rdname ProcessHistory-class +#' @aliases GenericParam +#' +#' @title Generic parameter class +#' +#' @description The \code{GenericParam} class allows to store generic parameter +#' information such as the name of the function that was/has to be called +#' (slot \code{fun}) and its arguments (slot \code{args}). This object is +#' used to track the process history of the data processings of an +#' \code{\link{XCMSnExp}} object. This is in contrast to e.g. the +#' \code{\link{CentWaveParam}} object that is passed to the actual +#' processing method. +#' +#' @seealso \code{\link{processHistory}} for how to access the process history +#' of an \code{\link{XCMSnExp}} object. 
+#' +#' @slot fun \code{character} specifying the function name. +#' +#' @slot args \code{list} (ideally named) with the arguments to the +#' function. +#' +#' @slot .__classVersion__ the version of the class. +#' +#' @author Johannes Rainer +#' +#' @rdname GenericParam +#' +#' @examples +#' prm <- GenericParam(fun = "mean") +#' +#' prm <- GenericParam(fun = "mean", args = list(na.rm = TRUE)) +setClass("GenericParam", + slots = c(fun = "character", + args = "list"), + contains = "Param", + prototype = prototype( + fun = character(), + args = list() + ), + validity = function(object) { + msg <- character() + if (length(object@args) > 0) + if (!length(object@fun) > 0) + msg <- c(msg, paste0("No function name specified in '@fun'", + " but got '@args'")) + if (length(object@fun) > 1) + msg <- c(msg, paste0("'@fun' has to be of length 1")) + if (length(msg)) msg + else TRUE + } + ) + +#' @aliases XProcessHistory +#' +#' @title Tracking data processing +#' +#' @description The \code{XProcessHistory} extends the \code{ProcessHistory} by +#' adding a slot \code{param} that allows to store the actual parameter +#' class of the processing step. +#' +#' @slot param (Param): an object of type \code{Param} (e.g. +#' \code{\link{CentWaveParam}}) specifying the settings of the processing +#' step. +#' +#' @rdname ProcessHistory-class setClass("XProcessHistory", slots = c( param = "ParamOrNULL" @@ -294,167 +341,192 @@ setClass("XProcessHistory", param = NULL ), validity = function(object) { - msg <- validMsg(NULL, NULL) + msg <- character() if (length(object@param) > 0) if(!is(object@param, "Param")) - msg <- validMsg(msg, - paste0("Only objects from type 'Param' ", - "allowed in slot '@param'! I got ", - class(object@param))) - if (is.null(msg)) TRUE - else msg + msg <- c(msg, + paste0("Only objects from type 'Param' ", + "allowed in slot '@param'! I got ", + class(object@param))) + if (length(msg)) msg + else TRUE }) - - -## General detectFeatures method. 
-##' @title Feature detection methods. -##' -##' @description The \code{detectFeature} methods are part of the modernized -##' \code{xcms} user interface. -##' -##' The implemented feature detection methods are: -##' \describe{ -##' \item{centWave}{feature detection using the \emph{centWave} method. -##' See \code{\link{centWave}} for more details.} -##' -##' \item{centWave with predicted isotopes}{feature detection using a two-step -##' centWave-based approach considering also feature isotopes. See -##' \code{\link{centWaveWithPredIsoROIs}} for more details.} -##' -##' \item{matchedFilter}{peak detection in chromatographic space. See -##' \code{\link{matchedFilter}} for more details.} -##' -##' \item{massifquant}{peak detection using the Kalman filter-based feature -##' method. See \code{\link{massifquant}} for more details.} -##' -##' \item{MSW}{single-spectrum non-chromatography MS data feature detection. -##' See \code{\link{MSW}} for more details.} -##' -##' } -##' @name detectFeatures -##' @family feature detection methods -##' @seealso \code{\link{findPeaks}} for the \emph{old} feature detection -##' methods. -##' @author Johannes Rainer +#' @aliases findChromPeaks +#' +#' @title Chromatographic peak detection methods. +#' +#' @description The \code{findChromPeaks} methods perform the chromatographic +#' peak detection on LC/GC-MS data and are part of the modernized +#' \code{xcms} user interface. +#' +#' The implemented peak detection methods in chromatographic space are: +#' \describe{ +#' \item{centWave}{chromatographic peak detection using the \emph{centWave} +#' method. See \code{\link{centWave}} for more details.} +#' +#' \item{centWave with predicted isotopes}{peak detection using a two-step +#' centWave-based approach considering also feature isotopes. See +#' \code{\link{centWaveWithPredIsoROIs}} for more details.} +#' +#' \item{matchedFilter}{peak detection in chromatographic space. 
See +#' \code{\link{matchedFilter}} for more details.} +#' +#' \item{massifquant}{peak detection using the Kalman filter-based +#' method. See \code{\link{massifquant}} for more details.} +#' +#' \item{MSW}{single-spectrum non-chromatography MS data peak detection. +#' See \code{\link{MSW}} for more details.} +#' +#' } +#' +#' @name chromatographic-peak-detection +#' +#' @family peak detection methods +#' +#' @seealso \code{\link{findPeaks}} for the \emph{old} peak detection +#' methods. +#' +#' @author Johannes Rainer NULL #> NULL ## Main centWave documentation. -##' @title Feature detection using the centWave method -##' -##' @aliases centWave -##' -##' @description The centWave algorithm perform peak density and wavelet based -##' feature detection for high resolution LC/MS data in centroid -##' mode [Tautenhahn 2008]. -##' -##' @param ppm Maximal tolerated m/z deviation in consecutive scans in parts -##' per million (ppm). -##' @param peakwidth numeric(2) with the expected approximate -##' feature/peak width in chromatographic space. Given as a range (min, max) -##' in seconds. -##' @param snthresh numeric(1) defining the signal to noise ratio cutoff. -##' @param prefilter numeric(2): \code{c(k, I)} specifying the prefilter -##' step for the first analysis step (ROI detection). Mass traces are only -##' retained if they contain at least \code{k} peaks with intensity \code{>= I}. -##' @param mzCenterFun Name of the function to calculate the m/z center of the -##' feature. Allowed are: \code{"wMean"}: intensity weighted mean of the feature's -##' m/z values, \code{"mean"}: mean of the feature's m/z values, \code{"apex"}: -##' use the m/z value at the peak apex, \code{"wMeanApex3"}: intensity weighted -##' mean of the m/z value at the peak apex and the m/z values left and right of -##' it and \code{"meanApex3"}: mean of the m/z value of the peak apex and the -##' m/z values left and right of it. -##' @param integrate Integration method. 
For \code{integrate = 1} peak limits -##' are found through descent on the mexican hat filtered data, for -##' \code{integrate = 2} the descent is done on the real data. The latter method -##' is more accurate but prone to noise, while the former is more robust, but -##' less exact. -##' @param mzdiff Numeric representing the minimum difference in m/z dimension -##' for peaks with overlapping retention times; can be negatove to allow overlap. -##' @param fitgauss Logical whether or not a Gaussian should be fitted to each -##' peak. -##' @param noise numeric(1) allowing to set a minimum intensity required -##' for centroids to be considered in the first analysis step (centroids with -##' intensity \code{< noise} are omitted from ROI detection). -##' @param verboseColumns Logical whether additional feature meta data columns -##' should be returned. -##' @param roiList An optional list of regions-of-interest (ROI) representing -##' detected mass traces. If ROIs are submitted the first analysis step is -##' omitted and feature detection is performed on the submitted ROIs. Each -##' ROI is expected to have the following elements specified: -##' \code{scmin} (start scan index), \code{scmax} (end scan index), -##' \code{mzmin} (minimum m/z), \code{mzmax} (maximum m/z), \code{length} -##' (number of scans), \code{intensity} (summed intensity). Each ROI should be -##' represented by a \code{list} of elements or a single row \code{data.frame}. -##' @param firstBaselineCheck logical(1). If \code{TRUE} continuous -##' data within regions of interest is checked to be above the first baseline. -##' @param roiScales Optional numeric vector with length equal to \code{roiList} -##' defining the scale for each region of interest in \code{roiList} that should -##' be used for the centWave-wavelets. -##' -##' @details The centWave algorithm is most suitable for high resolution -##' LC/\{TOF,OrbiTrap,FTICR\}-MS data in centroid mode. 
In the first phase the -##' method identifies \emph{regions of interest} (ROIs) representing mass traces -##' that are characterized as regions with less than \code{ppm} m/z deviation in -##' consecutive scans in the LC/MS map. These ROIs are then subsequently -##' analyzed using continuous wavelet transform (CWT) to locate chromatographic -##' peaks on different scales. The first analysis step is skipped, if regions -##' of interest are passed \emph{via} the \code{param} parameter. -##' -##' @note These methods and classes are part of the updated and modernized -##' \code{xcms} user interface which will eventually replace the -##' \code{\link{findPeaks}} methods. It supports feature detection on -##' \code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} -##' objects (both defined in the \code{MSnbase} package). All of the settings -##' to the centWave algorithm can be passed with a \code{CentWaveParam} object. -##' -##' @family feature detection methods -##' @seealso The \code{\link{do_detectFeatures_centWave}} core API function and -##' \code{\link{findPeaks.centWave}} for the old user interface. -##' -##' @references -##' Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly -##' sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics} -##' 2008, 9:504 -##' @name featureDetection-centWave -##' @author Ralf Tautenhahn, Johannes Rainer +#' @title Chromatographic peak detection using the centWave method +#' +#' @aliases centWave +#' +#' @description The centWave algorithm perform peak density and wavelet based +#' chromatographic peak detection for high resolution LC/MS data in centroid +#' mode [Tautenhahn 2008]. +#' +#' @param ppm \code{numeric(1)} defining the maximal tolerated m/z deviation in +#' consecutive scans in parts per million (ppm) for the initial ROI +#' definition. +#' +#' @param peakwidth \code{numeric(2)} with the expected approximate +#' peak width in chromatographic space. 
Given as a range (min, max)
+#' in seconds.
+#'
+#' @param snthresh \code{numeric(1)} defining the signal to noise ratio cutoff.
+#'
+#' @param prefilter \code{numeric(2)}: \code{c(k, I)} specifying the prefilter
+#' step for the first analysis step (ROI detection). Mass traces are only
+#' retained if they contain at least \code{k} peaks with intensity
+#' \code{>= I}.
+#'
+#' @param mzCenterFun Name of the function to calculate the m/z center of the
+#' chromatographic peak. Allowed are: \code{"wMean"}: intensity weighted
+#' mean of the peak's m/z values, \code{"mean"}: mean of the peak's m/z
+#' values, \code{"apex"}: use the m/z value at the peak apex,
+#' \code{"wMeanApex3"}: intensity weighted mean of the m/z value at the
+#' peak apex and the m/z values left and right of it and \code{"meanApex3"}:
+#' mean of the m/z value of the peak apex and the m/z values left and right
+#' of it.
+#'
+#' @param integrate Integration method. For \code{integrate = 1} peak limits
+#' are found through descent on the mexican hat filtered data, for
+#' \code{integrate = 2} the descent is done on the real data. The latter
+#' method is more accurate but prone to noise, while the former is more
+#' robust, but less exact.
+#'
+#' @param mzdiff \code{numeric(1)} representing the minimum difference in m/z
+#' dimension for peaks with overlapping retention times; can be negative to
+#' allow overlap.
+#'
+#' @param fitgauss \code{logical(1)} whether or not a Gaussian should be fitted
+#' to each peak.
+#'
+#' @param noise \code{numeric(1)} allowing to set a minimum intensity required
+#' for centroids to be considered in the first analysis step (centroids with
+#' intensity \code{< noise} are omitted from ROI detection).
+#'
+#' @param verboseColumns \code{logical(1)} whether additional peak meta data
+#' columns should be returned.
+#'
+#' @param roiList An optional list of regions-of-interest (ROI) representing
+#' detected mass traces.
If ROIs are submitted the first analysis step is +#' omitted and chromatographic peak detection is performed on the submitted +#' ROIs. Each ROI is expected to have the following elements specified: +#' \code{scmin} (start scan index), \code{scmax} (end scan index), +#' \code{mzmin} (minimum m/z), \code{mzmax} (maximum m/z), \code{length} +#' (number of scans), \code{intensity} (summed intensity). Each ROI should +#' be represented by a \code{list} of elements or a single row +#' \code{data.frame}. +#' +#' @param firstBaselineCheck \code{logical(1)}. If \code{TRUE} continuous +#' data within regions of interest is checked to be above the first baseline. +#' +#' @param roiScales Optional numeric vector with length equal to \code{roiList} +#' defining the scale for each region of interest in \code{roiList} that +#' should be used for the centWave-wavelets. +#' +#' @details The centWave algorithm is most suitable for high resolution +#' LC/\{TOF,OrbiTrap,FTICR\}-MS data in centroid mode. In the first phase +#' the method identifies \emph{regions of interest} (ROIs) representing +#' mass traces that are characterized as regions with less than \code{ppm} +#' m/z deviation in consecutive scans in the LC/MS map. These ROIs are +#' then subsequently analyzed using continuous wavelet transform (CWT) +#' to locate chromatographic peaks on different scales. The first analysis +#' step is skipped, if regions of interest are passed \emph{via} the +#' \code{param} parameter. +#' +#' @note These methods and classes are part of the updated and modernized +#' \code{xcms} user interface which will eventually replace the +#' \code{\link{findPeaks}} methods. It supports peak detection on +#' \code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} +#' objects (both defined in the \code{MSnbase} package). All of the settings +#' to the centWave algorithm can be passed with a \code{CentWaveParam} +#' object. 
+#' +#' @family peak detection methods +#' +#' @seealso The \code{\link{do_findChromPeaks_centWave}} core API function and +#' \code{\link{findPeaks.centWave}} for the old user interface. +#' +#' @references +#' Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly +#' sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics} +#' 2008, 9:504 +#' +#' @name findChromPeaks-centWave +#' +#' @author Ralf Tautenhahn, Johannes Rainer NULL #> NULL -##' @description The \code{CentWaveParam} class allows to specify all settings for -##' a feature detection using the centWave method. Instances should be created -##' with the \code{CentWaveParam} constructor. -##' -##' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales See corresponding parameter above. \code{.__classVersion__} stores -##' the version from the class. Slots values should exclusively be accessed -##' \emph{via} the corresponding getter and setter methods listed above. -##' -##' @rdname featureDetection-centWave -##' -##' @examples -##' -##' ## Create a CentWaveParam object. Note that the noise is set to 10000 to -##' ## speed up the execution of the example - in a real use case the default -##' ## value should be used, or it should be set to a reasonable value. -##' cwp <- CentWaveParam(ppm = 20, noise = 10000) -##' ## Change snthresh parameter -##' snthresh(cwp) <- 25 -##' cwp -##' -##' ## Perform the feature detection using centWave on some of the files from the -##' ## faahKO package. Files are read using the readMSData2 from the MSnbase -##' ## package -##' library(faahKO) -##' library(MSnbase) -##' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, -##' full.names = TRUE) -##' raw_data <- readMSData2(fls[1:2]) -##' -##' ## Perform the feature detection using the settings defined above. We're -##' ## returning the results as an xcmsSet object. 
-##' res <- detectFeatures(raw_data, param = cwp, return.type = "xcmsSet") -##' head(peaks(res)) +#' @description The \code{CentWaveParam} class allows to specify all settings +#' for a chromatographic peak detection using the centWave method. Instances +#' should be created with the \code{CentWaveParam} constructor. +#' +#' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales See corresponding parameter above. \code{.__classVersion__} stores +#' the version from the class. Slots values should exclusively be accessed +#' \emph{via} the corresponding getter and setter methods listed above. +#' +#' @rdname findChromPeaks-centWave +#' +#' @examples +#' +#' ## Create a CentWaveParam object. Note that the noise is set to 10000 to +#' ## speed up the execution of the example - in a real use case the default +#' ## value should be used, or it should be set to a reasonable value. +#' cwp <- CentWaveParam(ppm = 20, noise = 10000) +#' ## Change snthresh parameter +#' snthresh(cwp) <- 25 +#' cwp +#' +#' ## Perform the peak detection using centWave on some of the files from the +#' ## faahKO package. Files are read using the readMSData2 from the MSnbase +#' ## package +#' library(faahKO) +#' library(xcms) +#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, +#' full.names = TRUE) +#' raw_data <- readMSData2(fls[1:2]) +#' +#' ## Perform the peak detection using the settings defined above. 
+#' res <- findChromPeaks(raw_data, param = cwp) +#' head(chromPeaks(res)) setClass("CentWaveParam", slots = c( ppm = "numeric", @@ -488,44 +560,44 @@ setClass("CentWaveParam", roiScales = numeric() ), validity = function(object) { - msg <- validMsg(NULL, NULL) + msg <- character() if (length(object@ppm) != 1 | any(object@ppm < 0)) - msg <- validMsg(msg, paste0("'ppm' has to be positive numeric", - " of length 1.")) + msg <- c(msg, paste0("'ppm' has to be positive numeric", + " of length 1.")) if (length(object@peakwidth) != 2 | any(object@peakwidth < 0)) - msg <- validMsg(msg, paste0("'peakwidth' has to be a numeric", - " of length 2 with only positive", - " values.")) + msg <- c(msg, paste0("'peakwidth' has to be a numeric", + " of length 2 with only positive", + " values.")) if (length(object@snthresh) != 1 | any(object@snthresh < 0)) - msg <- validMsg(msg, paste0("'snthresh' has to be a positive", - " numeric of length 1.")) + msg <- c(msg, paste0("'snthresh' has to be a positive", + " numeric of length 1.")) if (length(object@prefilter) != 2) - msg <- validMsg(msg, paste0("'prefilter' has to be a numeric", - " of length 2.")) + msg <- c(msg, paste0("'prefilter' has to be a numeric", + " of length 2.")) allowed_vals <- c("wMean", "mean", "apex", "wMeanApex3", "meanApex3") if (!(object@mzCenterFun) %in% allowed_vals) - msg <- validMsg(msg, paste0("'mzCenterFun' has to be one of ", - paste0("'", allowed_vals, "'", + msg <- c(msg, paste0("'mzCenterFun' has to be one of ", + paste0("'", allowed_vals, "'", collapse = ", "), ".")) if (!(object@integrate %in% c(1L, 2L))) - msg <- validMsg(msg, paste0("'integrate' has to be either 1", - " or 2.")) + msg <- c(msg, paste0("'integrate' has to be either 1", + " or 2.")) if (length(object@mzdiff) != 1) - msg <- validMsg(msg, paste0("'mzdiff' has to be a numeric of", - " length 1.")) + msg <- c(msg, paste0("'mzdiff' has to be a numeric of", + " length 1.")) if (length(object@noise) != 1) - msg <- validMsg(msg, 
paste0("'noise' has to be a numeric of", - " length 1.")) + msg <- c(msg, paste0("'noise' has to be a numeric of", + " length 1.")) if (length(object@fitgauss) != 1) - msg <- validMsg(msg, paste0("'fitgauss' has to be a numeric of", - " length 1.")) + msg <- c(msg, paste0("'fitgauss' has to be a numeric of", + " length 1.")) if (length(object@verboseColumns) != 1) - msg <- validMsg(msg, paste0("'verboseColumns' has to be a ", - "numeric of length 1.")) + msg <- c(msg, paste0("'verboseColumns' has to be a ", + "numeric of length 1.")) if (length(object@firstBaselineCheck) != 1) - msg <- validMsg(msg, paste0("'firstBaselineCheck' has to be a", - " numeric of length 1.")) + msg <- c(msg, paste0("'firstBaselineCheck' has to be a", + " numeric of length 1.")) if (length(object@roiList) > 0) { doHaveExpectedEls <- function(z) { need <- c("scmax", "scmin", "mzmin", "mzmax", "length", @@ -539,128 +611,140 @@ setClass("CentWaveParam", } OKs <- unlist(lapply(object@roiList, doHaveExpectedEls)) if (any(!OKs)) - msg <- validMsg(msg, paste0("'roiList' does not provide ", - "all required fields!")) + msg <- c(msg, paste0("'roiList' does not provide ", + "all required fields!")) } - if (length(object@roiList) > 0 & - length(object@roiList) != length(object@roiScales)) - msg <- validMsg(msg, paste0("'roiScales' has to have the same", - " length than 'roiList'.")) - if (is.null(msg)) { - return(TRUE) - } else { - return(msg) + if (length(object@roiScales) > 0) { + if (length(object@roiList) != length(object@roiScales)) + msg <- c(msg, paste0("'roiScales' has to have the same", + " length than 'roiList'.")) } + if (length(msg)) + msg + else + TRUE }) ## Main matchedFilter documentation. -##' @title Peak detection in the chromatographic time domain -##' -##' @aliases matchedFilter -##' -##' @description The \emph{matchedFilter} algorithm identifies features in the -##' chromatographic time domain as described in [Smith 2006]. 
The intensity -##' values are binned by cutting The LC/MS data into slices (bins) of a mass unit -##' (\code{binSize} m/z) wide. Within each bin the maximal intensity is selected. -##' The feature detection is then performed in each bin by extending it based on -##' the \code{steps} parameter to generate slices comprising bins -##' \code{current_bin - steps +1} to \code{current_bin + steps - 1}. Each of -##' these slices is then filtered with matched filtration using a second-derative -##' Gaussian as the model feature/peak shape. After filtration features are -##' detected using a signal-to-ration cut-off. For more details and -##' illustrations see [Smith 2006]. -##' -##' @param binSize numeric(1) specifying the width of the -##' bins/slices in m/z dimension. -##' @param impute Character string specifying the method to be used for missing -##' value imputation. Allowed values are \code{"none"} (no linear interpolation), -##' \code{"lin"} (linear interpolation), \code{"linbase"} (linear interpolation -##' within a certain bin-neighborhood) and \code{"intlin"}. See -##' \code{\link{imputeLinInterpol}} for more details. -##' @param fwhm numeric(1) specifying the full width at half maximum -##' of matched filtration gaussian model peak. Only used to calculate the actual -##' sigma, see below. -##' @param sigma numeric(1) specifying the standard deviation (width) -##' of the matched filtration model peak. -##' @param max numeric(1) representing the maximum number of peaks -##' that are expected/will be identified per slice. -##' @param snthresh numeric(1) defining the signal to noise cutoff -##' to be used in the feature detection step. -##' @param steps numeric(1) defining the number of bins to be -##' merged before filtration (i.e. the number of neighboring bins that will be -##' joined to the slice in which filtration and peak detection will be -##' performed). 
-##' @param mzdiff numeric(1) defining the minimum difference -##' in m/z for peaks with overlapping retention times -##' @param index Logical specifying whether indicies should be returned instead -##' of values for m/z and retention times. -##' -##' @details The intensities are binned by the provided m/z values within each -##' spectrum (scan). Binning is performed such that the bins are centered around -##' the m/z values (i.e. the first bin includes all m/z values between -##' \code{min(mz) - bin_size/2} and \code{min(mz) + bin_size/2}). -##' -##' For more details on binning and missing value imputation see -##' \code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods. -##' -##' @note These methods and classes are part of the updated and modernized -##' \code{xcms} user interface which will eventually replace the -##' \code{\link{findPeaks}} methods. It supports feature detection on -##' \code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} -##' objects (both defined in the \code{MSnbase} package). All of the settings -##' to the matchedFilter algorithm can be passed with a -##' \code{MatchedFilterParam} object. -##' -##' @inheritParams imputeLinInterpol -##' @inheritParams featureDetection-centWave -##' -##' @family feature detection methods -##' @seealso The \code{\link{do_detectFeatures_matchedFilter}} core API function -##' and \code{\link{findPeaks.matchedFilter}} for the old user interface. -##' -##' @references -##' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and -##' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite -##' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" -##' \emph{Anal. Chem.} 2006, 78:779-787. 
-##' @author Colin A Smith, Johannes Rainer
-##'
-##' @name featureDetection-matchedFilter
+#' @title Peak detection in the chromatographic time domain
+#'
+#' @aliases matchedFilter
+#'
+#' @description The \emph{matchedFilter} algorithm identifies peaks in the
+#' chromatographic time domain as described in [Smith 2006]. The intensity
+#' values are binned by cutting the LC/MS data into slices (bins) of a mass
+#' unit (\code{binSize} m/z) wide. Within each bin the maximal intensity is
+#' selected. The chromatographic peak detection is then performed in each
+#' bin by extending it based on the \code{steps} parameter to generate
+#' slices comprising bins \code{current_bin - steps +1} to
+#' \code{current_bin + steps - 1}. Each of these slices is then filtered
+#' with matched filtration using a second-derivative Gaussian as the model
+#' peak shape. After filtration peaks are detected using a signal-to-noise
+#' ratio cut-off. For more details and illustrations see [Smith 2006].
+#'
+#' @param binSize \code{numeric(1)} specifying the width of the
+#' bins/slices in m/z dimension.
+#'
+#' @param impute Character string specifying the method to be used for missing
+#' value imputation. Allowed values are \code{"none"} (no linear
+#' interpolation), \code{"lin"} (linear interpolation), \code{"linbase"}
+#' (linear interpolation within a certain bin-neighborhood) and
+#' \code{"intlin"}. See \code{\link{imputeLinInterpol}} for more details.
+#'
+#' @param fwhm \code{numeric(1)} specifying the full width at half maximum
+#' of matched filtration gaussian model peak. Only used to calculate the
+#' actual sigma, see below.
+#'
+#' @param sigma \code{numeric(1)} specifying the standard deviation (width)
+#' of the matched filtration model peak.
+#'
+#' @param max \code{numeric(1)} representing the maximum number of peaks
+#' that are expected/will be identified per slice.
+#'
+#' @param snthresh \code{numeric(1)} defining the signal to noise cutoff
+#' to be used in the chromatographic peak detection step.
+#'
+#' @param steps \code{numeric(1)} defining the number of bins to be
+#' merged before filtration (i.e. the number of neighboring bins that will
+#' be joined to the slice in which filtration and peak detection will be
+#' performed).
+#'
+#' @param mzdiff \code{numeric(1)} defining the minimum difference
+#' in m/z for peaks with overlapping retention times
+#'
+#' @param index \code{logical(1)} specifying whether indices should be
+#' returned instead of values for m/z and retention times.
+#'
+#' @details The intensities are binned by the provided m/z values within each
+#' spectrum (scan). Binning is performed such that the bins are centered
+#' around the m/z values (i.e. the first bin includes all m/z values between
+#' \code{min(mz) - bin_size/2} and \code{min(mz) + bin_size/2}).
+#'
+#' For more details on binning and missing value imputation see
+#' \code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods.
+#'
+#' @note These methods and classes are part of the updated and modernized
+#' \code{xcms} user interface which will eventually replace the
+#' \code{\link{findPeaks}} methods. It supports chromatographic peak
+#' detection on \code{\link[MSnbase]{MSnExp}} and
+#' \code{\link[MSnbase]{OnDiskMSnExp}} objects (both defined in the
+#' \code{MSnbase} package). All of the settings to the matchedFilter
+#' algorithm can be passed with a \code{MatchedFilterParam} object.
+#'
+#' @inheritParams imputeLinInterpol
+#'
+#' @inheritParams findChromPeaks-centWave
+#'
+#' @family peak detection methods
+#'
+#' @seealso The \code{\link{do_findChromPeaks_matchedFilter}} core API function
+#' and \code{\link{findPeaks.matchedFilter}} for the old user interface.
+#'
+#' @references
+#' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and
+#' Gary Siuzdak.
"XCMS: Processing Mass Spectrometry Data for Metabolite +#' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" +#' \emph{Anal. Chem.} 2006, 78:779-787. +#' +#' @author Colin A Smith, Johannes Rainer +#' +#' @name findChromPeaks-matchedFilter NULL #> NULL -##' @description The \code{MatchedFilterParam} class allows to specify all -##' settings for a feature detection using the matchedFilter method. Instances -##' should be created with the \code{MatchedFilterParam} constructor. -##' -##' @slot .__classVersion__,binSize,impute,baseValue,distance,fwhm,sigma,max,snthresh,steps,mzdiff,index See corresponding parameter above. \code{.__classVersion__} stores -##' the version from the class. Slots values should exclusively be accessed -##' \emph{via} the corresponding getter and setter methods listed above. -##' -##' @rdname featureDetection-matchedFilter -##' -##' @examples -##' -##' ## Create a MatchedFilterParam object -##' mfp <- MatchedFilterParam(binSize = 0.5) -##' ## Change snthresh parameter -##' snthresh(mfp) <- 15 -##' mfp -##' -##' ## Perform the feature detection using matchecFilter on the files from the -##' ## faahKO package. Files are read using the readMSData2 from the MSnbase -##' ## package -##' library(faahKO) -##' library(MSnbase) -##' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, -##' full.names = TRUE) -##' raw_data <- readMSData2(fls) -##' ## Perform the feature detection using the settings defined above. We're -##' ## returning the results as an xcmsSet object. Note that we are also -##' ## disabling parallel processing in this example by registering a "SerialParam" -##' register(SerialParam()) -##' res <- detectFeatures(raw_data, param = mfp, return.type = "xcmsSet") -##' head(peaks(res)) +#' @description The \code{MatchedFilterParam} class allows to specify all +#' settings for a chromatographic peak detection using the matchedFilter +#' method. 
Instances should be created with the \code{MatchedFilterParam}
+#' constructor.
+#'
+#' @slot .__classVersion__,binSize,impute,baseValue,distance,fwhm,sigma,max,snthresh,steps,mzdiff,index See corresponding parameter above. \code{.__classVersion__} stores
+#' the version from the class. Slots values should exclusively be accessed
+#' \emph{via} the corresponding getter and setter methods listed above.
+#'
+#' @rdname findChromPeaks-matchedFilter
+#'
+#' @examples
+#'
+#' ## Create a MatchedFilterParam object
+#' mfp <- MatchedFilterParam(binSize = 0.5)
+#' ## Change snthresh parameter
+#' snthresh(mfp) <- 15
+#' mfp
+#'
+#' ## Perform the peak detection using matchedFilter on the files from the
+#' ## faahKO package. Files are read using the readMSData2 from the MSnbase
+#' ## package
+#' library(faahKO)
+#' library(MSnbase)
+#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
+#' full.names = TRUE)
+#' raw_data <- readMSData2(fls)
+#' ## Perform the chromatographic peak detection using the settings defined
+#' ## above.
Note that we are also disabling parallel processing in this +#' ## example by registering a "SerialParam" +#' register(SerialParam()) +#' res <- findChromPeaks(raw_data, param = mfp) +#' head(chromPeaks(res)) setClass("MatchedFilterParam", slots = c( binSize = "numeric", @@ -690,179 +774,185 @@ setClass("MatchedFilterParam", index = FALSE ), validity = function(object) { - msg <- validMsg(NULL, NULL) + msg <- character() if (length(object@binSize) != 1 | any(object@binSize < 0)) - msg <- validMsg(msg, paste0("'binSize' has to be positive", - " numeric of length 1.")) + msg <- c(msg, paste0("'binSize' has to be positive", + " numeric of length 1.")) if (!any(c("none", "lin", "linbase") == object@impute)) - msg <- validMsg(msg, - paste0("Only values 'none', 'lin' and ", - "'linbase' are allowed for'impute'")) + msg <- c(msg, + paste0("Only values 'none', 'lin' and ", + "'linbase' are allowed for'impute'")) if (length(object@baseValue) > 1) - msg <- validMsg(msg, paste0("'baseValue' has to be a", - " numeric of length 1.")) + msg <- c(msg, paste0("'baseValue' has to be a", + " numeric of length 1.")) if (length(object@distance) > 1) - msg <- validMsg(msg, paste0("'distance' has to be a numeric", - " of length 1.")) + msg <- c(msg, paste0("'distance' has to be a numeric", + " of length 1.")) if (length(object@fwhm) != 1) - msg <- validMsg(msg, paste0("'fwhm' has to be a numeric", - " of length 1.")) + msg <- c(msg, paste0("'fwhm' has to be a numeric", + " of length 1.")) if (length(object@sigma) != 1) - msg <- validMsg(msg, paste0("'sigma' has to be a numeric", - " of length 1.")) + msg <- c(msg, paste0("'sigma' has to be a numeric", + " of length 1.")) if (length(object@max) != 1) - msg <- validMsg(msg, paste0("'max' has to be a numeric", - " of length 1.")) + msg <- c(msg, paste0("'max' has to be a numeric", + " of length 1.")) if (length(object@snthresh) != 1) - msg <- validMsg(msg, paste0("'snthresh' has to be a numeric", - " of length 1.")) + msg <- c(msg, 
paste0("'snthresh' has to be a numeric", + " of length 1.")) if (length(object@steps) != 1) - msg <- validMsg(msg, paste0("'steps' has to be a numeric", - " of length 1.")) + msg <- c(msg, paste0("'steps' has to be a numeric", + " of length 1.")) if (length(object@mzdiff) != 1) - msg <- validMsg(msg, paste0("'mzdiff' has to be a numeric", - " of length 1.")) + msg <- c(msg, paste0("'mzdiff' has to be a numeric", + " of length 1.")) if (length(object@index) != 1) - msg <- validMsg(msg, paste0("'index' has to be a logical", - " of length 1.")) - if (is.null(msg)) { - return(TRUE) - } else { - return(msg) - } + msg <- c(msg, paste0("'index' has to be a logical", + " of length 1.")) + if (length(msg)) + msg + else + TRUE }) ## Main massifquant documentation. -##' @title Feature detection using the massifquant method -##' -##' @aliases massifquant -##' -##' @description Massifquant is a Kalman filter (KF)-based feature -##' detection for XC-MS data in centroid mode. The identified features -##' can be further refined with the \emph{centWave} method (see -##' \code{\link{do_detectFeatures_centWave}} for details on centWave) -##' by specifying \code{withWave = TRUE}. -##' -##' @param peakwidth numeric(2). Only the first element is used by -##' massifquant, which specifices the minimum feature length in time scans. -##' For \code{withWave = TRUE} the second argument represents the maximum -##' feature length subject to being greater than the mininum feature length -##' (see also documentation of \code{\link{do_detectFeatures_centWave}}). -##' @param prefilter numeric(2). The first argument is only used -##' if (\code{withWave = TRUE}); see \code{\link{do_detectFeatures_centWave}} -##' for details. The second argument specifies the minimum threshold for the -##' maximum intensity of a feature that must be met. -##' @param criticalValue numeric(1). Suggested values: -##' (\code{0.1-3.0}). This setting helps determine the the Kalman Filter -##' prediciton margin of error. 
A real centroid belonging to a bonafide -##' feature must fall within the KF prediction margin of error. Much like -##' in the construction of a confidence interval, \code{criticalVal} loosely -##' translates to be a multiplier of the standard error of the prediction -##' reported by the Kalman Filter. If the features in the XC-MS sample have -##' a small mass deviance in ppm error, a smaller critical value might be -##' better and vice versa. -##' @param consecMissedLimit Integer: Suggested values: (\code{1,2,3}). While -##' a feature is in the proces of being detected by a Kalman Filter, the -##' Kalman Filter may not find a predicted centroid in every scan. After 1 -##' or more consecutive failed predictions, this setting informs Massifquant -##' when to stop a Kalman Filter from following a candidate feature. -##' @param unions Integer: set to \code{1} if apply t-test union on -##' segmentation; set to \code{0} if no t-test to be applied on -##' chromatographically continous features sharing same m/z range. -##' Explanation: With very few data points, sometimes a Kalman Filter stops -##' tracking a feature prematurely. Another Kalman Filter is instantiated -##' and begins following the rest of the signal. Because tracking is done -##' backwards to forwards, this algorithmic defect leaves a real feature -##' divided into two segments or more. With this option turned on, the -##' program identifies segmented features and combines them (merges them) -##' into one with a two sample t-test. The potential danger of this option -##' is that some truly distinct features may be merged. -##' @param checkBack Integer: set to \code{1} if turned on; set to \code{0} -##' if turned off. The convergence of a Kalman Filter to a feature's precise -##' m/z mapping is very fast, but sometimes it incorporates erroneous centroids -##' as part of a feature (especially early on). 
The \code{scanBack} option is an -##' attempt to remove the occasional outlier that lies beyond the converged -##' bounds of the Kalman Filter. The option does not directly affect -##' identification of a feature because it is a postprocessing measure; it -##' has not shown to be a extremely useful thus far and the default is set -##' to being turned off. -##' @param withWave Logical: if \code{TRUE}, the features identified first -##' with Massifquant are subsequently filtered with the second step of the -##' centWave algorithm, which includes wavelet estimation. -##' -##' @details This algorithm's performance has been tested rigorously -##' on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. -##' Simultaneous kalman filters identify features and calculate their -##' area under the curve. The default parameters are set to operate on -##' a complex LC-MS Orbitrap sample. Users will find it useful to do some -##' simple exploratory data analysis to find out where to set a minimum -##' intensity, and identify how many scans an average feature spans. The -##' \code{consecMissedLimit} parameter has yielded good performance on -##' Orbitrap data when set to (\code{2}) and on TOF data it was found best -##' to be at (\code{1}). This may change as the algorithm has yet to be -##' tested on many samples. The \code{criticalValue} parameter is perhaps -##' most dificult to dial in appropriately and visual inspection of peak -##' identification is the best suggested tool for quick optimization. -##' The \code{ppm} and \code{checkBack} parameters have shown less influence -##' than the other parameters and exist to give users flexibility and -##' better accuracy. -##' -##' @note These methods and classes are part of the updated and modernized -##' \code{xcms} user interface which will eventually replace the -##' \code{\link{findPeaks}} methods. 
It supports feature detection on -##' \code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} -##' objects (both defined in the \code{MSnbase} package). All of the settings -##' to the massifquant and centWave algorithm can be passed with a -##' \code{MassifquantParam} object. -##' -##' @inheritParams featureDetection-centWave -##' -##' @family feature detection methods -##' @seealso The \code{\link{do_detectFeatures_massifquant}} core API function -##' and \code{\link{findPeaks.massifquant}} for the old user interface. -##' -##' @references -##' Conley CJ, Smith R, Torgrip RJ, Taylor RM, Tautenhahn R and Prince JT -##' "Massifquant: open-source Kalman filter-based XC-MS isotope trace feature -##' detection" \emph{Bioinformatics} 2014, 30(18):2636-43. -##' @author Christopher Conley, Johannes Rainer -##' -##' @name featureDetection-massifquant +#' @title Chromatographic peak detection using the massifquant method +#' +#' @aliases massifquant +#' +#' @description Massifquant is a Kalman filter (KF)-based chromatographic peak +#' detection for XC-MS data in centroid mode. The identified peaks +#' can be further refined with the \emph{centWave} method (see +#' \code{\link{findChromPeaks-centWave}} for details on centWave) +#' by specifying \code{withWave = TRUE}. +#' +#' @param peakwidth \code{numeric(2)}. Only the first element is used by +#' massifquant, which specifices the minimum peak length in time scans. +#' For \code{withWave = TRUE} the second argument represents the maximum +#' peak length subject to being greater than the mininum peak length +#' (see also documentation of \code{\link{do_findChromPeaks_centWave}}). +#' +#' @param prefilter \code{numeric(2)}. The first argument is only used +#' if (\code{withWave = TRUE}); see \code{\link{findChromPeaks-centWave}} +#' for details. The second argument specifies the minimum threshold for the +#' maximum intensity of a chromatographic peak that must be met. 
+#' +#' @param criticalValue \code{numeric(1)}. Suggested values: +#' (\code{0.1-3.0}). This setting helps determine the Kalman Filter +#' prediction margin of error. A real centroid belonging to a bona fide +#' peak must fall within the KF prediction margin of error. Much like +#' in the construction of a confidence interval, \code{criticalVal} loosely +#' translates to be a multiplier of the standard error of the prediction +#' reported by the Kalman Filter. If the peaks in the XC-MS sample have +#' a small mass deviance in ppm error, a smaller critical value might be +#' better and vice versa. +#' +#' @param consecMissedLimit \code{integer(1)} Suggested values: (\code{1,2,3}). +#' While a peak is in the process of being detected by a Kalman Filter, the +#' Kalman Filter may not find a predicted centroid in every scan. After 1 +#' or more consecutive failed predictions, this setting informs Massifquant +#' when to stop a Kalman Filter from following a candidate peak. +#' +#' @param unions \code{integer(1)} set to \code{1} if apply t-test union on +#' segmentation; set to \code{0} if no t-test to be applied on +#' chromatographically continuous peaks sharing same m/z range. +#' Explanation: With very few data points, sometimes a Kalman Filter stops +#' tracking a peak prematurely. Another Kalman Filter is instantiated +#' and begins following the rest of the signal. Because tracking is done +#' backwards to forwards, this algorithmic defect leaves a real peak +#' divided into two segments or more. With this option turned on, the +#' program identifies segmented peaks and combines them (merges them) +#' into one with a two sample t-test. The potential danger of this option +#' is that some truly distinct peaks may be merged. +#' +#' @param checkBack \code{integer(1)} set to \code{1} if turned on; set to +#' \code{0} if turned off. 
The convergence of a Kalman Filter to a peak's +#' precise m/z mapping is very fast, but sometimes it incorporates erroneous +#' centroids as part of a peak (especially early on). The \code{scanBack} +#' option is an attempt to remove the occasional outlier that lies beyond +#' the converged bounds of the Kalman Filter. The option does not directly +#' affect identification of a peak because it is a postprocessing measure; +#' it has not been shown to be extremely useful thus far and the default is +#' set to being turned off. +#' +#' @param withWave \code{logical(1)} if \code{TRUE}, the peaks identified first +#' with Massifquant are subsequently filtered with the second step of the +#' centWave algorithm, which includes wavelet estimation. +#' +#' @details This algorithm's performance has been tested rigorously +#' on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. +#' Simultaneous Kalman filters identify chromatographic peaks and calculate +#' their area under the curve. The default parameters are set to operate on +#' a complex LC-MS Orbitrap sample. Users will find it useful to do some +#' simple exploratory data analysis to find out where to set a minimum +#' intensity, and identify how many scans an average peak spans. The +#' \code{consecMissedLimit} parameter has yielded good performance on +#' Orbitrap data when set to (\code{2}) and on TOF data it was found best +#' to be at (\code{1}). This may change as the algorithm has yet to be +#' tested on many samples. The \code{criticalValue} parameter is perhaps +#' most difficult to dial in appropriately and visual inspection of peak +#' identification is the best suggested tool for quick optimization. +#' The \code{ppm} and \code{checkBack} parameters have shown less influence +#' than the other parameters and exist to give users flexibility and +#' better accuracy. 
+#' +#' @note These methods and classes are part of the updated and modernized +#' \code{xcms} user interface which will eventually replace the +#' \code{\link{findPeaks}} methods. It supports chromatographic peak +#' detection on \code{\link[MSnbase]{MSnExp}} and +#' \code{\link[MSnbase]{OnDiskMSnExp}} objects (both defined in the +#' \code{MSnbase} package). All of the settings to the massifquant and +#' centWave algorithm can be passed with a \code{MassifquantParam} object. +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @family peak detection methods +#' +#' @seealso The \code{\link{do_findChromPeaks_massifquant}} core API function +#' and \code{\link{findPeaks.massifquant}} for the old user interface. +#' +#' @references +#' Conley CJ, Smith R, Torgrip RJ, Taylor RM, Tautenhahn R and Prince JT +#' "Massifquant: open-source Kalman filter-based XC-MS isotope trace feature +#' detection" \emph{Bioinformatics} 2014, 30(18):2636-43. +#' +#' @author Christopher Conley, Johannes Rainer +#' +#' @name findChromPeaks-massifquant NULL #> NULL -##' @description The \code{MassifquantParam} class allows to specify all -##' settings for a feature detection using the massifquant method eventually in -##' combination with the centWave algorithm. Instances should be created with -##' the \code{MassifquantParam} constructor. -##' -##' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,criticalValue,consecMissedLimit,unions,checkBack,withWave See corresponding parameter above. \code{.__classVersion__} stores -##' the version from the class. Slots values should exclusively be accessed -##' \emph{via} the corresponding getter and setter methods listed above. -##' -##' @rdname featureDetection-massifquant -##' -##' @examples -##' -##' ## Create a MassifquantParam object. 
-##' mqp <- MassifquantParam() -##' ## Change snthresh parameter -##' snthresh(mqp) <- 30 -##' mqp -##' -##' ## Perform the feature detection using massifquant on the files from the -##' ## faahKO package. Files are read using the readMSData2 from the MSnbase -##' ## package -##' library(faahKO) -##' library(MSnbase) -##' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, -##' full.names = TRUE) -##' raw_data <- readMSData2(fls[1:2]) -##' ## Perform the feature detection using the settings defined above. We're -##' ## returning the results as an xcmsSet object. -##' res <- detectFeatures(raw_data, param = mqp, return.type = "xcmsSet") -##' head(peaks(res)) +#' @description The \code{MassifquantParam} class allows to specify all +#' settings for a chromatographic peak detection using the massifquant +#' method eventually in combination with the centWave algorithm. Instances +#' should be created with the \code{MassifquantParam} constructor. +#' +#' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,criticalValue,consecMissedLimit,unions,checkBack,withWave See corresponding parameter above. \code{.__classVersion__} stores +#' the version from the class. Slots values should exclusively be accessed +#' \emph{via} the corresponding getter and setter methods listed above. +#' +#' @rdname findChromPeaks-massifquant +#' +#' @examples +#' +#' ## Create a MassifquantParam object. +#' mqp <- MassifquantParam() +#' ## Change snthresh parameter +#' snthresh(mqp) <- 30 +#' mqp +#' +#' ## Perform the peak detection using massifquant on the files from the +#' ## faahKO package. Files are read using the readMSData2 from the MSnbase +#' ## package +#' library(faahKO) +#' library(MSnbase) +#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, +#' full.names = TRUE) +#' raw_data <- readMSData2(fls[1:2]) +#' ## Perform the peak detection using the settings defined above. 
+#' res <- findChromPeaks(raw_data, param = mqp) +#' head(chromPeaks(res)) setClass("MassifquantParam", slots = c( ppm = "numeric", @@ -900,120 +990,130 @@ setClass("MassifquantParam", withWave = FALSE ), validity = function(object) { - msg <- validMsg(NULL, NULL) + msg <- character() if (length(object@ppm) != 1 | any(object@ppm < 0)) - msg <- validMsg(msg, paste0("'ppm' has to be positive numeric", - " of length 1.")) + msg <- c(msg, paste0("'ppm' has to be positive numeric", + " of length 1.")) if (length(object@peakwidth) != 2 | any(object@peakwidth < 0)) - msg <- validMsg(msg, paste0("'peakwidth' has to be a numeric", - " of length 2 with only positive", - " values.")) + msg <- c(msg, paste0("'peakwidth' has to be a numeric", + " of length 2 with only positive", + " values.")) if (length(object@snthresh) != 1 | any(object@snthresh < 0)) - msg <- validMsg(msg, paste0("'snthresh' has to be a positive", - " numeric of length 1.")) + msg <- c(msg, paste0("'snthresh' has to be a positive", + " numeric of length 1.")) if (length(object@prefilter) != 2) - msg <- validMsg(msg, paste0("'prefilter' has to be a numeric", - " of length 2.")) + msg <- c(msg, paste0("'prefilter' has to be a numeric", + " of length 2.")) allowed_vals <- c("wMean", "mean", "apex", "wMeanApex3", "meanApex3") if (!(object@mzCenterFun) %in% allowed_vals) - msg <- validMsg(msg, paste0("'mzCenterFun' has to be one of ", - paste0("'", allowed_vals, "'", + msg <- c(msg, paste0("'mzCenterFun' has to be one of ", + paste0("'", allowed_vals, "'", collapse = ", "), ".")) if (!(object@integrate %in% c(1L, 2L))) - msg <- validMsg(msg, paste0("'integrate' has to be either 1", - " or 2.")) + msg <- c(msg, paste0("'integrate' has to be either 1", + " or 2.")) if (length(object@mzdiff) != 1) - msg <- validMsg(msg, paste0("'mzdiff' has to be a numeric of", - " length 1.")) + msg <- c(msg, paste0("'mzdiff' has to be a numeric of", + " length 1.")) if (length(object@noise) != 1) - msg <- validMsg(msg, 
paste0("'noise' has to be a numeric of", - " length 1.")) + msg <- c(msg, paste0("'noise' has to be a numeric of", + " length 1.")) if (length(object@fitgauss) != 1) - msg <- validMsg(msg, paste0("'fitgauss' has to be a numeric of", - " length 1.")) + msg <- c(msg, paste0("'fitgauss' has to be a numeric of", + " length 1.")) if (length(object@verboseColumns) != 1) - msg <- validMsg(msg, paste0("'verboseColumns' has to be a ", - "numeric of length 1.")) + msg <- c(msg, paste0("'verboseColumns' has to be a ", + "numeric of length 1.")) if (length(object@criticalValue) != 1) - msg <- validMsg(msg, paste0("'criticalValue' has to be a ", - "numeric of length 1.")) + msg <- c(msg, paste0("'criticalValue' has to be a ", + "numeric of length 1.")) if (length(object@consecMissedLimit) != 1) - msg <- validMsg(msg, paste0("'consecMissedLimit' has to be a ", - "numeric of length 1.")) + msg <- c(msg, paste0("'consecMissedLimit' has to be a ", + "numeric of length 1.")) if (length(object@unions) != 1) - msg <- validMsg(msg, paste0("'unions' has to be a ", - "numeric of length 1.")) + msg <- c(msg, paste0("'unions' has to be a ", + "numeric of length 1.")) if (object@unions != 0 & object@unions != 1) - msg <- validMsg(msg, paste0("'unions' has to be either 0 or 1!")) + msg <- c(msg, paste0("'unions' has to be either 0 or 1!")) if (length(object@checkBack) != 1) - msg <- validMsg(msg, paste0("'checkBack' has to be a ", - "numeric of length 1.")) + msg <- c(msg, paste0("'checkBack' has to be a ", + "numeric of length 1.")) if (object@checkBack != 0 & object@checkBack != 1) - msg <- validMsg(msg, paste0("'checkBack' has to be either 0", - " or 1!")) + msg <- c(msg, paste0("'checkBack' has to be either 0", + " or 1!")) if (length(object@withWave) != 1) - msg <- validMsg(msg, paste0("'withWave' has to be a ", - "numeric of length 1.")) - if (is.null(msg)) { - return(TRUE) - } else { - return(msg) - } + msg <- c(msg, paste0("'withWave' has to be a ", + "numeric of length 1.")) + if 
(length(msg)) + msg + else TRUE }) ## Main MSW documentation. -##' @title Single-spectrum non-chromatography MS data feature detection -##' -##' @aliases MSW -##' -##' @description Perform feature detection in mass spectrometry -##' direct injection spectrum using a wavelet based algorithm. -##' -##' @details This is a wrapper for the peak picker in Bioconductor's -##' \code{MassSpecWavelet} package calling -##' \code{\link[MassSpecWavelet]{peakDetectionCWT}} and -##' \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. See the -##' \emph{xcmsDirect} vignette for more information. -##' -##' @note These methods and classes are part of the updated and modernized -##' \code{xcms} user interface which will eventually replace the -##' \code{\link{findPeaks}} methods. It supports feature detection on -##' \code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} -##' objects (both defined in the \code{MSnbase} package). All of the settings -##' to the massifquant and centWave algorithm can be passed with a -##' \code{MassifquantParam} object. -##' -##' @inheritParams featureDetection-centWave -##' -##' @family feature detection methods -##' @seealso The \code{\link{do_detectFeatures_MSW}} core API function -##' and \code{\link{findPeaks.MSW}} for the old user interface. -##' -##' @author Joachim Kutzera, Steffen Neumann, Johannes Rainer -##' -##' @name featureDetection-MSW +#' @title Single-spectrum non-chromatography MS data peak detection +#' +#' @aliases MSW +#' +#' @description Perform peak detection in mass spectrometry +#' direct injection spectrum using a wavelet based algorithm. +#' +#' @details This is a wrapper for the peak picker in Bioconductor's +#' \code{MassSpecWavelet} package calling +#' \code{\link[MassSpecWavelet]{peakDetectionCWT}} and +#' \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. See the +#' \emph{xcmsDirect} vignette for more information. 
+#' +#' @note These methods and classes are part of the updated and modernized +#' \code{xcms} user interface which will eventually replace the +#' \code{\link{findPeaks}} methods. It supports peak detection on +#' \code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} +#' objects (both defined in the \code{MSnbase} package). All of the settings +#' to the algorithm can be passed with a \code{MSWParam} object. +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @family peak detection methods +#' +#' @seealso The \code{\link{do_findPeaks_MSW}} core API function +#' and \code{\link{findPeaks.MSW}} for the old user interface. +#' +#' @author Joachim Kutzera, Steffen Neumann, Johannes Rainer +#' +#' @name findPeaks-MSW NULL #> NULL -##' @description The \code{MSWParam} class allows to specify all -##' settings for a feature detection using the MSW method. Instances should be -##' created with the \code{MSWParam} constructor. -##' -##' @slot .__classVersion__,snthresh,verboseColumns,scales,nearbyPeak,peakScaleRange,ampTh,minNoiseLevel,ridgeLength,peakThr,tuneIn,addParams See corresponding parameter above. \code{.__classVersion__} stores the version from the class. Slots values -##' should exclusively be accessed \emph{via} the corresponding getter and -##' setter methods listed above. -##' -##' @rdname featureDetection-MSW -##' -##' @examples -##' -##' ## Create a MassifquantParam object -##' mp <- MSWParam() -##' ## Change snthresh parameter -##' snthresh(mp) <- 15 -##' mp -##' +#' @description The \code{MSWParam} class allows to specify all +#' settings for a peak detection using the MSW method. Instances should be +#' created with the \code{MSWParam} constructor. +#' +#' @slot .__classVersion__,snthresh,verboseColumns,scales,nearbyPeak,peakScaleRange,ampTh,minNoiseLevel,ridgeLength,peakThr,tuneIn,addParams See corresponding parameter above. \code{.__classVersion__} stores the version from the class. 
Slots values +#' should exclusively be accessed \emph{via} the corresponding getter and +#' setter methods listed above. +#' +#' @rdname findPeaks-MSW +#' +#' @examples +#' +#' ## Create a MSWParam object +#' mp <- MSWParam() +#' ## Change snthresh parameter +#' snthresh(mp) <- 15 +#' mp +#' +#' ## Loading a small subset of direct injection, single spectrum files +#' library(msdata) +#' fticrf <- list.files(system.file("fticr", package = "msdata"), +#' recursive = TRUE, full.names = TRUE) +#' fticr <- readMSData2(fticrf[1:2], msLevel. = 1) +#' +#' ## Perform the MSW peak detection on these: +#' p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005, +#' SNR.method = "data.mean", winSize.noise = 500) +#' fticr <- findChromPeaks(fticr, param = p) +#' +#' head(chromPeaks(fticr)) setClass("MSWParam", slots = c( snthresh = "numeric", @@ -1044,116 +1144,118 @@ setClass("MSWParam", addParams = list() ), validity = function(object) { - msg <- validMsg(NULL, NULL) + msg <- character() if (length(object@snthresh) != 1 | any(object@snthresh < 0)) - msg <- validMsg(msg, paste0("'snthresh' has to be a positive", - " numeric of length 1.")) + msg <- c(msg, paste0("'snthresh' has to be a positive", + " numeric of length 1.")) if (length(object@verboseColumns) != 1) - msg <- validMsg(msg, paste0("'verboseColumns' has to be a ", - "numeric of length 1.")) + msg <- c(msg, paste0("'verboseColumns' has to be a ", + "numeric of length 1.")) if (length(object@nearbyPeak) != 1) - msg <- validMsg(msg, paste0("'nearbyPeak' has to be a ", - "logical of length 1.")) + msg <- c(msg, paste0("'nearbyPeak' has to be a ", + "logical of length 1.")) if (length(object@peakScaleRange) != 1 | any(object@peakScaleRange < 0)) - msg <- validMsg(msg, paste0("'peakScaleRange' has to be a ", - "positive numeric of length 1.")) + msg <- c(msg, paste0("'peakScaleRange' has to be a ", + "positive numeric of length 1.")) if (length(object@ampTh) != 1 | any(object@ampTh < 0)) - msg <- validMsg(msg, 
paste0("'ampTh' has to be a ", - "positive numeric of length 1.")) + msg <- c(msg, paste0("'ampTh' has to be a ", + "positive numeric of length 1.")) if (length(object@minNoiseLevel) != 1 | any(object@minNoiseLevel < 0)) - msg <- validMsg(msg, paste0("'minNoiseLevel' has to be a ", - "positive numeric of length 1.")) + msg <- c(msg, paste0("'minNoiseLevel' has to be a ", + "positive numeric of length 1.")) if (length(object@ridgeLength) != 1 | any(object@ridgeLength < 0)) - msg <- validMsg(msg, paste0("'ridgeLength' has to be a ", - "positive numeric of length 1.")) + msg <- c(msg, paste0("'ridgeLength' has to be a ", + "positive numeric of length 1.")) if (length(object@peakThr) > 1) - msg <- validMsg(msg, paste0("'peakThr' has to be a ", - "positive numeric of length 1.")) + msg <- c(msg, paste0("'peakThr' has to be a ", + "positive numeric of length 1.")) if (length(object@tuneIn) != 1) - msg <- validMsg(msg, paste0("'tuneIn' has to be a ", - "logical of length 1.")) - if (is.null(msg)) { - return(TRUE) - } else { - return(msg) - } + msg <- c(msg, paste0("'tuneIn' has to be a ", + "logical of length 1.")) + if (length(msg)) + msg + else TRUE }) -## Main centWave documentation. -##' @title Two-step centWave feature detection considering also feature isotopes -##' -##' @aliases centWaveWithPredIsoROIs -##' -##' @description This method performs a two-step centWave-based feature -##' detection: in a first centWave run features are identified for which then -##' the location of their potential isotopes in the mz-retention time is -##' predicted. A second centWave run is then performed on these -##' \emph{regions of interest} (ROIs). The final list of features comprises all -##' non-overlapping features from both centWave runs. -##' -##' @inheritParams featureDetection-centWave -##' -##' @param maxCharge integer(1) defining the maximal isotope charge. Isotopes -##' will be defined for charges \code{1:maxCharge}. 
-##' -##' @param maxIso integer(1) defining the number of isotope peaks that should be -##' predicted for each feature identified in the first centWave run. -##' -##' @param mzIntervalExtension logical(1) whether the mz range for the predicted -##' isotope ROIs should be extended to increase detection of low intensity peaks. -##' -##' @param snthreshIsoROIs numeric(1) defining the signal to noise ratio cutoff -##' to be used in the second centWave run to identify features for predicted -##' isotope ROIs. -##' -##' @param polarity character(1) specifying the polarity of the data. Currently -##' not used, but has to be \code{"positive"}, \code{"negative"} or -##' \code{"unknown"} if provided. -##' -##' @details See \code{\link{centWave}} for details on the centWave method. -##' -##' @note These methods and classes are part of the updated and modernized -##' \code{xcms} user interface which will eventually replace the -##' \code{\link{findPeaks}} methods. It supports feature detection on -##' \code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} -##' objects (both defined in the \code{MSnbase} package). All of the settings -##' to the centWave algorithm can be passed with a \code{CentWaveParam} object. -##' -##' @family feature detection methods -##' @seealso The \code{\link{do_detectFeatures_centWaveWithPredIsoROIs}} core -##' API function and \code{\link{findPeaks.centWave}} for the old user interface. -##' \code{\link{CentWaveParam}} for the class the \code{CentWavePredIsoParam} -##' extends. 
-##' -##' @name featureDetection-centWaveWithPredIsoROIs -##' @author Hendrik Treutler, Johannes Rainer +#' @title Two-step centWave peak detection considering also isotopes +#' +#' @aliases centWaveWithPredIsoROIs +#' +#' @description This method performs a two-step centWave-based chromatographic +#' peak detection: in a first centWave run peaks are identified for which +#' then the location of their potential isotopes in the mz-retention time is +#' predicted. A second centWave run is then performed on these +#' \emph{regions of interest} (ROIs). The final list of chromatographic +#' peaks comprises all non-overlapping peaks from both centWave runs. +#' +#' @inheritParams findChromPeaks-centWave +#' +#' @param maxCharge \code{integer(1)} defining the maximal isotope charge. +#' Isotopes will be defined for charges \code{1:maxCharge}. +#' +#' @param maxIso \code{integer(1)} defining the number of isotope peaks that +#' should be predicted for each peak identified in the first centWave run. +#' +#' @param mzIntervalExtension \code{logical(1)} whether the mz range for the +#' predicted isotope ROIs should be extended to increase detection of low +#' intensity peaks. +#' +#' @param snthreshIsoROIs \code{numeric(1)} defining the signal to noise ratio +#' cutoff to be used in the second centWave run to identify peaks for +#' predicted isotope ROIs. +#' +#' @param polarity \code{character(1)} specifying the polarity of the data. +#' Currently not used, but has to be \code{"positive"}, \code{"negative"} or +#' \code{"unknown"} if provided. +#' +#' @details See \code{\link{centWave}} for details on the centWave method. +#' +#' @note These methods and classes are part of the updated and modernized +#' \code{xcms} user interface which will eventually replace the +#' \code{\link{findPeaks}} methods. 
It supports chromatographic peak +#' detection on \code{\link[MSnbase]{MSnExp}} and +#' \code{\link[MSnbase]{OnDiskMSnExp}} objects (both defined in the +#' \code{MSnbase} package). All of the settings to the algorithm can be +#' passed with a \code{CentWavePredIsoParam} object. +#' +#' @family peak detection methods +#' +#' @seealso The \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}} core +#' API function and \code{\link{findPeaks.centWave}} for the old user +#' interface. \code{\link{CentWaveParam}} for the class the +#' \code{CentWavePredIsoParam} extends. +#' +#' @name findChromPeaks-centWaveWithPredIsoROIs +#' +#' @author Hendrik Treutler, Johannes Rainer NULL #> NULL -##' @description The \code{CentWavePredIsoParam} class allows to specify all -##' settings for the two-step centWave-based feature detection considering also -##' predicted isotopes of features identified in the first centWave run. -##' Instances should be created with the \code{CentWavePredIsoParam} constructor. -##' See also the documentation of the \code{\link{CentWaveParam}} for all methods -##' and arguments this class inherits. -##' -##' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,snthreshIsoROIs,maxCharge,maxIso,mzIntervalExtension,polarity See corresponding parameter above. \code{.__classVersion__} stores -##' the version from the class. Slots values should exclusively be accessed -##' \emph{via} the corresponding getter and setter methods listed above. 
-##' -##' @rdname featureDetection-centWaveWithPredIsoROIs -##' -##' @examples -##' -##' ## Create a CentWaveParam object -##' p <- CentWavePredIsoParam(maxCharge = 4) -##' ## Change snthresh parameter -##' snthresh(p) <- 25 -##' p -##' +#' @description The \code{CentWavePredIsoParam} class allows to specify all +#' settings for the two-step centWave-based peak detection considering also +#' predicted isotopes of peaks identified in the first centWave run. +#' Instances should be created with the \code{CentWavePredIsoParam} +#' constructor. See also the documentation of the +#' \code{\link{CentWaveParam}} for all methods and arguments this class +#' inherits. +#' +#' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,snthreshIsoROIs,maxCharge,maxIso,mzIntervalExtension,polarity See corresponding parameter above. \code{.__classVersion__} stores +#' the version from the class. Slots values should exclusively be accessed +#' \emph{via} the corresponding getter and setter methods listed above. 
+#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs +#' +#' @examples +#' +#' ## Create a param object +#' p <- CentWavePredIsoParam(maxCharge = 4) +#' ## Change snthresh parameter +#' snthresh(p) <- 25 +#' p +#' setClass("CentWavePredIsoParam", slots = c( snthreshIsoROIs = "numeric", @@ -1171,186 +1273,1064 @@ setClass("CentWavePredIsoParam", polarity = "unknown" ), validity = function(object) { - msg <- validMsg(NULL, NULL) + msg <- character() if (length(object@snthreshIsoROIs) != 1 | any(object@snthreshIsoROIs < 0)) - msg <- validMsg(msg, paste0("'snthreshIsoROIs' has to be a ", - "positive numeric of length 1.")) + msg <- c(msg, paste0("'snthreshIsoROIs' has to be a ", + "positive numeric of length 1.")) if (length(object@maxCharge) != 1 | any(object@maxCharge < 0)) - msg <- validMsg(msg, paste0("'maxCharge' has to be a ", - "positive integer of length 1.")) + msg <- c(msg, paste0("'maxCharge' has to be a ", + "positive integer of length 1.")) if (length(object@maxIso) != 1 | any(object@maxIso < 0)) - msg <- validMsg(msg, paste0("'maxIso' has to be a ", - "positive integer of length 1.")) + msg <- c(msg, paste0("'maxIso' has to be a ", + "positive integer of length 1.")) if (length(object@mzIntervalExtension) != 1) - msg <- validMsg(msg, paste0("'mzIntervalExtension' has to be a", - " logical of length 1.")) + msg <- c(msg, paste0("'mzIntervalExtension' has to be a", + " logical of length 1.")) if (length(object@polarity) != 1) - msg <- validMsg(msg, paste0("'polarity' has to be a", - " character of length 1.")) + msg <- c(msg, paste0("'polarity' has to be a", + " character of length 1.")) if (!(object@polarity %in% c("positive", "negative", "unknown"))) - msg <- validMsg(msg, paste0("'polarity' has to be either ", - "'positive', 'negative' or ", - "'unknown'!")) - if (is.null(msg)) + msg <- c(msg, paste0("'polarity' has to be either ", + "'positive', 'negative' or ", + "'unknown'!")) + if (length(msg)) + msg + else TRUE + }) + + +## General groupChromPeaks 
method.
+#' @title Correspondence: Chromatographic peak grouping methods.
+#'
+#' @description The \code{groupChromPeaks} method(s) perform the correspondence,
+#' i.e. the grouping of chromatographic peaks within and between samples.
+#' These methods are part of the modernized \code{xcms} user interface.
+#' The resulting peak groups are referred to as (mz-rt) features and can be
+#' accessed \emph{via} the \code{\link{featureDefinitions}} method on the
+#' result object.
+#'
+#' The implemented peak grouping methods are:
+#' \describe{
+#'
+#' \item{density}{peak grouping based on time dimension peak densities.
+#' See \code{\link{groupChromPeaks-density}} for more details.}
+#'
+#' \item{mzClust}{high resolution peak grouping for single spectra (direct
+#' infusion) MS data. See \code{\link{groupChromPeaks-mzClust}} for more
+#' details.}
+#'
+#' \item{nearest}{chromatographic peak grouping based on their proximity in
+#' the mz-rt space. See \code{\link{groupChromPeaks-nearest}} for more
+#' details.}
+#'
+#' }
+#' @name groupChromPeaks
+#'
+#' @family peak grouping methods
+#'
+#' @seealso \code{\link{group}} for the \emph{old} peak grouping methods.
+#' \code{\link{featureDefinitions}} and
+#' \code{\link{featureValues,XCMSnExp-method}} for methods to access peak
+#' grouping results.
+#'
+#' @author Johannes Rainer
+NULL
+#> NULL
+
+#' @title Peak grouping based on time dimension peak densities
+#'
+#' @description This method performs correspondence (chromatographic
+#' peak grouping) based on the density (distribution) of identified peaks
+#' along the retention time axis within slices of overlapping mz ranges.
+#' All peaks (from the same or from different samples) being close on the
+#' retention time axis are grouped into a feature (\emph{peak group}).
+#'
+#' @note These methods and classes are part of the updated and modernized
+#' \code{xcms} user interface which will eventually replace the
+#' \code{\link{group}} methods.
All of the settings to the algorithm
+#' can be passed with a \code{PeakDensityParam} object.
+#'
+#' @param sampleGroups A vector of the same length as samples defining the
+#' sample group assignments (i.e. which samples belong to which sample
+#' group).
+#'
+#' @param bw \code{numeric(1)} defining the bandwidth (standard deviation of the
+#' smoothing kernel) to be used. This argument is passed to the
+#' \code{\link{density}} method.
+#'
+#' @param minFraction \code{numeric(1)} defining the minimum fraction of samples
+#' in at least one sample group in which the peaks have to be present to be
+#' considered as a peak group (feature).
+#'
+#' @param minSamples \code{numeric(1)} with the minimum number of samples in at
+#' least one sample group in which the peaks have to be detected to be
+#' considered a peak group (feature).
+#'
+#' @param binSize \code{numeric(1)} defining the size of the overlapping slices
+#' in mz dimension.
+#'
+#' @param maxFeatures \code{numeric(1)} with the maximum number of peak groups
+#' to be identified in a single mz slice.
+#'
+#' @family peak grouping methods
+#'
+#' @seealso The \code{\link{do_groupChromPeaks_density}} core
+#' API function and \code{\link{group.density}} for the old user interface.
+#'
+#' @seealso \code{\link{plotChromPeakDensity}} to plot peak densities and
+#' evaluate different algorithm settings.
+#' \code{\link{featureDefinitions}} and
+#' \code{\link{featureValues,XCMSnExp-method}} for methods to access the
+#' features (i.e. the peak grouping results).
+#'
+#' @name groupChromPeaks-density
+#'
+#' @author Colin Smith, Johannes Rainer
+#'
+#' @references
+#' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and
+#' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite
+#' Profiling Using Nonlinear Peak Alignment, Matching, and Identification"
+#' \emph{Anal. Chem.} 2006, 78:779-787.
+NULL
+#> NULL
+
+#' @description The \code{PeakDensityParam} class allows to specify all
+#' settings for the peak grouping based on peak densities along the time
+#' dimension. Instances should be created with the \code{PeakDensityParam}
+#' constructor.
+#'
+#' @slot .__classVersion__,sampleGroups,bw,minFraction,minSamples,binSize,maxFeatures See corresponding parameter above. \code{.__classVersion__} stores
+#' the version from the class. Slots values should exclusively be accessed
+#' \emph{via} the corresponding getter and setter methods listed above.
+#'
+#' @rdname groupChromPeaks-density
+#'
+#' @examples
+#'
+#' ## Create a PeakDensityParam object
+#' p <- PeakDensityParam(binSize = 0.05)
+#' ## Change the minSamples slot
+#' minSamples(p) <- 3
+#' p
+#'
+#' ##############################
+#' ## Chromatographic peak detection and grouping.
+#' ##
+#' ## Below we perform first a peak detection (using the matchedFilter
+#' ## method) on some of the test files from the faahKO package followed by
+#' ## a peak grouping using the density method.
+#' library(faahKO)
+#' library(MSnbase)
+#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
+#' full.names = TRUE)
+#'
+#' ## Reading 2 of the KO samples
+#' raw_data <- readMSData2(fls[1:2])
+#'
+#' ## Perform the chromatographic peak detection using the matchedFilter method.
+#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1)
+#' res <- findChromPeaks(raw_data, param = mfp)
+#'
+#' head(chromPeaks(res))
+#' ## The number of peaks identified per sample:
+#' table(chromPeaks(res)[, "sample"])
+#'
+#' ## Performing the chromatographic peak grouping
+#' fdp <- PeakDensityParam()
+#' res <- groupChromPeaks(res, fdp)
+#'
+#' ## The definition of the features (peak groups):
+#' featureDefinitions(res)
+#'
+#' ## Using the featureValues method to extract a matrix with the intensities of
+#' ## the features per sample.
+#' head(featureValues(res, value = "into")) +#' +#' ## The process history: +#' processHistory(res) +setClass("PeakDensityParam", + slots = c(sampleGroups = "ANY", + bw = "numeric", + minFraction = "numeric", + minSamples = "numeric", + binSize = "numeric", + maxFeatures = "numeric"), + contains = "Param", + prototype = prototype( + sampleGroups = numeric(), + bw = 30, + minFraction = 0.5, + minSamples = 1, + binSize = 0.25, + maxFeatures = 50), + validity = function(object) { + msg <- character() + if (length(object@bw) > 1 | any(object@bw < 0)) + msg <- c(msg, paste0("'bw' has to be a ", + "positive numeric of length 1!")) + if (length(object@minFraction) > 1 | any(object@minFraction < 0) | + any(object@minFraction > 1)) + msg <- c(msg, paste0("'minFraction' has to be a ", + "single positive number between ", + "0 and 1!")) + if (length(object@minSamples) > 1 | any(object@minSamples < 0)) + msg <- c(msg, paste0("'minSamples' has to be a ", + "positive numeric of length 1!")) + if (length(object@binSize) > 1 | any(object@binSize < 0)) + msg <- c(msg, paste0("'binSize' has to be a ", + "positive numeric of length 1!")) + if (length(object@maxFeatures) > 1 | any(object@maxFeatures < 0)) + msg <- c(msg, paste0("'maxFeatures' has to be a ", + "positive numeric of length 1!")) + if (length(msg)) + return(msg) + else return(TRUE) + }) + +## Main group.mzClust documentation. +#' @title High resolution peak grouping for single spectra samples +#' +#' @description This method performs high resolution correspondence for single +#' spectra samples. +#' +#' @note These methods and classes are part of the updated and modernized +#' \code{xcms} user interface which will eventually replace the +#' \code{\link{group}} methods. All of the settings to the algorithm +#' can be passed with a \code{MzClustParam} object. 
+#' +#' @inheritParams groupChromPeaks-density +#' +#' @param ppm \code{numeric(1)} representing the relative mz error for the +#' clustering/grouping (in parts per million). +#' +#' @param absMz \code{numeric(1)} representing the absolute mz error for the +#' clustering. +#' +#' @family peak grouping methods +#' +#' @seealso The \code{\link{do_groupPeaks_mzClust}} core API function and +#' \code{\link{group.mzClust}} for the old user interface. +#' \code{\link{featureDefinitions}} and +#' \code{\link{featureValues,XCMSnExp-method}} for methods to access peak +#' grouping results (i.e. the features). +#' +#' @name groupChromPeaks-mzClust +#' +#' @references Saira A. Kazmi, Samiran Ghosh, Dong-Guk Shin, Dennis W. Hill +#' and David F. Grant\cr \emph{Alignment of high resolution mass spectra: +#' development of a heuristic approach for metabolomics}.\cr Metabolomics, +#' Vol. 2, No. 2, 75-83 (2006) +NULL +#> NULL + +#' @description The \code{MzClustParam} class allows to specify all +#' settings for the peak grouping based on the \emph{mzClust} algorithm. +#' Instances should be created with the \code{MzClustParam} constructor. +#' +#' @slot .__classVersion__,sampleGroups,ppm,absMz,minFraction,minSamples See corresponding parameter above. \code{.__classVersion__} stores +#' the version from the class. Slots values should exclusively be accessed +#' \emph{via} the corresponding getter and setter methods listed above. +#' +#' @rdname groupChromPeaks-mzClust +#' +#' @examples +#' +#' ## Loading a small subset of direct injection, single spectrum files +#' library(msdata) +#' fticrf <- list.files(system.file("fticr", package = "msdata"), +#' recursive = TRUE, full.names = TRUE) +#' fticr <- readMSData2(fticrf[1:2], msLevel. 
= 1) +#' +#' ## Perform the MSW peak detection on these: +#' p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005, +#' SNR.method = "data.mean", winSize.noise = 500) +#' fticr <- findChromPeaks(fticr, param = p) +#' +#' head(chromPeaks(fticr)) +#' +#' ## Now create the MzClustParam parameter object: we're assuming here that +#' ## both samples are from the same sample group. +#' p <- MzClustParam(sampleGroups = c(1, 1)) +#' +#' fticr <- groupChromPeaks(fticr, param = p) +#' +#' ## Get the definition of the features. +#' featureDefinitions(fticr) +setClass("MzClustParam", + slots = c(sampleGroups = "ANY", + ppm = "numeric", + absMz = "numeric", + minFraction = "numeric", + minSamples = "numeric"), + contains = "Param", + prototype = prototype( + sampleGroups = numeric(), + ppm = 20, + absMz = 0, + minFraction = 0.5, + minSamples = 1), + validity = function(object) { + msg <- character() + if (length(object@ppm) > 1 | any(object@ppm < 0)) + msg <- c(msg, paste0("'ppm' has to be a ", + "positive numeric of length 1!")) + if (length(object@absMz) > 1 | any(object@absMz < 0)) + msg <- c(msg, paste0("'absMz' has to be a ", + "positive numeric of length 1!")) + if (length(object@minFraction) > 1 | any(object@minFraction < 0) | + any(object@minFraction > 1)) + msg <- c(msg, paste0("'minFraction' has to be a ", + "single positive number between ", + "0 and 1!")) + if (length(object@minSamples) > 1 | any(object@minSamples < 0)) + msg <- c(msg, paste0("'minSamples' has to be a ", + "positive numeric of length 1!")) + if (length(msg)) + msg else - return(msg) + TRUE + }) + +## Main group.nearest documentation. +#' @title Peak grouping based on proximity in the mz-rt space +#' +#' @description This method is inspired by the grouping algorithm of mzMine +#' [Katajamaa 2006] and performs correspondence based on proximity of peaks +#' in the space spanned by retention time and mz values. 
+#' The method creates first a \emph{master peak list} consisting of all
+#' chromatographic peaks from the sample in which most peaks were
+#' identified, and starting from that, calculates distances to peaks from
+#' the sample with the next most number of peaks. If peaks are closer than
+#' the defined threshold they are grouped together.
+#'
+#' @note These methods and classes are part of the updated and modernized
+#' \code{xcms} user interface which will eventually replace the
+#' \code{\link{group}} methods. All of the settings to the algorithm
+#' can be passed with a \code{NearestPeaksParam} object.
+#'
+#' @inheritParams groupChromPeaks-density
+#'
+#' @param mzVsRtBalance \code{numeric(1)} representing the factor by which mz
+#' values are multiplied before calculating the (euclidean) distance between
+#' two peaks.
+#'
+#' @param absMz \code{numeric(1)} maximum tolerated distance for mz values.
+#'
+#' @param absRt \code{numeric(1)} maximum tolerated distance for rt values.
+#'
+#' @param kNN \code{numeric(1)} representing the number of nearest neighbors
+#' to check.
+#'
+#' @family peak grouping methods
+#'
+#' @seealso The \code{\link{do_groupChromPeaks_nearest}} core
+#' API function and \code{\link{group.nearest}} for the old user interface.
+#' \code{\link{featureDefinitions}} and
+#' \code{\link{featureValues,XCMSnExp-method}} for methods to access
+#' peak grouping results (i.e. the features).
+#'
+#' @name groupChromPeaks-nearest
+#'
+#' @references Katajamaa M, Miettinen J, Oresic M: MZmine: Toolbox for
+#' processing and visualization of mass spectrometry based molecular profile
+#' data. \emph{Bioinformatics} 2006, 22:634-636.
+NULL
+#> NULL
+
+#' @description The \code{NearestPeaksParam} class allows to specify all
+#' settings for the peak grouping based on the \emph{nearest} algorithm.
+#' Instances should be created with the \code{NearestPeaksParam} constructor.
+#'
+#' @slot .__classVersion__,sampleGroups,mzVsRtBalance,absMz,absRt,kNN See corresponding parameter above. \code{.__classVersion__} stores
+#' the version from the class. Slots values should exclusively be accessed
+#' \emph{via} the corresponding getter and setter methods listed above.
+#'
+#' @rdname groupChromPeaks-nearest
+#'
+#' @examples
+#'
+#' ## Create a NearestPeaksParam object
+#' p <- NearestPeaksParam(kNN = 3)
+#' p
+#'
+#' ##############################
+#' ## Chromatographic peak detection and grouping.
+#' ##
+#' ## Below we perform first a chromatographic peak detection (using the
+#' ## matchedFilter method) on some of the test files from the faahKO package
+#' ## followed by a peaks grouping using the "nearest" method.
+#' library(faahKO)
+#' library(MSnbase)
+#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
+#' full.names = TRUE)
+#'
+#' ## Reading 2 of the KO samples
+#' raw_data <- readMSData2(fls[1:2])
+#'
+#' ## Perform the peak detection using the matchedFilter method.
+#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1)
+#' res <- findChromPeaks(raw_data, param = mfp)
+#'
+#' head(chromPeaks(res))
+#' ## The number of peaks identified per sample:
+#' table(chromPeaks(res)[, "sample"])
+#'
+#' ## Performing the peak grouping
+#' p <- NearestPeaksParam()
+#' res <- groupChromPeaks(res, param = p)
+#'
+#' ## The results from the peak grouping:
+#' featureDefinitions(res)
+#'
+#' ## Using the featureValues method to extract a matrix with the intensities of
+#' ## the features per sample.
+#' head(featureValues(res, value = "into")) +#' +#' ## The process history: +#' processHistory(res) +setClass("NearestPeaksParam", + slots = c(sampleGroups = "ANY", + mzVsRtBalance = "numeric", + absMz = "numeric", + absRt = "numeric", + kNN = "numeric"), + contains = "Param", + prototype = prototype( + sampleGroups = numeric(), + mzVsRtBalance = 10, + absMz = 0.2, + absRt = 15, + kNN = 10), + validity = function(object) { + msg <- character() + if (length(object@mzVsRtBalance) > 1 | + any(object@mzVsRtBalance < 0)) + msg <- c(msg, paste0("'mzVsRtBalance' has to be a ", + "positive numeric of length 1!")) + if (length(object@absMz) > 1 | any(object@absMz < 0)) + msg <- c(msg, paste0("'absMz' has to be a ", + "positive numeric of length 1!")) + if (length(object@absRt) > 1 | any(object@absRt < 0)) + msg <- c(msg, paste0("'absRt' has to be a ", + "positive numeric of length 1!")) + if (length(object@kNN) > 1 | any(object@kNN < 0)) + msg <- c(msg, paste0("'kNN' has to be a ", + "positive numeric of length 1!")) + if (length(msg)) + msg + else TRUE }) -#### -## DataFrame or matrix? -## row subsetting: 400:800, : matrix very fast, data.frame, DataFrame. -## column subsetting: , 2: data.frame fast, matrix, DataFrame -## splitting: matrix fastest (but return type is not a matrix). -##' @aliases MsFeatureData -##' @title Data container storing xcms preprocessing results -##' -##' @description The \code{MsFeatureData} class is designed to encapsule all -##' data related to the preprocessing of metabolomics data using the \code{xcms} -##' package, i.e. it contains a \code{matrix} with the features identified by the -##' feature detection, a \code{DataFrame} with the information on aligned -##' features across samples and a \code{list} with the adjusted retention times -##' per sample. -##' -##' @rdname XCMSnExp-class + +#' @title Alignment: Retention time correction methods. 
+#' +#' @description The \code{adjustRtime} method(s) perform retention time +#' correction (alignment) between chromatograms of different samples. These +#' methods are part of the modernized \code{xcms} user interface. +#' +#' The implemented retention time adjustment methods are: +#' \describe{ +#' \item{peakGroups}{retention time correction based on aligment of +#' features (peak groups) present in most/all samples. +#' See \code{\link{adjustRtime-peakGroups}} for more details.} +#' +#' \item{obiwarp}{alignment based on the complete mz-rt data. This method +#' does not require any identified peaks or defined features. See +#' \code{\link{adjustRtime-obiwarp}} for more details.} +#' } +#' @name adjustRtime +#' +#' @family retention time correction methods +#' +#' @seealso \code{\link{retcor}} for the \emph{old} retention time correction +#' methods. +#' \code{\link{plotAdjustedRtime}} for visualization of alignment results. +#' +#' @author Johannes Rainer +NULL +#> NULL + +## Main retcor.peakgroups documentation. +#' @title Retention time correction based on alignment of house keeping peak +#' groups +#' +#' @description This method performs retention time adjustment based on the +#' alignment of chromatographic peak groups present in all/most samples +#' (hence corresponding to house keeping compounds). First the retention +#' time deviation of these peak groups is described by fitting either a +#' polynomial (\code{smooth = "loess"}) or a linear ( +#' \code{smooth = "linear"}) model to the data points. These models are +#' subsequently used to adjust the retention time of each spectrum in +#' each sample. +#' +#' @note These methods and classes are part of the updated and modernized +#' \code{xcms} user interface which will eventually replace the +#' \code{\link{group}} methods. All of the settings to the alignment +#' algorithm can be passed with a \code{PeakGroupsParam} object. 
+#' +#' The matrix with the (raw) retention times of the peak groups used +#' in the alignment is added to the \code{peakGroupsMatrix} slot of the +#' \code{PeakGroupsParam} object that is stored into the corresponding +#' \emph{process history step} (see \code{\link{processHistory}} for how +#' to access the process history). +#' +#' @param minFraction \code{numeric(1)} between 0 and 1 defining the minimum +#' required fraction of samples in which peaks for the peak group were +#' identified. Peak groups passing this criteria will aligned across +#' samples and retention times of individual spectra will be adjusted +#' based on this alignment. For \code{minFraction = 1} the peak group +#' has to contain peaks in all samples of the experiment. +#' +#' @param extraPeaks \code{numeric(1)} defining the maximal number of +#' additional peaks for all samples to be assigned to a peak group (i.e. +#' feature) for retention time correction. For a data set with 6 samples, +#' \code{extraPeaks = 1} uses all peak groups with a total peak count +#' \code{<= 6 + 1}. The total peak count is the total number of peaks being +#' assigned to a peak group and considers also multiple peaks within a +#' sample being assigned to the group. +#' +#' @param smooth character defining the function to be used, to interpolate +#' corrected retention times for all peak groups. Either \code{"loess"} or +#' \code{"linear"}. +#' +#' @param span \code{numeric(1)} defining the degree of smoothing (if +#' \code{smooth = "loess"}). This parameter is passed to the internal call +#' to \code{\link{loess}}. +#' +#' @param family character defining the method to be used for loess smoothing. +#' Allowed values are \code{"gaussian"} and \code{"symmetric"}.See +#' \code{\link{loess}} for more information. +#' +#' @param peakGroupsMatrix optional \code{matrix} of (raw) retention times for +#' the peak groups on which the alignment should be performed. 
Each column +#' represents a sample, each row a feature/peak group. Such a matrix is +#' for example returned by the \code{\link{adjustRtimePeakGroups}} method. +#' +#' @family retention time correction methods +#' +#' @seealso The \code{\link{do_adjustRtime_peakGroups}} core +#' API function and \code{\link{retcor.peakgroups}} for the old user +#' interface. +#' \code{\link{plotAdjustedRtime}} for visualization of alignment results. +#' +#' @name adjustRtime-peakGroups +#' +#' @author Colin Smith, Johannes Rainer +#' +#' @references +#' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and +#' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite +#' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" +#' \emph{Anal. Chem.} 2006, 78:779-787. +NULL +#> NULL + +#' @description The \code{PeakGroupsParam} class allows to specify all +#' settings for the retention time adjustment based on \emph{house keeping} +#' peak groups present in most samples. +#' Instances should be created with the \code{PeakGroupsParam} constructor. +#' +#' @slot .__classVersion__,minFraction,extraPeaks,smooth,span,family,peakGroupsMatrix See corresponding parameter above. \code{.__classVersion__} stores +#' the version from the class. Slots values should exclusively be accessed +#' \emph{via} the corresponding getter and setter methods listed above. +#' +#' @rdname adjustRtime-peakGroups +#' +#' @examples +#' ############################## +#' ## Chromatographic peak detection and grouping. +#' ## +#' ## Below we perform first a peak detection (using the matchedFilter +#' ## method) on some of the test files from the faahKO package followed by +#' ## a peak grouping. 
+#' library(faahKO) +#' library(xcms) +#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, +#' full.names = TRUE) +#' +#' ## Reading 2 of the KO samples +#' raw_data <- readMSData2(fls[1:2]) +#' +#' ## Perform the peak detection using the matchedFilter method. +#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) +#' res <- findChromPeaks(raw_data, param = mfp) +#' +#' head(chromPeaks(res)) +#' ## The number of peaks identified per sample: +#' table(chromPeaks(res)[, "sample"]) +#' +#' ## Performing the peak grouping using the "peak density" method. +#' p <- PeakDensityParam(sampleGroups = c(1, 1)) +#' res <- groupChromPeaks(res, param = p) +#' +#' ## Perform the retention time adjustment using peak groups found in both +#' ## files. +#' fgp <- PeakGroupsParam(minFraction = 1) +#' +#' ## Before running the alignment we can evaluate which features (peak groups) +#' ## would be used based on the specified parameters. +#' pkGrps <- adjustRtimePeakGroups(res, param = fgp) +#' +#' ## We can also plot these to evaluate if the peak groups span a large portion +#' ## of the retention time range. +#' plot(x = pkGrps[, 1], y = rep(1, nrow(pkGrps)), xlim = range(rtime(res)), +#' ylim = c(1, 2), xlab = "rt", ylab = "", yaxt = "n") +#' points(x = pkGrps[, 2], y = rep(2, nrow(pkGrps))) +#' segments(x0 = pkGrps[, 1], x1 = pkGrps[, 2], +#' y0 = rep(1, nrow(pkGrps)), y1 = rep(2, nrow(pkGrps))) +#' grid() +#' axis(side = 2, at = c(1, 2), labels = colnames(pkGrps)) +#' +#' ## Next we perform the alignment. +#' res <- adjustRtime(res, param = fgp) +#' +#' ## Any grouping information was dropped +#' hasFeatures(res) +#' +#' ## Plot the raw against the adjusted retention times. 
+#' plot(rtime(raw_data), rtime(res), pch = 16, cex = 0.25, col = fromFile(res)) +#' +#' ## Adjusterd retention times can be accessed using +#' ## rtime(object, adjusted = TRUE) and adjustedRtime +#' all.equal(rtime(res), adjustedRtime(res)) +#' +#' ## To get the raw, unadjusted retention times: +#' all.equal(rtime(res, adjusted = FALSE), rtime(raw_data)) +#' +#' ## To extract the retention times grouped by sample/file: +#' rts <- rtime(res, bySample = TRUE) +setClass("PeakGroupsParam", + slots = c(minFraction = "numeric", + extraPeaks = "numeric", + smooth = "character", + span = "numeric", + family = "character", + peakGroupsMatrix = "matrix"), + contains = "Param", + prototype = prototype( + minFraction = 0.9, + extraPeaks = 1, + smooth = "loess", + span = 0.2, + family = "gaussian", + peakGroupsMatrix = matrix(ncol = 0, nrow = 0) + ), + validity = function(object) { + msg <- character() + if (length(object@minFraction) > 1 | + any(object@minFraction < 0) | + any(object@minFraction > 1)) + msg <- c(msg, paste0("'minFraction' has to be a single", + " number between 0 and 1!")) + if (length(object@extraPeaks) > 1 | + any(object@extraPeaks < 0)) + msg <- c(msg, paste0("'extraPeaks' has to be a ", + "positive numeric of length 1!")) + if (length(object@span) > 1 | any(object@span < 0)) + msg <- c(msg, paste0("'span' has to be a ", + "positive numeric of length 1!")) + if (length(object@smooth) > 1 | + !all(object@smooth %in% c("loess", "linear"))) + msg <- c(msg, paste0("'smooth' has to be either \"", + "loess\" or \"linear\"!")) + if (length(object@family) > 1 | + !all(object@family %in% c("gaussian", "symmetric"))) + msg <- c(msg, paste0("'family' has to be either \"", + "gaussian\" or \"symmetric\"!")) + if (length(msg)) + msg + else TRUE + }) + +#' @title Align retention times across samples using Obiwarp +#' +#' @description This method performs retention time adjustment using the +#' Obiwarp method [Prince 2006]. 
It is based on the code at +#' \url{http://obi-warp.sourceforge.net} but supports alignment of multiple +#' samples by aligning each against a \emph{center} sample. The alignment is +#' performed directly on the \code{\link{profile-matrix}} and can hence be +#' performed independently of the peak detection or peak grouping. +#' +#' @note These methods and classes are part of the updated and modernized +#' \code{xcms} user interface which will eventually replace the +#' \code{\link{retcor}} methods. All of the settings to the alignment +#' algorithm can be passed with a \code{ObiwarpParam} object. +#' +#' @param binSize \code{numeric(1)} defining the bin size (in mz dimension) +#' to be used for the \emph{profile matrix} generation. See \code{step} +#' parameter in \code{\link{profile-matrix}} documentation for more details. +#' +#' @param centerSample \code{integer(1)} defining the index of the center sample +#' in the experiment. It defaults to +#' \code{floor(median(1:length(fileNames(object))))}. +#' +#' @param response \code{numeric(1)} defining the \emph{responsiveness} of +#' warping with \code{response = 0} giving linear warping on start and end +#' points and \code{response = 100} warping using all bijective anchors. +#' +#' @param distFun character defining the distance function to be used. Allowed +#' values are \code{"cor"} (Pearson's correlation), \code{"cor_opt"} +#' (calculate only 10\% diagonal band of distance matrix; better runtime), +#' \code{"cov"} (covariance), \code{"prd"} (product) and \code{"euc"} +#' (Euclidian distance). The default value is \code{distFun = "cor_opt"}. +#' +#' @param gapInit \code{numeric(1)} defining the penalty for gap opening. The +#' default value for \code{gapInit} depends on the value of \code{distFun}: +#' for \code{distFun = "cor"} and \code{distFun = "cor_opt"} it is +#' \code{0.3}, for \code{distFun = "cov"} and \code{distFun = "prd"} +#' \code{0.0} and for \code{distFun = "euc"} \code{0.9}. 
+#'
+#' @param gapExtend \code{numeric(1)} defining the penalty for gap enlargement.
+#' The default value for \code{gapExtend} depends on the value of
+#' \code{distFun}, for \code{distFun = "cor"} and
+#' \code{distFun = "cor_opt"} it is \code{2.4}, for \code{distFun = "cov"}
+#' \code{11.7}, for \code{distFun = "euc"} \code{1.8} and for
+#' \code{distFun = "prd"} \code{7.8}.
+#'
+#' @param factorDiag \code{numeric(1)} defining the local weight applied to
+#' diagonal moves in the alignment.
+#'
+#' @param factorGap \code{numeric(1)} defining the local weight for gap moves
+#' in the alignment.
+#'
+#' @param localAlignment \code{logical(1)} whether a local alignment should be
+#' performed instead of the default global alignment.
+#'
+#' @param initPenalty \code{numeric(1)} defining the penalty for initiating an
+#' alignment (for local alignment only).
+#'
+#' @family retention time correction methods
+#'
+#' @seealso \code{\link{retcor.obiwarp}} for the old user interface.
+#' \code{\link{plotAdjustedRtime}} for visualization of alignment results.
+#'
+#' @name adjustRtime-obiwarp
+#'
+#' @author Colin Smith, Johannes Rainer
+#'
+#' @references
+#' John T. Prince and Edward M. Marcotte. "Chromatographic Alignment of
+#' ESI-LC-MS Proteomics Data Sets by Ordered Bijective Interpolated Warping"
+#' \emph{Anal. Chem.} 2006, 78(17):6140-6152.
+
+NULL
+#> NULL
+
+#' @description The \code{ObiwarpParam} class allows to specify all
+#' settings for the retention time adjustment based on the \emph{obiwarp}
+#' method. Class instances should be created using the
+#' \code{ObiwarpParam} constructor.
+#'
+#' @slot .__classVersion__,binSize,centerSample,response,distFun,gapInit,gapExtend,factorDiag,factorGap,localAlignment,initPenalty See corresponding parameter above. \code{.__classVersion__} stores
+#' the version from the class. Slots values should exclusively be accessed
+#' \emph{via} the corresponding getter and setter methods listed above.
+#' +#' @rdname adjustRtime-obiwarp +#' +#' @examples +#' library(faahKO) +#' library(MSnbase) +#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, +#' full.names = TRUE) +#' +#' ## Reading 2 of the KO samples +#' raw_data <- readMSData2(fls[1:2]) +#' +#' ## Perform retention time correction on the OnDiskMSnExp: +#' res <- adjustRtime(raw_data, param = ObiwarpParam()) +#' +#' ## As a result we get a numeric vector with the adjusted retention times for +#' ## all spectra. +#' head(res) +#' +#' ## We can split this by file to get the adjusted retention times for each +#' ## file +#' resL <- split(res, fromFile(raw_data)) +#' +#' ############################## +#' ## Perform retention time correction on an XCMSnExp: +#' ## +#' ## Perform first the chromatographic peak detection using the matchedFilter +#' ## method. +#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) +#' res <- findChromPeaks(raw_data, param = mfp) +#' +#' ## Performing the retention time adjustment using obiwarp. +#' res_2 <- adjustRtime(res, param = ObiwarpParam()) +#' +#' head(rtime(res_2)) +#' head(rtime(raw_data)) +#' +#' ## Also the retention times of the detected peaks were adjusted. 
+#' tail(chromPeaks(res)) +#' tail(chromPeaks(res_2)) +setClass("ObiwarpParam", + slots = c(binSize = "numeric", + centerSample = "integer", + response = "integer", + distFun = "character", + gapInit = "numeric", + gapExtend = "numeric", + factorDiag = "numeric", + factorGap = "numeric", + localAlignment = "logical", + initPenalty = "numeric"), + contains = "Param", + prototype = prototype( + binSize = 1, + centerSample = integer(), + response = 1L, + distFun = "cor_opt", + gapInit = numeric(), + gapExtend = numeric(), + factorDiag = 2, + factorGap = 1, + localAlignment = FALSE, + initPenalty = 0), + validity = function(object) { + msg <- character() + if (length(object@binSize) > 1 | + any(object@binSize < 0)) + msg <- c(msg, paste0("'binSize' has to be a positive", + " numeric of length 1!")) + if (length(object@centerSample) > 1 | + any(object@centerSample < 0)) + msg <- c(msg, paste0("'centerSample' has to be a positive", + " numeric of length 1!")) + if (length(object@response) > 1 | + any(object@response < 0) | + any(object@response > 100)) + msg <- c(msg, paste0("'response' has to be a single ", + " integer from 1 to 100!")) + if (length(object@distFun) > 1 | + any(!(object@distFun %in% c("cor", "cor_opt", "cov", "euc", + "prd")))) + msg <- c(msg, paste0("'distFun' has to be one of \"cor\"", + ", \"cor_opt\", \"cov\", \"euc\"", + " or \"prd\"!")) + if (length(object@gapInit) > 1 | any(object@gapInit < 0)) + msg <- c(msg, paste0("'gapInit' has to be a positive", + " numeric of length 1!")) + if (length(object@gapExtend) > 1 | any(object@gapExtend < 0)) + msg <- c(msg, paste0("'gapExtend' has to be a positive", + " numeric of length 1!")) + if (length(object@factorDiag) > 1 | any(object@factorDiag < 0)) + msg <- c(msg, paste0("'factorDiag' has to be a positive", + " numeric of length 1!")) + if (length(object@factorGap) > 1 | any(object@factorGap < 0)) + msg <- c(msg, paste0("'factorGap' has to be a positive", + " numeric of length 1!")) + if 
(length(object@localAlignment) > 1)
+ msg <- c(msg, paste0("'localAlignment' has to be a ",
+ "logical of length 1!"))
+ if (length(object@initPenalty) > 1 | any(object@initPenalty < 0))
+ msg <- c(msg, paste0("'initPenalty' has to be a positive",
+ " numeric of length 1!"))
+ if (length(msg))
+ msg
+ else TRUE
+ })
+
+#' @description The \code{FillChromPeaksParam} object encapsulates all settings for
+#' the signal integration for missing peaks.
+#'
+#' @slot .__classVersion__,expandMz,expandRt,ppm See corresponding parameter above. \code{.__classVersion__} stores the version of the class.
+#'
+#' @rdname fillChromPeaks
+setClass("FillChromPeaksParam",
+ slots = c(expandMz = "numeric",
+ expandRt = "numeric",
+ ppm = "numeric"),
+ contains = "Param",
+ prototype = prototype(
+ expandMz = 0,
+ expandRt = 0,
+ ppm = 0
+ ),
+ validity = function(object) {
+ msg <- character()
+ if (length(object@expandMz) > 1 | any(object@expandMz < -1))
+ msg <- c(msg, "'expandMz' has to be > -1 and of length 1")
+ if (length(object@expandRt) > 1 | any(object@expandRt < -1))
+ msg <- c(msg, "'expandRt' has to be > -1 and of length 1")
+ if (length(object@ppm) > 1 | any(object@ppm < 0))
+ msg <- c(msg, paste0("'ppm' has to be a positive",
+ " numeric of length 1!"))
+ if (length(msg))
+ msg
+ else TRUE
+ }
+ )
+
+#' @aliases MsFeatureData
+#'
+#' @title Data container storing xcms preprocessing results
+#'
+#' @description The \code{MsFeatureData} class is designed to encapsulate all
+#' data related to the preprocessing of metabolomics data using the
+#' \code{xcms} package, i.e. it contains a \code{matrix} with the
+#' chromatographic peaks identified by the peak detection, a
+#' \code{DataFrame} with the definition on grouped chromatographic peaks
+#' across samples and a \code{list} with the adjusted retention times per
+#' sample. 
+#' +#' @rdname XCMSnExp-class setClass("MsFeatureData", contains = c("environment", "Versioned"), prototype = prototype(.xData = new.env(parent = emptyenv()))) -.XCMS_REQ_FEATS_COLS <- c("mz", "mzmin", "mzmax", "rt", "rtmin", - "rtmax", "into", "sample") -.XCMS_REQ_FEATG_COLS <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax", - "featureidx") +.REQ_PEAKS_COLS <- c("mz", "mzmin", "mzmax", "rt", "rtmin", + "rtmax", "into", "sample") +.REQ_PEAKG_COLS <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax", + "peakidx") -##' @aliases XCMSnExp -##' @title Data container storing xcms preprocessing results -##' -##' @description The \code{XCMSnExp} object is designed to contain all results -##' from metabolomics data preprocessing (feature detection, feature alignment -##' and retention time correction). The corresponding elements in the -##' \code{msFeatureData} slot are \code{"features"} (a \code{matrix}), -##' \code{"featureGroups"} (a \code{DataFrame}) and \code{"adjustedRtime"} (a -##' \code{list} of numeric vectors). Note that these should not be accessed -##' directly but rather \emph{via} their accessor methods. Along with the results, -##' the object contains the processing history that allow to track each -##' processing step along with the used settings. The object also directly -##' extends the \code{\link[MSnbase]{OnDiskMSnExp}} object hence allowing easy -##' access to the full data on which the feature detection was performed. -##' -##' Objects from this class should not be created directly, they are returned as -##' result from the \code{\link{detectFeatures}} method. -##' -##' \code{XCMSnExp} objects can be coerced into \code{\linkS4class{xcmsSet}} -##' objects using the \code{as} method. 
-##' -##' @note The \code{"features"} element in the \code{msFeatureData} slot is -##' equivalent to the \code{@peaks} slot of the \code{xcmsSet} object, the -##' \code{"featureGroups"} contains information from the \code{} -##' -##' @slot .processHistory \code{list} with \code{XProcessHistory} objects -##' tracking all individual analysis steps that have been performed. -##' -##' @slot msFeatureData \code{MsFeatureData} class extending \code{environment} -##' and containing the results from a feature detection (element -##' \code{"features"}), feature alignment (element \code{"featureGroups"}) and -##' retention time correction (element \code{""}) steps. -##' -##' @param object For \code{adjustedRtime}, \code{featureGroups}, -##' \code{features}, \code{hasAdjustedRtime}, \code{hasAlignedFeatures} and -##' \code{hasDetectedFeatures} either a \code{MsFeatureData} or a \code{XCMSnExp} -##' object, for all other methods a \code{XCMSnExp} object. -##' -##' @param value For \code{adjustedRtime<-}: a \code{list} (length equal to the -##' number of samples) with numeric vectors representing the adjusted retention -##' times per scan. -##' -##' For \code{featureGroups<-}: a \code{DataFrame} with feature -##' alignment information. See return value for the \code{featureGroups} method -##' for the expected format. -##' -##' For \code{features<-}: a \code{matrix} with information on -##' detected features. See return value for the \code{features} method for the -##' expected format. -##' -##' @author Johannes Rainer -##' -##' @seealso \code{\linkS4class{xcmsSet}} for the old implementation. -##' @seealso \code{\link[MSnbase]{OnDiskMSnExp}}, \code{\link[MSnbase]{MSnExp}} -##' and \code{\link[MSnbase]{pSet}} for a complete list of inherited methods. -##' @seealso \code{\link{detectFeatures}} for available feature detection methods -##' returning a \code{XCMSnExp} object as a result. 
-##' -##' @rdname XCMSnExp-class -##' -##' @examples -##' -##' ## Loading the data from 2 files of the faahKO package. -##' library(faahKO) -##' od <- readMSData2(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), -##' system.file("cdf/KO/ko16.CDF", package = "faahKO"))) -##' ## Now we perform a feature detection on this data set using the -##' ## matched filter method. We are tuning the settings such that it performs -##' ## faster. -##' mfp <- MatchedFilterParam(binSize = 4) -##' xod <- detectFeatures(od, param = mfp) -##' -##' ## The results from the feature detection are now stored in the XCMSnExp -##' ## object -##' xod -##' -##' ## The detected features can be accessed with the features method. -##' head(features(xod)) -##' -##' ## The settings of the feature detection can be accessed with the -##' ## processHistory method -##' processHistory(xod) -##' -##' ## Also the parameter class for the feature detection can be accessed -##' processParam(processHistory(xod)[[1]]) -##' -##' ## The XCMSnExp inherits all methods from the pSet and OnDiskMSnExp classes -##' ## defined in Bioconductor's MSnbase package. To access the (raw) retention -##' ## time for each spectrum we can use the rtime method. Setting bySample = TRUE -##' ## would cause the retention times to be grouped by sample -##' head(rtime(xod)) -##' -##' ## Similarly it is possible to extract the mz values or the intensity values -##' ## using the mz and intensity method, respectively, also with the option to -##' ## return the results grouped by sample instead of the default, which is -##' ## grouped by spectrum. Finally, to extract all of the data we can use the -##' ## spectra method which returns Spectrum objects containing all raw data. -##' ## Note that all these methods read the information from the original input -##' ## files and subsequently apply eventual data processing steps to them. 
-##' head(mz(xod, bySample = TRUE)) -##' -##' ## Reading all data -##' spctr <- spectra(xod) -##' ## To get all spectra of the first file we can split them by file -##' head(split(spctr, fromFile(xod))[[1]]) -##' -##' ############ -##' ## Filtering -##' ## -##' ## XCMSnExp objects can be filtered by file, retention time, mz values or -##' ## MS level. For some of these filter preprocessing results (mostly -##' ## retention time correction and feature alignment results) will be dropped. -##' ## Below we filter the XCMSnExp object by file to extract the results for -##' ## only the second file. -##' xod_2 <- filterFile(xod, file = 2) -##' xod_2 -##' -##' ## Now the objects contains only the idenfified features for the second file -##' head(features(xod_2)) -##' -##' head(features(xod)[features(xod)[, "sample"] == 2, ]) -##' -##' ########## -##' ## Coercing to an xcmsSet object -##' ## -##' ## We can also coerce the XCMSnExp object into an xcmsSet object: -##' xs <- as(xod, "xcmsSet") -##' head(peaks(xs)) +#' @aliases XCMSnExp +#' +#' @title Data container storing xcms preprocessing results +#' +#' @description The \code{XCMSnExp} object is designed to contain all results +#' from metabolomics data preprocessing (chromatographic peak detection, +#' peak grouping (correspondence) and retention time correction). The +#' corresponding elements in the \code{msFeatureData} slot are +#' \code{"chromPeaks"} (a \code{matrix}), \code{"featureDefinitions"} +#' (a \code{DataFrame}) and \code{"adjustedRtime"} (a \code{list} of +#' numeric vectors). Note that these should not be accessed directly but +#' rather \emph{via} their accessor methods. +#' Along with the results, the object contains the processing history that +#' allow to track each processing step along with the used settings. The +#' object also directly extends the \code{\link[MSnbase]{OnDiskMSnExp}} +#' object hence allowing easy access to the full data on which the peak +#' detection was performed. 
+#' +#' Objects from this class should not be created directly, they are +#' returned as result from the \code{\link{findChromPeaks}} method. +#' +#' \code{XCMSnExp} objects can be coerced into \code{\linkS4class{xcmsSet}} +#' objects using the \code{as} method. +#' +#' @note The \code{"chromPeaks"} element in the \code{msFeatureData} slot is +#' equivalent to the \code{@peaks} slot of the \code{xcmsSet} object, the +#' \code{"featureDefinitions"} contains information from the \code{@groups} +#' and \code{@groupidx} slots from an \code{xcmsSet} object. +#' +#' @slot .processHistory \code{list} with \code{XProcessHistory} objects +#' tracking all individual analysis steps that have been performed. +#' +#' @slot msFeatureData \code{MsFeatureData} class extending \code{environment} +#' and containing the results from a chromatographic peak detection (element +#' \code{"chromPeaks"}), peak grouping (element \code{"featureDefinitions"}) +#' and retention time correction (element \code{"adjustedRtime"}) steps. +#' +#' @param object For \code{adjustedRtime}, \code{featureDefinitions}, +#' \code{chromPeaks}, \code{hasAdjustedRtime}, \code{hasFeatures} and +#' \code{hasChromPeaks} either a \code{MsFeatureData} or a \code{XCMSnExp} +#' object, for all other methods a \code{XCMSnExp} object. +#' +#' @param value For \code{adjustedRtime<-}: a \code{list} (length equal to the +#' number of samples) with numeric vectors representing the adjusted +#' retention times per scan. +#' +#' For \code{featureDefinitions<-}: a \code{DataFrame} with peak +#' grouping information. See return value for the \code{featureDefinitions} +#' method for the expected format. +#' +#' For \code{chromPeaks<-}: a \code{matrix} with information on +#' detected peaks. See return value for the \code{chromPeaks} method for the +#' expected format. +#' +#' @author Johannes Rainer +#' +#' @seealso \code{\linkS4class{xcmsSet}} for the old implementation. 
+#' \code{\link[MSnbase]{OnDiskMSnExp}}, \code{\link[MSnbase]{MSnExp}} +#' and \code{\link[MSnbase]{pSet}} for a complete list of inherited methods. +#' +#' \code{\link{findChromPeaks}} for available peak detection methods +#' returning a \code{XCMSnExp} object as a result. +#' +#' \code{\link{groupChromPeaks}} for available peak grouping +#' methods and \code{\link{featureDefinitions}} for the method to extract +#' the feature definitions representing the peak grouping results. +#' \code{\link{adjustRtime}} for retention time adjustment methods. +#' +#' \code{\link{extractChromatograms}} to extract MS data as +#' \code{\link{Chromatogram}} objects. +#' +#' \code{\link{extractMsData}} for the method to extract MS data as +#' \code{data.frame}s. +#' +#' @rdname XCMSnExp-class +#' +#' @examples +#' +#' ## Loading the data from 2 files of the faahKO package. +#' library(faahKO) +#' od <- readMSData2(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), +#' system.file("cdf/KO/ko16.CDF", package = "faahKO"))) +#' ## Now we perform a chromatographic peak detection on this data set using the +#' ## matched filter method. We are tuning the settings such that it performs +#' ## faster. +#' mfp <- MatchedFilterParam(binSize = 4) +#' xod <- findChromPeaks(od, param = mfp) +#' +#' ## The results from the peak detection are now stored in the XCMSnExp +#' ## object +#' xod +#' +#' ## The detected peaks can be accessed with the chromPeaks method. +#' head(chromPeaks(xod)) +#' +#' ## The settings of the chromatographic peak detection can be accessed with +#' ## the processHistory method +#' processHistory(xod) +#' +#' ## Also the parameter class for the peak detection can be accessed +#' processParam(processHistory(xod)[[1]]) +#' +#' ## The XCMSnExp inherits all methods from the pSet and OnDiskMSnExp classes +#' ## defined in Bioconductor's MSnbase package. To access the (raw) retention +#' ## time for each spectrum we can use the rtime method. 
Setting bySample = TRUE +#' ## would cause the retention times to be grouped by sample +#' head(rtime(xod)) +#' +#' ## Similarly it is possible to extract the mz values or the intensity values +#' ## using the mz and intensity method, respectively, also with the option to +#' ## return the results grouped by sample instead of the default, which is +#' ## grouped by spectrum. Finally, to extract all of the data we can use the +#' ## spectra method which returns Spectrum objects containing all raw data. +#' ## Note that all these methods read the information from the original input +#' ## files and subsequently apply eventual data processing steps to them. +#' head(mz(xod, bySample = TRUE)) +#' +#' ## Reading all data +#' spctr <- spectra(xod) +#' ## To get all spectra of the first file we can split them by file +#' head(split(spctr, fromFile(xod))[[1]]) +#' +#' ############ +#' ## Filtering +#' ## +#' ## XCMSnExp objects can be filtered by file, retention time, mz values or +#' ## MS level. For some of these filter preprocessing results (mostly +#' ## retention time correction and peak grouping results) will be dropped. +#' ## Below we filter the XCMSnExp object by file to extract the results for +#' ## only the second file. 
+#' xod_2 <- filterFile(xod, file = 2)
+#' xod_2
+#'
+#' ## Now the object contains only the identified peaks for the second file
+#' head(chromPeaks(xod_2))
+#'
+#' head(chromPeaks(xod)[chromPeaks(xod)[, "sample"] == 2, ])
+#'
+#' ##########
+#' ## Coercing to an xcmsSet object
+#' ##
+#' ## We can also coerce the XCMSnExp object into an xcmsSet object:
+#' xs <- as(xod, "xcmsSet")
+#' head(peaks(xs))
 setClass("XCMSnExp",
 slots = c(
 .processHistory = "list",
@@ -1362,65 +2342,114 @@ setClass("XCMSnExp",
 ),
 contains = c("OnDiskMSnExp"),
 validity = function(object) {
- msg <- validMsg(NULL, NULL)
+ msg <- character()
 if (length(object@.processHistory) > 0) {
 isOK <- unlist(lapply(object@.processHistory, function(z) {
 return(inherits(z, "ProcessHistory"))
 }))
 if (!all(isOK))
- msg <- validMsg(msg, paste0("Only 'ProcessHistory' ",
- "objects are allowed in slot ",
- ".processHistory!"))
+ msg <- c(msg, paste0("Only 'ProcessHistory' ",
+ "objects are allowed in slot ",
+ ".processHistory!"))
 }
 ## TODO @jo add checks:
 ## 1) call validMsFeatureData
- msg <- validMsg(msg, validateMsFeatureData(object@msFeatureData))
- if (!is.null(msg)) return(msg)
- ## 2) features[, "sample"] is within 1:number of samples
- if (any(ls(object@msFeatureData) == "features")) {
- if (!all(object@msFeatureData$features[, "sample"] %in%
+ msg <- c(msg, validateMsFeatureData(object@msFeatureData))
+ if (length(msg)) return(msg)
+ ## 2) peaks[, "sample"] is within 1:number of samples
+ if (any(ls(object@msFeatureData) == "chromPeaks")) {
+ if (!all(object@msFeatureData$chromPeaks[, "sample"] %in%
 1:length(fileNames(object))))
- msg <- validMsg(msg, paste0("The number of available ",
- "samples does not match with ",
- "the sample assignment of ",
- "features in the 'features' ",
- "element of the msFeatureData ",
- "slot!"))
+ msg <- c(msg, paste0("The number of available ",
+ "samples does not match with ",
+ "the sample assignment of ",
+ "peaks in the 'chromPeaks' ",
+ "element of the 
msFeatureData ", + "slot!")) } ## 3) Check that the length of the adjustedRtime matches! if (any(ls(object@msFeatureData) == "adjustedRtime")) { rt <- rtime(object, bySample = TRUE) if (length(rt) != length(object@msFeatureData$adjustedRtime)) { - msg <- validMsg(msg, paste0("The number of numeric vectors", - " in the 'adjustedRtime' element", - " of the msFeatureData slot does", - " not match the number of", - " samples!")) + msg <- c(msg, paste0("The number of numeric vectors", + " in the 'adjustedRtime' element", + " of the msFeatureData slot does", + " not match the number of", + " samples!")) } else { if (any(lengths(rt) != lengths(object@msFeatureData$adjustedRtime))) - msg <- validMsg(msg, - paste0("The lengths of the numeric ", - "vectors in the 'adjustedRtime'", - " element of the msFeatureData ", - "slot does not match the number", - " of scans per sample!")) + msg <- c(msg, + paste0("The lengths of the numeric ", + "vectors in the 'adjustedRtime'", + " element of the msFeatureData ", + "slot does not match the number", + " of scans per sample!")) } } - ## 3) If we've got features, check that we have also a related + ## 3) If we've got peaks, check that we have also a related ## processing history step. - if (is.null(msg)) - return(TRUE) - else return(msg) + if (length(msg)) + msg + else TRUE } ) -## testfun <- function(x, value = "index") { -## res <- lapply(x@groupidx, function(z, nsamps = length(filepaths(x))) { -## sampidx <- x@peaks[z, c("into", "sample"), drop = FALSE] -## tmp <- rep(NA, nsamps) -## tmp[sampidx[, "sample"]] <- z -## return(tmp) -## }) -## return(do.call(rbind, res)) -## } +#' @title Representation of chromatographic MS data +#' +#' @description The \code{Chromatogram} class is designed to store +#' chromatographic MS data, i.e. pairs of retention time and intensity +#' values. 
Instances of the class can be created with the +#' \code{Chromatogram} constructor function but in most cases the dedicated +#' methods for \code{\link{OnDiskMSnExp}} and \code{\link{XCMSnExp}} +#' objects extracting chromatograms should be used instead (i.e. the +#' \code{\link{extractChromatograms}}). +#' +#' @details The \code{mz}, \code{filterMz}, \code{precursorMz} and +#' \code{productMz} are stored as a \code{numeric(2)} representing a range +#' even if the chromatogram was generated for only a single ion (i.e. a +#' single mz value). Using ranges for \code{mz} values allow this class to +#' be used also for e.g. total ion chromatograms or base peak chromatograms. +#' +#' The slots \code{precursorMz} and \code{productMz} allow to represent SRM +#' (single reaction monitoring) and MRM (multiple SRM) chromatograms. As +#' example, a \code{Chromatogram} for a SRM transition 273 -> 153 will have +#' a \code{@precursorMz = c(273, 273)} and a +#' \code{@productMz = c(153, 153)}. +#' +#' @rdname Chromatogram-class +#' +#' @export +#' +#' @author Johannes Rainer +#' +#' @seealso \code{\link{extractChromatograms}} for the method to extract +#' \code{Chromatogram} objects from \code{\link{XCMSnExp}} or +#' \code{\link[MSnbase]{OnDiskMSnExp}} objects. +#' +#' \code{\link{plotChromatogram}} to plot \code{Chromatogram} objects. +setClass("Chromatogram", + slots = c( + rtime = "numeric", + intensity = "numeric", + mz = "numeric", + filterMz = "numeric", + precursorMz = "numeric", ## Or call that Q1mz? + productMz = "numeric", ## Or call that Q3mz? 
+ fromFile = "integer", + aggregationFun = "character" + ), + contains = "Versioned", + prototype = prototype( + rtime = numeric(), + intensity = numeric(), + mz = c(NA_real_, NA_real_), + filterMz = c(NA_real_, NA_real_), + precursorMz = c(NA_real_, NA_real_), + productMz = c(NA_real_, NA_real_), + fromFile = integer(), + aggregationFun = character() + ), + validity = function(object) + validChromatogram(object) + ) diff --git a/R/MPI.R b/R/MPI.R index 65fefbb97..1d4a7b2cf 100644 --- a/R/MPI.R +++ b/R/MPI.R @@ -1,4 +1,3 @@ - ## ## findPeaks slave function for parallel execution ## @@ -42,14 +41,18 @@ findPeaksPar <- function(arg) { peaks <- do.call(method, args = c(list(object = xRaw), params)) - list(scantime=xRaw@scantime, - peaks=cbind(peaks, - sample = rep.int(myID, nrow(peaks))), + ## Ensure to remove data to avoid memory accumulation. + scanT <- xRaw@scantime + rm(xRaw) + gc() + list(scantime = scanT, + peaks = cbind(peaks, + sample = rep.int(myID, nrow(peaks))), date = procDate) } ############################################################ -## detectFeatures +## findChromPeaks ## ## Same as findPeaksPar but without the need to pass argument lists ## and read settings from the global options. @@ -58,7 +61,7 @@ findPeaksPar <- function(arg) { ## o readParams: parameter class to read the file; actually we would only ## need the scanrange, the includeMSn and the lockMassFreq here. ## o detectParams: parameter class for the peak detection. -detectFeaturesInFile <- function(args) { +findChromPeaksInFile <- function(args) { ## Placeholder } @@ -87,12 +90,12 @@ fillPeaksChromPar <- function(arg) { if (length(params$dataCorrection) > 1) { ## Note: dataCorrection (as set in the xcmsSet function) is either ## 1 for all or for none. 
- if (any(params$dataCorrection) == 1) + if (any(params$dataCorrection == 1)) lcraw <- stitch(lcraw, AutoLockMass(lcraw)) } if (exists("params$polarity") && length(params$polarity) >0) { - if (length(params$polarity) >0) { + if (length(params$polarity) > 0) { ## Retain wanted polarity only lcraws <- split(lcraw, lcraw@polarity, DROP=TRUE) lcraw <- lcraws[[params$polarity]] @@ -108,15 +111,14 @@ fillPeaksChromPar <- function(arg) { } - # Expanding the peakrange - peakrange[,"mzmax"] <- peakrange[,"mzmax"] + ( (peakrange[,"mzmax"]-peakrange[,"mzmin"])/2 )*(expand.mz-1) - peakrange[,"mzmin"] <- peakrange[,"mzmin"] - ( (peakrange[,"mzmax"]-peakrange[,"mzmin"])/2 )*(expand.mz-1) - peakrange[,"rtmax"] <- peakrange[,"rtmax"] + ( (peakrange[,"rtmax"]-peakrange[,"rtmin"])/2 )*(expand.rt-1) - peakrange[,"rtmin"] <- peakrange[,"rtmin"] - ( (peakrange[,"rtmax"]-peakrange[,"rtmin"])/2 )*(expand.rt-1) - - - - + ## Expanding the peakrange + incrMz <- (peakrange[, "mzmax"] - peakrange[, "mzmin"]) / 2 * (expand.mz - 1) + peakrange[, "mzmax"] <- peakrange[, "mzmax"] + incrMz + peakrange[, "mzmin"] <- peakrange[, "mzmin"] - incrMz + incrRt <- (peakrange[, "rtmax"] - peakrange[, "rtmin"]) / 2 * (expand.rt - 1) + peakrange[, "rtmax"] <- peakrange[, "rtmax"] + incrRt + peakrange[, "rtmin"] <- peakrange[, "rtmin"] - incrRt + naidx <- which(is.na(gvals[,myID])) newpeaks <- getPeaks(lcraw, peakrange[naidx,,drop=FALSE], step = prof$step) diff --git a/R/cwTools.R b/R/cwTools.R index f6cee5722..05209d87b 100644 --- a/R/cwTools.R +++ b/R/cwTools.R @@ -423,6 +423,15 @@ fitGauss <- function(td, d, pgauss = NA) { as.data.frame(t(fit$m$getPars())) } +## ' @param +## ' @param d numeric vector with intensities of centroids within the peak. 
+## ' @param otd
+## ' @param omz
+## ' @param od
+## ' @param scantime
+## ' @param scan.range
+## ' @param peaks
+## ' @noRd
 joinOverlappingPeaks <- function(td, d, otd, omz, od, scantime, scan.range,
 peaks, maxGaussOverlap=0.5, mzCenterFun) {
diff --git a/R/do_adjustRtime-functions.R b/R/do_adjustRtime-functions.R
new file mode 100644
index 000000000..8a8c51327
--- /dev/null
+++ b/R/do_adjustRtime-functions.R
@@ -0,0 +1,488 @@
+## Retention time correction methods.
+#' @include DataClasses.R functions-MsFeatureData.R
+
+#' @title Align spectrum retention times across samples using peak groups
+#' found in most samples
+#'
+#' @description The function performs retention time correction by assessing
+#' the retention time deviation across all samples using peak groups
+#' (features) containing chromatographic peaks present in most/all samples.
+#' The retention time deviation for these features in each sample is
+#' described by fitting either a polynomial (\code{smooth = "loess"}) or
+#' a linear (\code{smooth = "linear"}) model to the data points. The
+#' models are subsequently used to adjust the retention time for each
+#' spectrum in each sample.
+#'
+#' @note The method ensures that returned adjusted retention times are
+#' increasingly ordered, just as the raw retention times.
+#'
+#' @details The alignment is based on the presence of compounds that can be found
+#' in all/most samples of an experiment. The retention times of individual
+#' spectra are then adjusted based on the alignment of the features
+#' corresponding to these \emph{house keeping compounds}. The parameters
+#' \code{minFraction} and \code{extraPeaks} can be used to fine tune which
+#' features should be used for the alignment (i.e. which features
+#' most likely correspond to the above mentioned house keeping compounds).
+#'
+#' @inheritParams adjustRtime-peakGroups
+#'
+#' @param peaks a \code{matrix} or \code{data.frame} with the identified
+#' chromatographic peaks in the samples. 
+#' +#' @param peakIndex a \code{list} of indices that provides the grouping +#' information of the chromatographic peaks (across and within samples). +#' +#' @param rtime a \code{list} of \code{numeric} vectors with the retention +#' times per file/sample. +#' +#' @param peakGroupsMatrix optional \code{matrix} of (raw) retention times for +#' peak groups on which the alignment should be performed. Each column +#' represents a sample, each row a feature/peak group. If not provided, +#' this matrix will be determined depending on parameters +#' \code{minFraction} and \code{extraPeaks}. If provided, +#' \code{minFraction} and \code{extraPeaks} will be ignored. +#' +#' @return A \code{list} with \code{numeric} vectors with the adjusted +#' retention times grouped by sample. +#' +#' @family core retention time correction algorithms +#' +#' @author Colin Smith, Johannes Rainer +#' +#' @references +#' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and +#' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite +#' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" +#' \emph{Anal. Chem.} 2006, 78:779-787. +do_adjustRtime_peakGroups <- + function(peaks, peakIndex, rtime, minFraction = 0.9, extraPeaks = 1, + smooth = c("loess", "linear"), span = 0.2, + family = c("gaussian", "symmetric"), + peakGroupsMatrix = matrix(ncol = 0, nrow = 0)) +{ + ## Check input. 
+ if (missing(peaks) | missing(peakIndex) | missing(rtime)) + stop("Arguments 'peaks', 'peakIndex' and 'rtime' are required!") + smooth <- match.arg(smooth) + family <- match.arg(family) + ## minFraction + if (any(minFraction > 1) | any(minFraction < 0)) + stop("'minFraction' has to be between 0 and 1!") + ## Check peaks: + OK <- .validChromPeaksMatrix(peaks) + if (is.character(OK)) + stop(OK) + ## Check peakIndex: + if (any(!(unique(unlist(peakIndex)) %in% seq_len(nrow(peaks))))) + stop("Some indices listed in 'peakIndex' are outside of ", + "1:nrow(peaks)!") + ## Check rtime: in line with the total number of samples we've got in + ## peaks? + if (!is.list(rtime)) + stop("'rtime' should be a list of numeric vectors with the retention ", + "times of the spectra per sample!") + if (!all(unlist(lapply(rtime, is.numeric), use.names = FALSE))) + stop("'rtime' should be a list of numeric vectors with the retention ", + "times of the spectra per sample!") + if (length(rtime) != max(peaks[, "sample"])) + stop("The length of 'rtime' does not match with the total number of ", + "samples according to the 'peaks' matrix!") + nSamples <- length(rtime) + ## Translate minFraction to number of allowed missing samples. + missingSample <- nSamples - (nSamples * minFraction) + ## Check if we've got a valid peakGroupsMatrix + ## o Same number of samples. + ## o range of rt values is within the rtime. 
+ if (nrow(peakGroupsMatrix)) { + if (ncol(peakGroupsMatrix) != nSamples) + stop("'peakGroupsMatrix' has to have the same number of columns ", + "as there are samples!") + pg_range <- range(peakGroupsMatrix, na.rm = TRUE) + rt_range <- range(rtime) + if (!(pg_range[1] >= rt_range[1] & pg_range[2] <= rt_range[2])) + stop("The retention times in 'peakGroupsMatrix' have to be within", + " the retention time range of the experiment!") + rt <- peakGroupsMatrix + } else + rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, nSamples, + missingSample, extraPeaks) + ## Fix for issue #175 + if (length(rt) == 0) + stop("No peak groups found in the data for the provided settings") + ## ## Check if we have peak groups with almost the same retention time. If yes + ## ## select the best matching peaks among these. + ## rtmeds <- rowMedians(rt, na.rm = TRUE) + ## sim_rt <- which(diff(rtmeds) < 1e-6) + ## if (length(sim_rt)) { + ## pk_grps <- list() + ## current_idxs <- NULL + ## last_idx <- -1 + ## for (current_idx in sim_rt) { + ## if ((current_idx - last_idx) > 1) { + ## if (!is.null(current_idxs)) + ## pk_grps <- c(pk_grps, list(current_idxs)) + ## current_idxs <- c(current_idx - 1, current_idx) + ## } else { + ## ## Just add the index. + ## current_idxs <- c(current_idxs, current_idx) + ## } + ## last_idx <- current_idx + ## } + ## pk_grps <- c(pk_grps, list(current_idxs)) + ## ## Now, for each of these select one present in most samples. 
+ ## sel_idx <- unlist(lapply(pk_grps, function(z) { + ## tmp <- rt[z, , drop = FALSE] + ## z[which.max(apply(tmp, MARGIN = 1, function(zz) sum(!is.na(zz))))] + ## })) + ## ## Define the other peaks that we can keep as.is + ## if (any(!(1:nrow(rt) %in% unique(unlist(pk_grps))))) + ## spec_idx <- (1:nrow(rt))[-unique(unlist(pk_grps))] + ## else spec_idx <- NULL + ## sel_idx <- sort(c(spec_idx, sel_idx)) + ## rt <- rt[sel_idx, , drop = FALSE] + ## } + + message("Performing retention time correction using ", nrow(rt), + " peak groups.") + + ## Calculate the deviation of each peak group in each sample from its + ## median + rtdev <- rt - apply(rt, 1, median, na.rm = TRUE) + + if (smooth == "loess") { + mingroups <- min(colSums(!is.na(rt))) + if (mingroups < 4) { + smooth <- "linear" + warning("Too few peak groups for 'loess', reverting to linear", + " method") + } else if (mingroups * span < 4) { + span <- 4 / mingroups + warning("Span too small for 'loess' and the available number of ", + "peak groups, resetting to ", round(span, 2)) + } + } + + rtdevsmo <- vector("list", nSamples) + + ## Code for checking to see if retention time correction is overcorrecting + rtdevrange <- range(rtdev, na.rm = TRUE) + warn.overcorrect <- FALSE + warn.tweak.rt <- FALSE + + for (i in 1:nSamples) { + pts <- na.omit(data.frame(rt = rt[, i], rtdev = rtdev[, i])) + + ## order the data.frame such that rt and rtdev are increasingly ordered. 
+ pk_idx <- order(pts$rt, pts$rtdev) + pts <- pts[pk_idx, ] + if (smooth == "loess") { + lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span, + degree = 1, family = family)) + + rtdevsmo[[i]] <- na.flatfill(predict(lo, data.frame(rt = rtime[[i]]))) + ## Remove singularities from the loess function + rtdevsmo[[i]][abs(rtdevsmo[[i]]) > + quantile(abs(rtdevsmo[[i]]), 0.9) * 2] <- NA + if (length(naidx <- which(is.na(rtdevsmo[[i]])))) + rtdevsmo[[i]][naidx] <- suppressWarnings( + approx(na.omit(data.frame(rtime[[i]], rtdevsmo[[i]])), + xout = rtime[[i]][naidx], rule = 2)$y + ) + + ## Check if there are adjusted retention times that are not ordered + ## increasingly. If there are, search for each first unordered rt + ## the next rt that is larger and linearly interpolate the values + ## in between (see issue #146 for an illustration). + while (length(decidx <- which(diff(rtime[[i]] - rtdevsmo[[i]]) < 0))) { + warn.tweak.rt <- TRUE ## Warn that we had to tweak the rts. + rtadj <- rtime[[i]] - rtdevsmo[[i]] + rtadj_start <- rtadj[decidx[1]] ## start interpolating from here + ## Define the + next_larger <- which(rtadj > rtadj[decidx[1]]) + if (length(next_larger) == 0) { + ## Fix if there is no larger adjusted rt up to the end. + next_larger <- length(rtadj) + 1 + rtadj_end <- rtadj_start + } else { + next_larger <- min(next_larger) + rtadj_end <- rtadj[next_larger] + } + ## linearly interpolate the values in between. + adj_idxs <- (decidx[1] + 1):(next_larger - 1) + incr <- (rtadj_end - rtadj_start) / length(adj_idxs) + rtdevsmo[[i]][adj_idxs] <- rtime[[i]][adj_idxs] - + (rtadj_start + (1:length(adj_idxs)) * incr) + } + + rtdevsmorange <- range(rtdevsmo[[i]]) + if (any(rtdevsmorange / rtdevrange > 2)) + warn.overcorrect <- TRUE + } else { + if (nrow(pts) < 2) { + stop("Not enough peak groups even for linear smoothing ", + "available!") + } + ## Use lm instead? 
+ fit <- lsfit(pts$rt, pts$rtdev) + rtdevsmo[[i]] <- rtime[[i]] * fit$coef[2] + fit$coef[1] + ptsrange <- range(pts$rt) + minidx <- rtime[[i]] < ptsrange[1] + maxidx <- rtime[[i]] > ptsrange[2] + rtdevsmo[[i]][minidx] <- rtdevsmo[[i]][head(which(!minidx), n = 1)] + rtdevsmo[[i]][maxidx] <- rtdevsmo[[i]][tail(which(!maxidx), n = 1)] + } + ## Finally applying the correction + rtime[[i]] <- rtime[[i]] - rtdevsmo[[i]] + } + + if (warn.overcorrect) { + warning("Fitted retention time deviation curves exceed points by more", + " than 2x. This is dangerous and the algorithm is probably ", + "overcorrecting your data. Consider increasing the span ", + "parameter or switching to the linear smoothing method.") + } + + if (warn.tweak.rt) { + warning(call. = FALSE, "Adjusted retention times had to be ", + "re-adjusted for some files to ensure them being in the same", + " order than the raw retention times. A call to ", + "'dropAdjustedRtime' might thus fail to restore retention ", + "times of chromatographic peaks to their original values. ", + "Eventually consider to increase the value of the 'span' ", + "parameter.") + } + + return(rtime) +} +## That's the original code that fails to fix unsorted adjusted retention times +## (see issue #146). +do_adjustRtime_peakGroups_orig <- function(peaks, peakIndex, rtime, + minFraction = 0.9, extraPeaks = 1, + smooth = c("loess", "linear"), span = 0.2, + family = c("gaussian", "symmetric")) { + ## Check input. 
+ if (missing(peaks) | missing(peakIndex) | missing(rtime)) + stop("Arguments 'peaks', 'peakIndex' and 'rtime' are required!") + smooth <- match.arg(smooth) + family <- match.arg(family) + ## minFraction + if (any(minFraction > 1) | any(minFraction < 0)) + stop("'minFraction' has to be between 0 and 1!") + + ## Check peaks: + OK <- .validChromPeaksMatrix(peaks) + if (is.character(OK)) + stop(OK) + ## Check peakIndex: + if (any(!(unique(unlist(peakIndex)) %in% seq_len(nrow(peaks))))) + stop("Some indices listed in 'peakIndex' are outside of ", + "1:nrow(peaks)!") + ## Check rtime: in line with the total number of samples we've got in + ## peaks? + if (!is.list(rtime)) + stop("'rtime' should be a list of numeric vectors with the retention ", + "times of the spectra per sample!") + if (!all(unlist(lapply(rtime, is.numeric), use.names = FALSE))) + stop("'rtime' should be a list of numeric vectors with the retention ", + "times of the spectra per sample!") + if (length(rtime) != max(peaks[, "sample"])) + stop("The length of 'rtime' does not match with the total number of ", + "samples according to the 'peaks' matrix!") + + nSamples <- length(rtime) + ## Translate minFraction to number of allowed missing samples. 
+ missingSample <- nSamples - (nSamples * minFraction) + + rt <- .getPeakGroupsRtMatrix(peaks, peakIndex, nSamples, + missingSample, extraPeaks) + + message("Performing retention time correction using ", nrow(rt), + " peak groups.") + + ## Calculate the deviation of each peak group in each sample from its + ## median + rtdev <- rt - apply(rt, 1, median, na.rm = TRUE) + + if (smooth == "loess") { + mingroups <- min(colSums(!is.na(rt))) + if (mingroups < 4) { + smooth <- "linear" + warning("Too few peak groups for 'loess', reverting to linear", + " method") + } else if (mingroups * span < 4) { + span <- 4 / mingroups + warning("Span too small for 'loess' and the available number of ", + "peak groups, resetting to ", round(span, 2)) + } + } + + rtdevsmo <- vector("list", nSamples) + + ## Code for checking to see if retention time correction is overcorrecting + rtdevrange <- range(rtdev, na.rm = TRUE) + warn.overcorrect <- FALSE + + for (i in 1:nSamples) { + pts <- na.omit(data.frame(rt = rt[, i], rtdev = rtdev[, i])) + + if (smooth == "loess") { + lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span, + degree = 1, family = family)) + + rtdevsmo[[i]] <- na.flatfill(predict(lo, data.frame(rt = rtime[[i]]))) + ## Remove singularities from the loess function + rtdevsmo[[i]][abs(rtdevsmo[[i]]) > + quantile(abs(rtdevsmo[[i]]), 0.9) * 2] <- NA + if (length(naidx <- which(is.na(rtdevsmo[[i]])))) + rtdevsmo[[i]][naidx] <- suppressWarnings( + approx(na.omit(data.frame(rtime[[i]], rtdevsmo[[i]])), + xout = rtime[[i]][naidx], rule = 2)$y + ) + ## That's to ensure that the adjusted retention times are in the + ## same order than the raw retention times - I guess... + ## Check if adjustment changes the order of the adjusted retention + ## times. If yes, the difference between consecutive retention times + ## will be negative. + ## What does this code do: + ## o move the last adjusted retention time to the left by its + ## difference to the next one. 
+            while (length(decidx <- which(diff(rtime[[i]] - rtdevsmo[[i]]) < 0))) {
+                d <- diff(rtime[[i]] - rtdevsmo[[i]])[tail(decidx, 1)]
+                rtdevsmo[[i]][tail(decidx, 1)] <- rtdevsmo[[i]][tail(decidx, 1)] - d
+                if (abs(d) <= 1e-06)
+                    break
+            }
+
+            rtdevsmorange <- range(rtdevsmo[[i]])
+            if (any(rtdevsmorange / rtdevrange > 2))
+                warn.overcorrect <- TRUE
+        } else {
+            if (nrow(pts) < 2) {
+                stop("Not enough peak groups even for linear smoothing ",
+                     "available!")
+            }
+            ## Use lm instead?
+            fit <- lsfit(pts$rt, pts$rtdev)
+            rtdevsmo[[i]] <- rtime[[i]] * fit$coef[2] + fit$coef[1]
+            ptsrange <- range(pts$rt)
+            minidx <- rtime[[i]] < ptsrange[1]
+            maxidx <- rtime[[i]] > ptsrange[2]
+            rtdevsmo[[i]][minidx] <- rtdevsmo[[i]][head(which(!minidx), n = 1)]
+            rtdevsmo[[i]][maxidx] <- rtdevsmo[[i]][tail(which(!maxidx), n = 1)]
+        }
+        ## Finally applying the correction
+        rtime[[i]] <- rtime[[i]] - rtdevsmo[[i]]
+    }
+
+    if (warn.overcorrect) {
+        warning("Fitted retention time deviation curves exceed points by more",
+                " than 2x. This is dangerous and the algorithm is probably ",
+                "overcorrecting your data. Consider increasing the span ",
+                "parameter or switching to the linear smoothing method.")
+    }
+
+    return(rtime)
+}
+
+#' This function adjusts retention times in the vector/matrix \code{x} given the
+#' provided \code{numeric} vectors \code{rtraw} and \code{rtadj}.
+#'
+#' @details The function uses the \code{stepfun} to adjust \code{x} and adjusts
+#'     it given \code{rtraw} towards \code{rtadj}. Hence it is possible to
+#'     perform or to revert retention time correction in \code{x} depending
+#'     on what is provided with parameters \code{rtraw} and \code{rtadj}.
+#'     See examples for details.
+#'
+#' @param x A numeric or matrix with retention time values that should be
+#'     adjusted.
+#'
+#' @noRd
+#'
+#' @examples
+#'
+#' ## Perform retention time correction:
+#' ## feats is supposed to be the peaks matrix FOR A SINGLE SAMPLE, rtr and
+#' ## rtc the raw and adjusted retention times of the spectra from the same
+#' ## samples:
+#' ## adjFts <- feats
+#' ## adjFts[, c("rt", "rtmin", "rtmax")] <- .applyRtAdjustment(feats[, c("rt", "rtmin", "rtmax")], rtr, rtc)
+#'
+#' ## To revert the adjustment: just switch the order of rtr and rtc
+.applyRtAdjustment <- function(x, rtraw, rtadj) {
+    ## re-order everything if rtraw is not sorted; issue #146
+    if (is.unsorted(rtraw)) {
+        idx <- order(rtraw)
+        adjFun <- stepfun(rtraw[idx][-1] - diff(rtraw[idx]) / 2, rtadj[idx])
+        ## if (!is.null(dim(x)))
+        ##     return(adjFun(x[idx, ]))
+        ## else
+        ##     return(adjFun(x[idx]))
+    } else {
+        adjFun <- stepfun(rtraw[-1] - diff(rtraw) / 2, rtadj)
+    }
+    adjFun(x)
+}
+
+#' Helper function to apply retention time adjustment to already identified
+#' peaks in the peaks matrix of an XCMSnExp (or peaks matrix of an
+#' xcmsSet).
+#'
+#' @noRd
+.applyRtAdjToChromPeaks <- function(x, rtraw, rtadj) {
+    if (!is.list(rtraw) | !is.list(rtadj))
+        stop("'rtraw' and 'rtadj' are supposed to be lists!")
+    if (length(rtraw) != length(rtadj))
+        stop("'rtraw' and 'rtadj' have to have the same length!")
+    ## Going to adjust the columns rt, rtmin and rtmax in x.
+    ## Using a for loop here.
+    for (i in 1:length(rtraw)) {
+        whichSample <- which(x[, "sample"] == i)
+        if (length(whichSample)) {
+            x[whichSample, c("rt", "rtmin", "rtmax")] <-
+                .applyRtAdjustment(x[whichSample, c("rt", "rtmin", "rtmax")],
+                                   rtraw = rtraw[[i]], rtadj = rtadj[[i]])
+        }
+    }
+    x
+}
+
+#' Simple helper function to create a matrix with retention times for well
+#' aligned peak groups, each row containing the rt of a peak group,
+#' columns being samples.
+#'
+#' @details This function is called internally by the
+#'     do_adjustRtime_peakGroups function and the retcor.peakgroups method.
+#' @noRd
+.getPeakGroupsRtMatrix <- function(peaks, peakIndex, nSamples,
+                                   missingSample, extraPeaks) {
+    ## For each feature:
+    ## o extract the retention time of the peak with the highest intensity.
+    ## o skip peak groups if they are not assigned a peak in at least a
+    ##   minimum number of samples OR if they have too many peaks from the same
+    ##   sample assigned to it.
+    seq_samp <- seq_len(nSamples)
+    rt <- lapply(peakIndex, function(z) {
+        cur_fts <- peaks[z, c("rt", "into", "sample"), drop = FALSE]
+        ## Return NULL if we've got fewer samples than required or if the total
+        ## number of peaks is larger than a certain threshold.
+        ## Note that the original implementation is not completely correct!
+        ## nsamp > nsamp + extraPeaks might be correct.
+        nsamp <- length(unique(cur_fts[, "sample"]))
+        if (nsamp < (nSamples - missingSample) |
+            nrow(cur_fts) > (nsamp + extraPeaks))
+            return(NULL)
+        cur_fts[] <- cur_fts[order(cur_fts[, 2], decreasing = TRUE), ]
+        cur_fts[match(seq_samp, cur_fts[, 3]), 1]
+    })
+    rt <- do.call(rbind, rt)
+    ## Order them by median retention time. NOTE: this is different from the
+    ## original code, in which the peak groups are ordered by the median
+    ## retention time that is calculated over ALL peaks within the peak
+    ## group, not only to one peak selected for each sample (for multi
+    ## peak per sample assignments).
+    ## Fix for issue #175
+    if (is(rt, "matrix")) {
+        rt <- rt[order(rowMedians(rt, na.rm = TRUE)), , drop = FALSE]
+    }
+    rt
+}
diff --git a/R/do_detectFeatures-functions.R b/R/do_findChromPeaks-functions.R
similarity index 66%
rename from R/do_detectFeatures-functions.R
rename to R/do_findChromPeaks-functions.R
index f2dfd96b1..401e50172 100644
--- a/R/do_detectFeatures-functions.R
+++ b/R/do_findChromPeaks-functions.R
@@ -1,4 +1,5 @@
-## All low level (API) analysis functions for feature detection should go in here.
+## All low level (API) analysis functions for chromatographic peak detection
+## should go in here.
#' @include c.R functions-binning.R cwTools.R ############################################################ @@ -13,10 +14,11 @@ ## Conclusion: ## o speed improvement can only come from internal methods called withihn. ## -##' @title Core API function for centWave feature detection +##' @title Core API function for centWave peak detection ##' -##' @description This function performs peak density and wavelet based feature -##' detection for high resolution LC/MS data in centroid mode [Tautenhahn 2008]. +##' @description This function performs peak density and wavelet based +##' chromatographic peak detection for high resolution LC/MS data in centroid +##' mode [Tautenhahn 2008]. ##' ##' @details This algorithm is most suitable for high resolution ##' LC/\{TOF,OrbiTrap,FTICR\}-MS data in centroid mode. In the first phase the @@ -30,7 +32,7 @@ ##' @note The \emph{centWave} was designed to work on centroided mode, thus it ##' is expected that such data is presented to the function. ##' -##' This function exposes core feature detection functionality of +##' This function exposes core chromatographic peak detection functionality of ##' the \emph{centWave} method. While this function can be called directly, ##' users will generally call the corresponding method for the data object ##' instead. @@ -43,25 +45,26 @@ ##' spectra/scans of the data representing the retention time of each scan. ##' @param valsPerSpect Numeric vector with the number of values for each ##' spectrum. 
-##' @inheritParams featureDetection-centWave +##' @inheritParams findChromPeaks-centWave ##' -##' @family core feature detection functions +##' @family core peak detection functions ##' @references ##' Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly ##' sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics} ##' 2008, 9:504 ##' @return -##' A matrix, each row representing an identified feature, with columns: +##' A matrix, each row representing an identified chromatographic peak, +##' with columns: ##' \describe{ -##' \item{mz}{Intensity weighted mean of m/z values of the feature across scans.} -##' \item{mzmin}{Minimum m/z of the feature.} -##' \item{mzmax}{Maximum m/z of the feature.} -##' \item{rt}{Retention time of the feature's midpoint.} -##' \item{rtmin}{Minimum retention time of the feature.} -##' \item{rtmax}{Maximum retention time of the feature.} -##' \item{into}{Integrated (original) intensity of the feature.} -##' \item{intb}{Per-feature baseline corrected integrated feature intensity.} -##' \item{maxo}{Maximum intensity of the feature.} +##' \item{mz}{Intensity weighted mean of m/z values of the peak across scans.} +##' \item{mzmin}{Minimum m/z of the peak.} +##' \item{mzmax}{Maximum m/z of the peak.} +##' \item{rt}{Retention time of the peak's midpoint.} +##' \item{rtmin}{Minimum retention time of the peak.} +##' \item{rtmax}{Maximum retention time of the peak.} +##' \item{into}{Integrated (original) intensity of the peak.} +##' \item{intb}{Per-peak baseline corrected integrated peak intensity.} +##' \item{maxo}{Maximum intensity of the peak.} ##' \item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, ##' \code{sd} being the standard deviation of local chromatographic noise.} ##' \item{egauss}{RMSE of Gaussian fit.} @@ -73,7 +76,7 @@ ##' \item{h}{Gaussian parameter h.} ##' \item{f}{Region number of the m/z ROI where the peak was localized.} ##' \item{dppm}{m/z deviation of mass trace 
across scanns in ppk.} -##' \item{scale}{Scale on which the feature was localized.} +##' \item{scale}{Scale on which the peak was localized.} ##' \item{scpos}{Peak position found by wavelet analysis (scan number).} ##' \item{scmin}{Left peak limit found by wavelet analysis (scan number).} ##' \item{scmax}{Right peak limit found by wavelet analysis (scan numer).} @@ -88,7 +91,7 @@ ##' fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") ##' xr <- xcmsRaw(fs, profstep = 0) ##' -##' ## Extracting the data from the xcmsRaw for do_detectFeatures_centWave +##' ## Extracting the data from the xcmsRaw for do_findChromPeaks_centWave ##' mzVals <- xr@env$mz ##' intVals <- xr@env$intensity ##' ## Define the values per spectrum: @@ -97,10 +100,10 @@ ##' ## Calling the function. We're using a large value for noise to speed up ##' ## the call in the example performance - in a real use case we would either ##' ## set the value to a reasonable value or use the default value. -##' res <- do_detectFeatures_centWave(mz = mzVals, int = intVals, +##' res <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, ##' scantime = xr@scantime, valsPerSpect = valsPerSpect, noise = 10000) ##' head(res) -do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, +do_findChromPeaks_centWave <- function(mz, int, scantime, valsPerSpect, ppm = 25, peakwidth = c(20, 50), snthresh = 10, @@ -114,24 +117,569 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, roiList = list(), firstBaselineCheck = TRUE, roiScales = NULL) { - .centWave_orig(mz = mz, int = int, scantime = scantime, - valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, - snthresh = snthresh, prefilter = prefilter, - mzCenterFun = mzCenterFun, integrate = integrate, - mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, - verboseColumns = verboseColumns, roiList = roiList, - firstBaselineCheck = firstBaselineCheck, - roiScales = roiScales) + if (getOption("originalCentWave", default = 
TRUE)) { + ## message("DEBUG: using original centWave.") + .centWave_orig(mz = mz, int = int, scantime = scantime, + valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, + snthresh = snthresh, prefilter = prefilter, + mzCenterFun = mzCenterFun, integrate = integrate, + mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, + verboseColumns = verboseColumns, roiList = roiList, + firstBaselineCheck = firstBaselineCheck, + roiScales = roiScales) + } else { + ## message("DEBUG: using modified centWave.") + .centWave_new(mz = mz, int = int, scantime = scantime, + valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, + snthresh = snthresh, prefilter = prefilter, + mzCenterFun = mzCenterFun, integrate = integrate, + mzdiff = mzdiff, fitgauss = fitgauss, noise = noise, + verboseColumns = verboseColumns, roiList = roiList, + firstBaselineCheck = firstBaselineCheck, + roiScales = roiScales) + } +} +############################################################ +## ORIGINAL code from xcms_1.49.7 +.centWave_orig <- function(mz, int, scantime, valsPerSpect, + ppm = 25, peakwidth = c(20,50), snthresh = 10, + prefilter = c(3,100), mzCenterFun = "wMean", + integrate = 1, mzdiff = -0.001, fitgauss = FALSE, + noise = 0, ## noise.local=TRUE, + sleep = 0, verboseColumns = FALSE, roiList = list(), + firstBaselineCheck = TRUE, roiScales = NULL) { + ## TODO @jo Ensure in upstream method that data is in centroided mode! + ## TODO @jo Ensure the upstream method did eventual sub-setting on scanrange + ## Input argument checking. + if (missing(mz) | missing(int) | missing(scantime) | missing(valsPerSpect)) + stop("Arguments 'mz', 'int', 'scantime' and 'valsPerSpect'", + " are required!") + if (length(mz) != length(int) | length(valsPerSpect) != length(scantime) + | length(mz) != sum(valsPerSpect)) + stop("Lengths of 'mz', 'int' and of 'scantime','valsPerSpect'", + " have to match. 
Also, 'length(mz)' should be equal to", + " 'sum(valsPerSpect)'.") + scanindex <- valueCount2ScanIndex(valsPerSpect) ## Get index vector for C calls + if (!is.double(mz)) + mz <- as.double(mz) + if (!is.double(int)) + int <- as.double(int) + ## Fix the mzCenterFun + mzCenterFun <- paste("mzCenter", + gsub(mzCenterFun, pattern = "mzCenter.", + replacement = "", fixed = TRUE), sep=".") + if (!exists(mzCenterFun, mode="function")) + stop("Function '", mzCenterFun, "' not defined !") + + if (!is.logical(firstBaselineCheck)) + stop("Parameter 'firstBaselineCheck' should be logical!") + if (length(firstBaselineCheck) != 1) + stop("Parameter 'firstBaselineCheck' should be a single logical !") + if (length(roiScales) > 0) + if (length(roiScales) != length(roiList) | !is.numeric(roiScales)) + stop("If provided, parameter 'roiScales' has to be a numeric with", + " length equal to the length of 'roiList'!") + ## if (!is.null(roiScales)) { + ## if (!is.numeric(roiScales) | length(roiScales) != length(roiList)) + ## stop("Parameter 'roiScales' has to be a numeric of length equal to", + ## " parameter 'roiList'!") + ##} + + basenames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", + "into", "intb", "maxo", "sn") + verbosenames <- c("egauss", "mu", "sigma", "h", "f", "dppm", "scale", + "scpos", "scmin", "scmax", "lmin", "lmax") + + ## Peak width: seconds to scales + scalerange <- round((peakwidth / mean(diff(scantime))) / 2) + + if (length(z <- which(scalerange == 0))) + scalerange <- scalerange[-z] + if (length(scalerange) < 1) { + warning("No scales? 
Please check peak width!") + if (verboseColumns) { + nopeaks <- matrix(nrow = 0, ncol = length(basenames) + + length(verbosenames)) + colnames(nopeaks) <- c(basenames, verbosenames) + } else { + nopeaks <- matrix(nrow = 0, ncol = length(basenames)) + colnames(nopeaks) <- c(basenames) + } + return(invisible(nopeaks)) + } + + if (length(scalerange) > 1) + scales <- seq(from = scalerange[1], to = scalerange[2], by = 2) + else + scales <- scalerange + + minPeakWidth <- scales[1] + noiserange <- c(minPeakWidth * 3, max(scales) * 3) + maxGaussOverlap <- 0.5 + minPtsAboveBaseLine <- max(4, minPeakWidth - 2) + minCentroids <- minPtsAboveBaseLine + scRangeTol <- maxDescOutlier <- floor(minPeakWidth / 2) + scanrange <- c(1, length(scantime)) + + ## If no ROIs are supplied then search for them. + if (length(roiList) == 0) { + message("Detecting mass traces at ", ppm, " ppm ... ", appendLF = FALSE) + ## flush.console(); + ## We're including the findmzROI code in this function to reduce + ## the need to copy objects etc. + ## We could also sort the data by m/z anyway; wouldn't need that + ## much time. Once we're using classes from MSnbase we can be + ## sure that values are correctly sorted. + withRestarts( + tryCatch({ + tmp <- capture.output( + roiList <- .Call("findmzROI", + mz, int, scanindex, + as.double(c(0.0, 0.0)), + as.integer(scanrange), + as.integer(length(scantime)), + as.double(ppm * 1e-6), + as.integer(minCentroids), + as.integer(prefilter), + as.integer(noise), + PACKAGE ='xcms' ) + ) + }, + error = function(e){ + if (grepl("m/z sort assumption violated !", e$message)) { + invokeRestart("fixSort") + } else { + simpleError(e) + } + }), + fixSort = function() { + ## Force ordering of values within spectrum by mz: + ## o split values into a list -> mz per spectrum, intensity per + ## spectrum. + ## o define the ordering. + ## o re-order the mz and intensity and unlist again. + ## Note: the Rle split is faster than the "conventional" factor split. 
+ splitF <- Rle(1:length(valsPerSpect), valsPerSpect) + mzl <- as.list(S4Vectors::split(mz, f = splitF)) + oidx <- lapply(mzl, order) + mz <<- unlist(mapply(mzl, oidx, FUN = function(y, z) { + return(y[z]) + }, SIMPLIFY = FALSE, USE.NAMES = FALSE), use.names = FALSE) + int <<- unlist(mapply(as.list(split(int, f = splitF)), oidx, + FUN=function(y, z) { + return(y[z]) + }, SIMPLIFY = FALSE, USE.NAMES = FALSE), + use.names = FALSE) + rm(mzl) + rm(splitF) + tmp <- capture.output( + roiList <<- .Call("findmzROI", + mz, int, scanindex, + as.double(c(0.0, 0.0)), + as.integer(scanrange), + as.integer(length(scantime)), + as.double(ppm * 1e-6), + as.integer(minCentroids), + as.integer(prefilter), + as.integer(noise), + PACKAGE ='xcms' ) + ) + } + ) + message("OK") + ## ROI.list <- findmzROI(object,scanrange=scanrange,dev=ppm * 1e-6,minCentroids=minCentroids, prefilter=prefilter, noise=noise) + if (length(roiList) == 0) { + warning("No ROIs found! \n") + if (verboseColumns) { + nopeaks <- matrix(nrow = 0, ncol = length(basenames) + + length(verbosenames)) + colnames(nopeaks) <- c(basenames, verbosenames) + } else { + nopeaks <- matrix(nrow = 0, ncol = length(basenames)) + colnames(nopeaks) <- c(basenames) + } + return(invisible(nopeaks)) + } + } + + ## Second stage: process the ROIs + peaklist <- list() + Nscantime <- length(scantime) + lf <- length(roiList) + + ## cat('\n Detecting chromatographic peaks ... 
\n % finished: ') + ## lp <- -1 + message("Detecting chromatographic peaks in ", length(roiList), + " regions of interest ...", appendLF = FALSE) + + for (f in 1:lf) { + + ## ## Show progress + ## perc <- round((f/lf) * 100) + ## if ((perc %% 10 == 0) && (perc != lp)) + ## { + ## cat(perc," ",sep=""); + ## lp <- perc; + ## } + ## flush.console() + + feat <- roiList[[f]] + N <- feat$scmax - feat$scmin + 1 + peaks <- peakinfo <- NULL + mzrange <- c(feat$mzmin, feat$mzmax) + sccenter <- feat$scmin[1] + floor(N/2) - 1 + scrange <- c(feat$scmin, feat$scmax) + ## scrange + noiserange, used for baseline detection and wavelet analysis + sr <- c(max(scanrange[1], scrange[1] - max(noiserange)), + min(scanrange[2], scrange[2] + max(noiserange))) + eic <- .Call("getEIC", mz, int, scanindex, as.double(mzrange), + as.integer(sr), as.integer(length(scanindex)), + PACKAGE = "xcms") + ## eic <- rawEIC(object,mzrange=mzrange,scanrange=sr) + d <- eic$intensity + td <- sr[1]:sr[2] + scan.range <- c(sr[1], sr[2]) + ## original mzROI range + idxs <- which(eic$scan %in% seq(scrange[1], scrange[2])) + mzROI.EIC <- list(scan=eic$scan[idxs], intensity=eic$intensity[idxs]) + ## mzROI.EIC <- rawEIC(object,mzrange=mzrange,scanrange=scrange) + omz <- .Call("getMZ", mz, int, scanindex, as.double(mzrange), + as.integer(scrange), as.integer(length(scantime)), + PACKAGE = 'xcms') + ## omz <- rawMZ(object,mzrange=mzrange,scanrange=scrange) + if (all(omz == 0)) { + warning("centWave: no peaks found in ROI.") + next + } + od <- mzROI.EIC$intensity + otd <- mzROI.EIC$scan + if (all(od == 0)) { + warning("centWave: no peaks found in ROI.") + next + } + + ## scrange + scRangeTol, used for gauss fitting and continuous + ## data above 1st baseline detection + ftd <- max(td[1], scrange[1] - scRangeTol) : min(td[length(td)], + scrange[2] + scRangeTol) + fd <- d[match(ftd, td)] + + ## 1st type of baseline: statistic approach + if (N >= 10*minPeakWidth) { + ## in case of very long mass trace use full scan 
range + ## for baseline detection + noised <- .Call("getEIC", mz, int, scanindex, as.double(mzrange), + as.integer(scanrange), as.integer(length(scanindex)), + PACKAGE="xcms")$intensity + ## noised <- rawEIC(object,mzrange=mzrange,scanrange=scanrange)$intensity + } else { + noised <- d + } + ## 90% trimmed mean as first baseline guess + noise <- estimateChromNoise(noised, trim = 0.05, + minPts = 3 * minPeakWidth) + ## any continuous data above 1st baseline ? + if (firstBaselineCheck & + !continuousPtsAboveThreshold(fd, threshold = noise, + num = minPtsAboveBaseLine)) + next + ## 2nd baseline estimate using not-peak-range + lnoise <- getLocalNoiseEstimate(d, td, ftd, noiserange, Nscantime, + threshold = noise, + num = minPtsAboveBaseLine) + ## Final baseline & Noise estimate + baseline <- max(1, min(lnoise[1], noise)) + sdnoise <- max(1, lnoise[2]) + sdthr <- sdnoise * snthresh + ## is there any data above S/N * threshold ? + if (!(any(fd - baseline >= sdthr))) + next + wCoefs <- MSW.cwt(d, scales = scales, wavelet = 'mexh') + if (!(!is.null(dim(wCoefs)) && any(wCoefs- baseline >= sdthr))) + next + if (td[length(td)] == Nscantime) ## workaround, localMax fails otherwise + wCoefs[nrow(wCoefs),] <- wCoefs[nrow(wCoefs) - 1, ] * 0.99 + localMax <- MSW.getLocalMaximumCWT(wCoefs) + rL <- MSW.getRidge(localMax) + wpeaks <- sapply(rL, + function(x) { + w <- min(1:length(x),ncol(wCoefs)) + any(wCoefs[x,w]- baseline >= sdthr) + }) + if (any(wpeaks)) { + wpeaksidx <- which(wpeaks) + ## check each peak in ridgeList + for (p in 1:length(wpeaksidx)) { + opp <- rL[[wpeaksidx[p]]] + pp <- unique(opp) + if (length(pp) >= 1) { + dv <- td[pp] %in% ftd + if (any(dv)) { ## peaks in orig. 
data range + ## Final S/N check + if (any(d[pp[dv]]- baseline >= sdthr)) { + ## if(!is.null(roiScales)) { + ## allow roiScales to be a numeric of length 0 + if(length(roiScales) > 0) { + ## use given scale + best.scale.nr <- which(scales == roiScales[[f]]) + if(best.scale.nr > length(opp)) + best.scale.nr <- length(opp) + } else { + ## try to decide which scale describes the peak best + inti <- numeric(length(opp)) + irange <- rep(ceiling(scales[1]/2), length(opp)) + for (k in 1:length(opp)) { + kpos <- opp[k] + r1 <- ifelse(kpos - irange[k] > 1, + kpos-irange[k], 1) + r2 <- ifelse(kpos + irange[k] < length(d), + kpos + irange[k], length(d)) + inti[k] <- sum(d[r1:r2]) + } + maxpi <- which.max(inti) + if (length(maxpi) > 1) { + m <- wCoefs[opp[maxpi], maxpi] + bestcol <- which(m == max(m), + arr.ind = TRUE)[2] + best.scale.nr <- maxpi[bestcol] + } else best.scale.nr <- maxpi + } + + best.scale <- scales[best.scale.nr] + best.scale.pos <- opp[best.scale.nr] + + pprange <- min(pp):max(pp) + ## maxint <- max(d[pprange]) + lwpos <- max(1,best.scale.pos - best.scale) + rwpos <- min(best.scale.pos + best.scale, length(td)) + p1 <- match(td[lwpos], otd)[1] + p2 <- match(td[rwpos], otd) + p2 <- p2[length(p2)] + if (is.na(p1)) p1 <- 1 + if (is.na(p2)) p2 <- N + mz.value <- omz[p1:p2] + mz.int <- od[p1:p2] + maxint <- max(mz.int) + + ## re-calculate m/z value for peak range + mzrange <- range(mz.value) + mzmean <- do.call(mzCenterFun, + list(mz = mz.value, + intensity = mz.int)) + + ## Compute dppm only if needed + dppm <- NA + if (verboseColumns) { + if (length(mz.value) >= (minCentroids + 1)) { + dppm <- round(min(running(abs(diff(mz.value)) / + (mzrange[2] * 1e-6), + fun = max, + width = minCentroids))) + } else { + dppm <- round((mzrange[2] - mzrange[1]) / + (mzrange[2] * 1e-6)) + } + } + peaks <- rbind(peaks, + c(mzmean,mzrange, ## mz + NA, NA, NA, ## rt, rtmin, rtmax, + NA, ## intensity (sum) + NA, ## intensity (-bl) + maxint, ## max intensity + round((maxint - 
baseline) / sdnoise), ## S/N Ratio + NA, ## Gaussian RMSE + NA,NA,NA, ## Gaussian Parameters + f, ## ROI Position + dppm, ## max. difference between the [minCentroids] peaks in ppm + best.scale, ## Scale + td[best.scale.pos], + td[lwpos], + td[rwpos], ## Peak positions guessed from the wavelet's (scan nr) + NA, NA)) ## Peak limits (scan nr) + peakinfo <- rbind(peakinfo, + c(best.scale, best.scale.nr, + best.scale.pos, lwpos, rwpos)) + ## Peak positions guessed from the wavelet's + } + } + } + } ##for + } ## if + + ## postprocessing + if (!is.null(peaks)) { + colnames(peaks) <- c(basenames, verbosenames) + colnames(peakinfo) <- c("scale", "scaleNr", "scpos", + "scmin", "scmax") + for (p in 1:dim(peaks)[1]) { + ## find minima, assign rt and intensity values + if (integrate == 1) { + lm <- descendMin(wCoefs[, peakinfo[p,"scaleNr"]], + istart = peakinfo[p,"scpos"]) + gap <- all(d[lm[1]:lm[2]] == 0) ## looks like we got stuck in a gap right in the middle of the peak + if ((lm[1] == lm[2]) || gap )## fall-back + lm <- descendMinTol(d, + startpos = c(peakinfo[p, "scmin"], + peakinfo[p, "scmax"]), + maxDescOutlier) + } else { + lm <- descendMinTol(d, startpos = c(peakinfo[p, "scmin"], + peakinfo[p, "scmax"]), + maxDescOutlier) + } + ## narrow down peak rt boundaries by skipping zeros + pd <- d[lm[1]:lm[2]] + np <- length(pd) + lm.l <- findEqualGreaterUnsorted(pd, 1) + lm.l <- max(1, lm.l - 1) + lm.r <- findEqualGreaterUnsorted(rev(pd), 1) + lm.r <- max(1, lm.r - 1) + lm <- lm + c(lm.l - 1, -(lm.r - 1) ) + + peakrange <- td[lm] + peaks[p, "rtmin"] <- scantime[peakrange[1]] + peaks[p, "rtmax"] <- scantime[peakrange[2]] + peaks[p, "maxo"] <- max(d[lm[1]:lm[2]]) + pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) / + (peakrange[2] - peakrange[1]) + if (is.na(pwid)) + pwid <- 1 + peaks[p, "into"] <- pwid * sum(d[lm[1]:lm[2]]) + db <- d[lm[1]:lm[2]] - baseline + peaks[p, "intb"] <- pwid * sum(db[db>0]) + peaks[p, "lmin"] <- lm[1] + peaks[p, "lmax"] <- lm[2] + + if 
(fitgauss) { + ## perform gaussian fits, use wavelets for inital parameters + md <- max(d[lm[1]:lm[2]]) + d1 <- d[lm[1]:lm[2]] / md ## normalize data for gaussian error calc. + pgauss <- fitGauss(td[lm[1]:lm[2]], d[lm[1]:lm[2]], + pgauss = list(mu = peaks[p, "scpos"], + sigma = peaks[p, "scmax"] - + peaks[p, "scmin"], + h = peaks[p, "maxo"])) + rtime <- peaks[p, "scpos"] + if (!any(is.na(pgauss)) && all(pgauss > 0)) { + gtime <- td[match(round(pgauss$mu), td)] + if (!is.na(gtime)) { + rtime <- gtime + peaks[p, "mu"] <- pgauss$mu + peaks[p, "sigma"] <- pgauss$sigma + peaks[p, "h"] <- pgauss$h + peaks[p,"egauss"] <- sqrt((1 / length(td[lm[1]:lm[2]])) * + sum(((d1-gauss(td[lm[1]:lm[2]], + pgauss$h / md, + pgauss$mu, + pgauss$sigma))^2))) + } + } + peaks[p, "rt"] <- scantime[rtime] + ## avoid fitting side effects + if (peaks[p, "rt"] < peaks[p, "rtmin"]) + peaks[p, "rt"] <- scantime[peaks[p, "scpos"]] + } else + peaks[p, "rt"] <- scantime[peaks[p, "scpos"]] + } + peaks <- joinOverlappingPeaks(td, d, otd, omz, od, scantime, + scan.range, peaks, maxGaussOverlap, + mzCenterFun = mzCenterFun) + } + + ## if ((sleep >0) && (!is.null(peaks))) { + ## tdp <- scantime[td]; trange <- range(tdp) + ## egauss <- paste(round(peaks[,"egauss"],3),collapse=", ") + ## cdppm <- paste(peaks[,"dppm"],collapse=", ") + ## csn <- paste(peaks[,"sn"],collapse=", ") + ## par(bg = "white") + ## l <- layout(matrix(c(1,2,3),nrow=3,ncol=1,byrow=T),heights=c(.5,.75,2)); + ## par(mar= c(2, 4, 4, 2) + 0.1) + ## plotRaw(object,mzrange=mzrange,rtrange=trange,log=TRUE,title='') + ## title(main=paste(f,': ', round(mzrange[1],4),' - ',round(mzrange[2],4),' m/z , dppm=',cdppm,', EGauss=',egauss ,', S/N =',csn,sep='')) + ## par(mar= c(1, 4, 1, 2) + 0.1) + ## image(y=scales[1:(dim(wCoefs)[2])],z=wCoefs,col=terrain.colors(256),xaxt='n',ylab='CWT coeff.') + ## par(mar= c(4, 4, 1, 2) + 0.1) + ## plot(tdp,d,ylab='Intensity',xlab='Scan Time');lines(tdp,d,lty=2) + ## lines(scantime[otd],od,lty=2,col='blue') ## 
original mzbox range + ## abline(h=baseline,col='green') + ## bwh <- length(sr[1]:sr[2]) - length(baseline) + ## if (odd(bwh)) {bwh1 <- floor(bwh/2); bwh2 <- bwh1+1} else {bwh1<-bwh2<-bwh/2} + ## if (any(!is.na(peaks[,"scpos"]))) + ## { ## plot centers and width found through wavelet analysis + ## abline(v=scantime[na.omit(peaks[(peaks[,"scpos"] >0),"scpos"])],col='red') + ## } + ## abline(v=na.omit(c(peaks[,"rtmin"],peaks[,"rtmax"])),col='green',lwd=1) + ## if (fitgauss) { + ## tdx <- seq(min(td),max(td),length.out=200) + ## tdxp <- seq(trange[1],trange[2],length.out=200) + ## fitted.peaks <- which(!is.na(peaks[,"mu"])) + ## for (p in fitted.peaks) + ## { ## plot gaussian fits + ## yg<-gauss(tdx,peaks[p,"h"],peaks[p,"mu"],peaks[p,"sigma"]) + ## lines(tdxp,yg,col='blue') + ## } + ## } + ## Sys.sleep(sleep) + ## } + + if (!is.null(peaks)) { + peaklist[[length(peaklist) + 1]] <- peaks + } + } ## f + + if (length(peaklist) == 0) { + warning("No peaks found!") + + if (verboseColumns) { + nopeaks <- matrix(nrow = 0, ncol = length(basenames) + + length(verbosenames)) + colnames(nopeaks) <- c(basenames, verbosenames) + } else { + nopeaks <- matrix(nrow = 0, ncol = length(basenames)) + colnames(nopeaks) <- c(basenames) + } + message(" FAIL: none found!") + return(nopeaks) + } + p <- do.call(rbind, peaklist) + if (!verboseColumns) + p <- p[, basenames, drop = FALSE] + + uorder <- order(p[, "into"], decreasing = TRUE) + pm <- as.matrix(p[,c("mzmin", "mzmax", "rtmin", "rtmax"), drop = FALSE]) + uindex <- rectUnique(pm, uorder, mzdiff, ydiff = -0.00001) ## allow adjacent peaks + pr <- p[uindex, , drop = FALSE] + message(" OK: ", nrow(pr), " found.") + + return(pr) } -############################################################ -## ORIGINAL code from xcms_1.49.7 -.centWave_orig <- function(mz, int, scantime, valsPerSpect, - ppm = 25, peakwidth = c(20,50), snthresh = 10, - prefilter = c(3,100), mzCenterFun = "wMean", - integrate = 1, mzdiff = -0.001, fitgauss = FALSE, - noise = 
0, ## noise.local=TRUE, - sleep = 0, verboseColumns = FALSE, roiList = list(), - firstBaselineCheck = TRUE, roiScales = NULL) { +## This version fixes issue #135, i.e. that the peak signal is integrated based +## on the mzrange of the ROI and not of the actually reported peak. +## Issue #136. +## +## What's different to the original version? +## +## 1) The mz range of the peaks is calculated only using mz values with a +## measured intensity. This avoids mz ranges from 0 to max mz of the peak, +## with the mz=0 corresponding actually to scans in which no intensity was +## measured. Search for "@MOD1" to jump to the respective code. +## +## 2) The intensities for the peak are reloaded with the refined mz range during +## the postprocessing. Search for "@MOD2" to jump to the respective code. +## +## What I don't like: +## o Might be better if the getEIC and getMZ C functions returned NA instead of 0 +## if nothing was measured. +## o The joinOverlappingPeaks is still calculated using the variable "d" which +## contains all intensities from the ROI - might actually not be too bad +## though. +.centWave_new <- function(mz, int, scantime, valsPerSpect, + ppm = 25, peakwidth = c(20,50), snthresh = 10, + prefilter = c(3,100), mzCenterFun = "wMean", + integrate = 1, mzdiff = -0.001, fitgauss = FALSE, + noise = 0, ## noise.local=TRUE, + sleep = 0, verboseColumns = FALSE, roiList = list(), + firstBaselineCheck = TRUE, roiScales = NULL) { ## TODO @jo Ensure in upstream method that data is in centroided mode! ## TODO @jo Ensure the upstream method did eventual sub-setting on scanrange ## Input argument checking. 
@@ -163,11 +711,6 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, if (length(roiScales) != length(roiList) | !is.numeric(roiScales)) stop("If provided, parameter 'roiScales' has to be a numeric with", " length equal to the length of 'roiList'!") - ## if (!is.null(roiScales)) { - ## if (!is.numeric(roiScales) | length(roiScales) != length(roiList)) - ## stop("Parameter 'roiScales' has to be a numeric of length equal to", - ## " parameter 'roiList'!") - ##} basenames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intb", "maxo", "sn") @@ -271,7 +814,6 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, } ) message("OK") - ## ROI.list <- findmzROI(object,scanrange=scanrange,dev=ppm * 1e-6,minCentroids=minCentroids, prefilter=prefilter, noise=noise) if (length(roiList) == 0) { warning("No ROIs found! \n") if (verboseColumns) { @@ -293,24 +835,17 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, ## cat('\n Detecting chromatographic peaks ... 
\n % finished: ') ## lp <- -1 - message("Detecting features in ", length(roiList), + message("Detecting chromatographic peaks in ", length(roiList), " regions of interest ...", appendLF = FALSE) for (f in 1:lf) { - ## ## Show progress - ## perc <- round((f/lf) * 100) - ## if ((perc %% 10 == 0) && (perc != lp)) - ## { - ## cat(perc," ",sep=""); - ## lp <- perc; - ## } - ## flush.console() - + ## cat("\nProcess roi ", f, "\n") feat <- roiList[[f]] N <- feat$scmax - feat$scmin + 1 peaks <- peakinfo <- NULL mzrange <- c(feat$mzmin, feat$mzmax) + mzrange_ROI <- mzrange sccenter <- feat$scmin[1] + floor(N/2) - 1 scrange <- c(feat$scmin, feat$scmax) ## scrange + noiserange, used for baseline detection and wavelet analysis @@ -319,18 +854,15 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, eic <- .Call("getEIC", mz, int, scanindex, as.double(mzrange), as.integer(sr), as.integer(length(scanindex)), PACKAGE = "xcms") - ## eic <- rawEIC(object,mzrange=mzrange,scanrange=sr) d <- eic$intensity td <- sr[1]:sr[2] scan.range <- c(sr[1], sr[2]) ## original mzROI range idxs <- which(eic$scan %in% seq(scrange[1], scrange[2])) mzROI.EIC <- list(scan=eic$scan[idxs], intensity=eic$intensity[idxs]) - ## mzROI.EIC <- rawEIC(object,mzrange=mzrange,scanrange=scrange) omz <- .Call("getMZ", mz, int, scanindex, as.double(mzrange), as.integer(scrange), as.integer(length(scantime)), PACKAGE = 'xcms') - ## omz <- rawMZ(object,mzrange=mzrange,scanrange=scrange) if (all(omz == 0)) { warning("centWave: no peaks found in ROI.") next @@ -341,7 +873,6 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, warning("centWave: no peaks found in ROI.") next } - ## scrange + scRangeTol, used for gauss fitting and continuous ## data above 1st baseline detection ftd <- max(td[1], scrange[1] - scRangeTol) : min(td[length(td)], @@ -355,7 +886,6 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, noised <- .Call("getEIC", mz, int, scanindex, 
as.double(mzrange), as.integer(scanrange), as.integer(length(scanindex)), PACKAGE="xcms")$intensity - ## noised <- rawEIC(object,mzrange=mzrange,scanrange=scanrange)$intensity } else { noised <- d } @@ -439,14 +969,34 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, p1 <- match(td[lwpos], otd)[1] p2 <- match(td[rwpos], otd) p2 <- p2[length(p2)] + ## cat("p1: ", p1, " p2: ", p2, "\n") if (is.na(p1)) p1 <- 1 if (is.na(p2)) p2 <- N mz.value <- omz[p1:p2] + ## cat("mz.value: ", paste0(mz.value, collapse = ", "), + ## "\n") mz.int <- od[p1:p2] maxint <- max(mz.int) + ## @MOD1: Remove mz values for which no intensity was + ## measured. Would be better if getEIC returned NA + ## if nothing was measured. + mzorig <- mz.value + mz.value <- mz.value[mz.int > 0] + mz.int <- mz.int[mz.int > 0] + ## Call next to avoid reporting peaks without mz + ## values (issue #165). + if (length(mz.value) == 0) + next + ## cat("mz.value: ", paste0(mz.value, collapse = ", "), + ## "\n") ## re-calculate m/z value for peak range + ## cat("mzrange refined: [", + ## paste0(mzrange, collapse = ", "), "]") + ## hm, shouldn't we get rid of the mz = 0 here? mzrange <- range(mz.value) + ## cat(" -> [", + ## paste0(mzrange, collapse = ", "), "]\n") mzmean <- do.call(mzCenterFun, list(mz = mz.value, intensity = mz.int)) @@ -496,23 +1046,41 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, colnames(peakinfo) <- c("scale", "scaleNr", "scpos", "scmin", "scmax") for (p in 1:dim(peaks)[1]) { + ## @MOD2 + ## Fix for issue #135: reload the EIC data if the + ## mzrange differs from that of the ROI, but only if the mz + ## range of the peak is different from the one of the ROI. 
+ mzr <- peaks[p, c("mzmin", "mzmax")] + if (any(mzr != mzrange_ROI)) { + eic <- .Call("getEIC", mz, int, scanindex, + as.double(mzr), as.integer(sr), + as.integer(length(scanindex)), + PACKAGE = "xcms") + current_ints <- eic$intensity + ## Force re-loading also of a potential additional peak in + ## the same ROI. + mzrange_ROI <- c(0, 0) + } else { + current_ints <- d + } ## find minima, assign rt and intensity values if (integrate == 1) { lm <- descendMin(wCoefs[, peakinfo[p,"scaleNr"]], istart = peakinfo[p,"scpos"]) - gap <- all(d[lm[1]:lm[2]] == 0) ## looks like we got stuck in a gap right in the middle of the peak + gap <- all(current_ints[lm[1]:lm[2]] == 0) ## looks like we got stuck in a gap right in the middle of the peak if ((lm[1] == lm[2]) || gap )## fall-back - lm <- descendMinTol(d, + lm <- descendMinTol(current_ints, startpos = c(peakinfo[p, "scmin"], peakinfo[p, "scmax"]), maxDescOutlier) } else { - lm <- descendMinTol(d, startpos = c(peakinfo[p, "scmin"], - peakinfo[p, "scmax"]), + lm <- descendMinTol(current_ints, + startpos = c(peakinfo[p, "scmin"], + peakinfo[p, "scmax"]), maxDescOutlier) } ## narrow down peak rt boundaries by skipping zeros - pd <- d[lm[1]:lm[2]] + pd <- current_ints[lm[1]:lm[2]] np <- length(pd) lm.l <- findEqualGreaterUnsorted(pd, 1) lm.l <- max(1, lm.l - 1) @@ -523,22 +1091,24 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, peakrange <- td[lm] peaks[p, "rtmin"] <- scantime[peakrange[1]] peaks[p, "rtmax"] <- scantime[peakrange[2]] - peaks[p, "maxo"] <- max(d[lm[1]:lm[2]]) + peaks[p, "maxo"] <- max(current_ints[lm[1]:lm[2]]) pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) / (peakrange[2] - peakrange[1]) if (is.na(pwid)) pwid <- 1 - peaks[p, "into"] <- pwid * sum(d[lm[1]:lm[2]]) - db <- d[lm[1]:lm[2]] - baseline + peaks[p, "into"] <- pwid * sum(current_ints[lm[1]:lm[2]]) + db <- current_ints[lm[1]:lm[2]] - baseline peaks[p, "intb"] <- pwid * sum(db[db>0]) peaks[p, "lmin"] <- lm[1] peaks[p, 
"lmax"] <- lm[2] + ## cat("[", paste0(peaks[p, c("rtmin", "rtmax")], collapse = ", "), + ## "] into ", peaks[p, "into"], "\n") if (fitgauss) { ## perform gaussian fits, use wavelets for inital parameters - md <- max(d[lm[1]:lm[2]]) - d1 <- d[lm[1]:lm[2]] / md ## normalize data for gaussian error calc. - pgauss <- fitGauss(td[lm[1]:lm[2]], d[lm[1]:lm[2]], + md <- max(current_ints[lm[1]:lm[2]]) + d1 <- current_ints[lm[1]:lm[2]] / md ## normalize data for gaussian error calc. + pgauss <- fitGauss(td[lm[1]:lm[2]], current_ints[lm[1]:lm[2]], pgauss = list(mu = peaks[p, "scpos"], sigma = peaks[p, "scmax"] - peaks[p, "scmin"], @@ -565,47 +1135,12 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, } else peaks[p, "rt"] <- scantime[peaks[p, "scpos"]] } - peaks <- joinOverlappingPeaks(td, d, otd, omz, od, scantime, - scan.range, peaks, maxGaussOverlap, + ## Use d here instead of current_ints + peaks <- joinOverlappingPeaks(td, d, otd, omz, od, + scantime, scan.range, peaks, + maxGaussOverlap, mzCenterFun = mzCenterFun) } - - ## if ((sleep >0) && (!is.null(peaks))) { - ## tdp <- scantime[td]; trange <- range(tdp) - ## egauss <- paste(round(peaks[,"egauss"],3),collapse=", ") - ## cdppm <- paste(peaks[,"dppm"],collapse=", ") - ## csn <- paste(peaks[,"sn"],collapse=", ") - ## par(bg = "white") - ## l <- layout(matrix(c(1,2,3),nrow=3,ncol=1,byrow=T),heights=c(.5,.75,2)); - ## par(mar= c(2, 4, 4, 2) + 0.1) - ## plotRaw(object,mzrange=mzrange,rtrange=trange,log=TRUE,title='') - ## title(main=paste(f,': ', round(mzrange[1],4),' - ',round(mzrange[2],4),' m/z , dppm=',cdppm,', EGauss=',egauss ,', S/N =',csn,sep='')) - ## par(mar= c(1, 4, 1, 2) + 0.1) - ## image(y=scales[1:(dim(wCoefs)[2])],z=wCoefs,col=terrain.colors(256),xaxt='n',ylab='CWT coeff.') - ## par(mar= c(4, 4, 1, 2) + 0.1) - ## plot(tdp,d,ylab='Intensity',xlab='Scan Time');lines(tdp,d,lty=2) - ## lines(scantime[otd],od,lty=2,col='blue') ## original mzbox range - ## abline(h=baseline,col='green') - ## 
bwh <- length(sr[1]:sr[2]) - length(baseline) - ## if (odd(bwh)) {bwh1 <- floor(bwh/2); bwh2 <- bwh1+1} else {bwh1<-bwh2<-bwh/2} - ## if (any(!is.na(peaks[,"scpos"]))) - ## { ## plot centers and width found through wavelet analysis - ## abline(v=scantime[na.omit(peaks[(peaks[,"scpos"] >0),"scpos"])],col='red') - ## } - ## abline(v=na.omit(c(peaks[,"rtmin"],peaks[,"rtmax"])),col='green',lwd=1) - ## if (fitgauss) { - ## tdx <- seq(min(td),max(td),length.out=200) - ## tdxp <- seq(trange[1],trange[2],length.out=200) - ## fitted.peaks <- which(!is.na(peaks[,"mu"])) - ## for (p in fitted.peaks) - ## { ## plot gaussian fits - ## yg<-gauss(tdx,peaks[p,"h"],peaks[p,"mu"],peaks[p,"sigma"]) - ## lines(tdxp,yg,col='blue') - ## } - ## } - ## Sys.sleep(sleep) - ## } - if (!is.null(peaks)) { peaklist[[length(peaklist) + 1]] <- peaks } @@ -626,10 +1161,12 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, return(nopeaks) } + ## cat("length peaklist: ", length(peaklist), "\n") p <- do.call(rbind, peaklist) if (!verboseColumns) p <- p[, basenames, drop = FALSE] - + return(p) + uorder <- order(p[, "into"], decreasing = TRUE) pm <- as.matrix(p[,c("mzmin", "mzmax", "rtmin", "rtmax"), drop = FALSE]) uindex <- rectUnique(pm, uorder, mzdiff, ydiff = -0.00001) ## allow adjacent peaks @@ -645,21 +1182,21 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, ############################################################ ## massifquant ## -##' @title Core API function for massifquant feature detection +##' @title Core API function for massifquant peak detection ##' -##' @description Massifquant is a Kalman filter (KF)-based feature -##' detection for XC-MS data in centroid mode. The identified features +##' @description Massifquant is a Kalman filter (KF)-based chromatographic peak +##' detection for XC-MS data in centroid mode. 
The identified peaks ##' can be further refined with the \emph{centWave} method (see -##' \code{\link{do_detectFeatures_centWave}} for details on centWave) +##' \code{\link{do_findChromPeaks_centWave}} for details on centWave) ##' by specifying \code{withWave = TRUE}. ##' ##' @details This algorithm's performance has been tested rigorously ##' on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. -##' Simultaneous kalman filters identify features and calculate their +##' Simultaneous kalman filters identify peaks and calculate their ##' area under the curve. The default parameters are set to operate on ##' a complex LC-MS Orbitrap sample. Users will find it useful to do some ##' simple exploratory data analysis to find out where to set a minimum -##' intensity, and identify how many scans an average feature spans. The +##' intensity, and identify how many scans an average peak spans. The ##' \code{consecMissedLimit} parameter has yielded good performance on ##' Orbitrap data when set to (\code{2}) and on TOF data it was found best ##' to be at (\code{1}). This may change as the algorithm has yet to be @@ -669,25 +1206,26 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, ##' The \code{ppm} and \code{checkBack} parameters have shown less influence ##' than the other parameters and exist to give users flexibility and ##' better accuracy. 
-##' @inheritParams do_detectFeatures_centWave -##' @inheritParams featureDetection-centWave -##' @inheritParams featureDetection-massifquant +##' @inheritParams do_findChromPeaks_centWave +##' @inheritParams findChromPeaks-centWave +##' @inheritParams findChromPeaks-massifquant ##' @return -##' A matrix, each row representing an identified feature, with columns: +##' A matrix, each row representing an identified chromatographic peak, +##' with columns: ##' \describe{ -##' \item{mz}{Intensity weighted mean of m/z values of the features across +##' \item{mz}{Intensity weighted mean of m/z values of the peaks across ##' scans.} -##' \item{mzmin}{Minumum m/z of the feature.} -##' \item{mzmax}{Maximum m/z of the feature.} -##' \item{rtmin}{Minimum retention time of the feature.} -##' \item{rtmax}{Maximum retention time of the feature.} -##' \item{rt}{Retention time of the feature's midpoint.} -##' \item{into}{Integrated (original) intensity of the feature.} -##' \item{maxo}{Maximum intensity of the feature.} +##' \item{mzmin}{Minumum m/z of the peak.} +##' \item{mzmax}{Maximum m/z of the peak.} +##' \item{rtmin}{Minimum retention time of the peak.} +##' \item{rtmax}{Maximum retention time of the peak.} +##' \item{rt}{Retention time of the peak's midpoint.} +##' \item{into}{Integrated (original) intensity of the peak.} +##' \item{maxo}{Maximum intensity of the peak.} ##' } ##' If \code{withWave} is set to \code{TRUE}, the result is the same as -##' returned by the \code{\link{do_detectFeatures_centWave}} method. -##' @family core feature detection functions +##' returned by the \code{\link{do_findChromPeaks_centWave}} method. +##' @family core peak detection functions ##' @seealso \code{\link{massifquant}} for the standard user interface method. 
##' @references ##' Conley CJ, Smith R, Torgrip RJ, Taylor RM, Tautenhahn R and Prince JT @@ -708,11 +1246,11 @@ do_detectFeatures_centWave <- function(mz, int, scantime, valsPerSpect, ##' ## Define the values per spectrum: ##' valsPerSpect <- diff(c(xraw@scanindex, length(mzVals))) ##' -##' ## Perform the feature detection using massifquant -##' res <- do_detectFeatures_massifquant(mz = mzVals, int = intVals, +##' ## Perform the peak detection using massifquant +##' res <- do_findChromPeaks_massifquant(mz = mzVals, int = intVals, ##' scantime = xraw@scantime, valsPerSpect = valsPerSpect) ##' head(res) -do_detectFeatures_massifquant <- function(mz, +do_findChromPeaks_massifquant <- function(mz, int, scantime, valsPerSpect, @@ -770,7 +1308,7 @@ do_detectFeatures_massifquant <- function(mz, ppm = ppm) message("OK") if (withWave) { - featlist <- do_detectFeatures_centWave(mz = mz, int = int, + featlist <- do_findChromPeaks_centWave(mz = mz, int = int, scantime = scantime, valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, @@ -795,7 +1333,7 @@ do_detectFeatures_massifquant <- function(mz, return(nopeaks) } - ## Get the max intensity for each feature. + ## Get the max intensity for each peak. maxo <- lapply(massifquantROIs, function(z) { raw <- .rawMat(mz = mz, int = int, scantime = scantime, valsPerSpect = valsPerSpect, @@ -858,18 +1396,18 @@ do_detectFeatures_massifquant <- function(mz, ## impute: none (=bin), binlin, binlinbase, intlin ## baseValue default: min(int)/2 (smallest value in the whole data set). ## -##' @title Core API function for matchedFilter feature detection +##' @title Core API function for matchedFilter peak detection ##' -##' @description This function identifies features in the chromatographic +##' @description This function identifies peaks in the chromatographic ##' time domain as described in [Smith 2006]. The intensity values are ##' binned by cutting The LC/MS data into slices (bins) of a mass unit ##' (\code{binSize} m/z) wide. 
Within each bin the maximal intensity is -##' selected. The feature detection is then performed in each bin by +##' selected. The peak detection is then performed in each bin by ##' extending it based on the \code{steps} parameter to generate slices ##' comprising bins \code{current_bin - steps +1} to \code{current_bin + steps - 1}. ##' Each of these slices is then filtered with matched filtration using -##' a second-derative Gaussian as the model feature/peak shape. After filtration -##' features are detected using a signal-to-ration cut-off. For more details +##' a second-derative Gaussian as the model peak shape. After filtration +##' peaks are detected using a signal-to-ration cut-off. For more details ##' and illustrations see [Smith 2006]. ##' ##' @details The intensities are binned by the provided m/z values within each @@ -881,30 +1419,31 @@ do_detectFeatures_massifquant <- function(mz, ##' \code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods. ##' ##' @note -##' This function exposes core feature detection functionality of +##' This function exposes core peak detection functionality of ##' the \emph{matchedFilter} method. While this function can be called directly, ##' users will generally call the corresponding method for the data object ##' instead (e.g. the \code{link{findPeaks.matchedFilter}} method). 
##' -##' @inheritParams do_detectFeatures_centWave -##' @inheritParams featureDetection-centWave +##' @inheritParams do_findChromPeaks_centWave +##' @inheritParams findChromPeaks-centWave ##' @inheritParams imputeLinInterpol -##' @inheritParams featureDetection-matchedFilter +##' @inheritParams findChromPeaks-matchedFilter ##' -##' @return A matrix, each row representing an identified feature, with columns: +##' @return A matrix, each row representing an identified chromatographic peak, +##' with columns: ##' \describe{ -##' \item{mz}{Intensity weighted mean of m/z values of the feature across scans.} -##' \item{mzmin}{Minimum m/z of the feature.} -##' \item{mzmax}{Maximum m/z of the feature.} -##' \item{rt}{Retention time of the feature's midpoint.} -##' \item{rtmin}{Minimum retention time of the feature.} -##' \item{rtmax}{Maximum retention time of the feature.} -##' \item{into}{Integrated (original) intensity of the feature.} +##' \item{mz}{Intensity weighted mean of m/z values of the peak across scans.} +##' \item{mzmin}{Minimum m/z of the peak.} +##' \item{mzmax}{Maximum m/z of the peak.} +##' \item{rt}{Retention time of the peak's midpoint.} +##' \item{rtmin}{Minimum retention time of the peak.} +##' \item{rtmax}{Maximum retention time of the peak.} +##' \item{into}{Integrated (original) intensity of the peak.} ##' \item{intf}{Integrated intensity of the filtered peak.} -##' \item{maxo}{Maximum intensity of the feature.} +##' \item{maxo}{Maximum intensity of the peak.} ##' \item{maxf}{Maximum intensity of the filtered peak.} -##' \item{i}{Rank of feature in merged EIC (\code{<= max}).} -##' \item{sn}{Signal to noise ratio of the feature} +##' \item{i}{Rank of peak in merged EIC (\code{<= max}).} +##' \item{sn}{Signal to noise ratio of the peak} ##' } ##' @references ##' Colin A. Smith, Elizabeth J. 
Want, Grace O'Maille, Ruben Abagyan and @@ -912,7 +1451,7 @@ do_detectFeatures_massifquant <- function(mz, ##' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" ##' \emph{Anal. Chem.} 2006, 78:779-787. ##' @author Colin A Smith, Johannes Rainer -##' @family core feature detection functions +##' @family core peak detection functions ##' @seealso \code{\link{binYonX}} for a binning function, ##' \code{\link{imputeLinInterpol}} for the interpolation of missing values. ##' \code{\link{matchedFilter}} for the standard user interface method. @@ -922,16 +1461,16 @@ do_detectFeatures_massifquant <- function(mz, ##' fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") ##' xr <- xcmsRaw(fs) ##' -##' ## Extracting the data from the xcmsRaw for do_detectFeatures_centWave +##' ## Extracting the data from the xcmsRaw for do_findChromPeaks_centWave ##' mzVals <- xr@env$mz ##' intVals <- xr@env$intensity ##' ## Define the values per spectrum: ##' valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) ##' -##' res <- do_detectFeatures_matchedFilter(mz = mzVals, int = intVals, +##' res <- do_findChromPeaks_matchedFilter(mz = mzVals, int = intVals, ##' scantime = xr@scantime, valsPerSpect = valsPerSpect) ##' head(res) -do_detectFeatures_matchedFilter <- function(mz, +do_findChromPeaks_matchedFilter <- function(mz, int, scantime, valsPerSpect, @@ -1129,389 +1668,6 @@ do_detectFeatures_matchedFilter <- function(mz, return(rmat) } -## ############################################################ -## ## Same as do_detectFeatures_matchedFilter except: -## ## -## ## o Using the binYtoX and imputeLinInterpol instead of the -## ## profBin* methods. 
-## ## THIS IS MATTER OF REMOVAL -## .matchedFilter_binYonX_iter <- function(mz, -## int, -## scantime, -## valsPerSpect, -## binSize = 0.1, -## impute = "none", -## baseValue, -## distance, -## fwhm = 30, -## sigma = fwhm/2.3548, -## max = 5, -## snthresh = 10, -## steps = 2, -## mzdiff = 0.8 - binSize * steps, -## index = FALSE -## ){ - -## ## Input argument checking. -## if (missing(mz) | missing(int) | missing(scantime) | missing(valsPerSpect)) -## stop("Arguments 'mz', 'int', 'scantime' and 'valsPerSpect'", -## " are required!") -## if (length(mz) != length(int) | length(valsPerSpect) != length(scantime) -## | length(mz) != sum(valsPerSpect)) -## stop("Lengths of 'mz', 'int' and of 'scantime','valsPerSpect'", -## " have to match. Also, 'length(mz)' should be equal to", -## " 'sum(valsPerSpect)'.") -## ## Get the profile/binning function: allowed: bin, lin, linbase and intlin -## impute <- match.arg(impute, c("none", "lin", "linbase", "intlin")) -## if (impute == "intlin") -## stop("Not yet implemented!") -## toIdx <- cumsum(valsPerSpect) -## fromIdx <- c(1L, toIdx[-length(toIdx)] + 1L) - -## ## Create EIC buffer -## mrange <- range(mz) -## mass <- seq(floor(mrange[1]/binSize)*binSize, -## ceiling(mrange[2]/binSize)*binSize, by = binSize) -## bufsize <- min(100, length(mass)) - -## ## Calculate the breaks; we will re-use these in all calls. -## ## Calculate breaks and "correct" binSize; using seq ensures we're closer -## ## to the xcms profBin* results. -## binFromX <- min(mass) -## binToX <- max(mass) -## bin_size <- (binToX - binFromX) / (length(mass) - 1) -## brks <- seq(binFromX - bin_size/2, binToX + bin_size/2, by = bin_size) - -## ## Problem with sequential binning is that we don't want to have the condition -## ## <= last_break in each iteration since this would cause some values being -## ## considered for multiple bins. Thus we add an additional last bin, for which -## ## we however want to get rid of later. 
-## binRes <- binYonX(mz, int, -## breaks = brks[1:(bufsize+2)], -## fromIdx = fromIdx, -## toIdx = toIdx, -## baseValue = ifelse(impute == "none", yes = 0, no = NA), -## sortedX = TRUE, -## returnIndex = TRUE -## ) -## if (length(toIdx) == 1) -## binRes <- list(binRes) -## ## Remove the last bin value unless bufsize + 2 is equal to the length of brks -## if (length(brks) > (bufsize + 2)) { -## binRes <- lapply(binRes, function(z) { -## len <- length(z$x) -## return(list(x = z$x[-len], y = z$y[-len], index = z$index[-len])) -## }) -## } -## bufMax <- do.call(cbind, lapply(binRes, function(z) return(z$index))) -## bin_size <- binRes[[1]]$x[2] - binRes[[1]]$x[1] -## if (missing(baseValue)) -## baseValue <- numeric() -## if (length(baseValue) == 0) -## baseValue <- min(int, na.rm = TRUE) / 2 -## if (missing(distance)) -## distance <- numeric() -## if (length(distance) == 0) -## distance <- floor(0.075 / bin_size) -## binVals <- lapply(binRes, function(z) { -## return(imputeLinInterpol(z$y, method = impute, -## noInterpolAtEnds = TRUE, -## distance = distance, -## baseValue = baseValue)) -## }) -## buf <- do.call(cbind, binVals) - -## bufidx <- integer(length(mass)) -## idxrange <- c(1, bufsize) -## bufidx[idxrange[1]:idxrange[2]] <- 1:bufsize -## lookahead <- steps-1 -## lookbehind <- 1 - -## N <- nextn(length(scantime)) -## xrange <- range(scantime) -## x <- c(0:(N/2), -(ceiling(N/2-1)):-1)*(xrange[2]-xrange[1])/(length(scantime)-1) - -## filt <- -attr(eval(deriv3(~ 1/(sigma*sqrt(2*pi))*exp(-x^2/(2*sigma^2)), "x")), "hessian") -## filt <- filt/sqrt(sum(filt^2)) -## filt <- fft(filt, inverse = TRUE)/length(filt) - -## cnames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intf", -## "maxo", "maxf", "i", "sn") -## rmat <- matrix(nrow = 2048, ncol = length(cnames)) -## num <- 0 - -## for (i in seq(length = length(mass)-steps+1)) { -## ## Update EIC buffer if necessary -## if (bufidx[i+lookahead] == 0) { -## bufidx[idxrange[1]:idxrange[2]] <- 0 -## idxrange 
<- c(max(1, i - lookbehind), min(bufsize+i-1-lookbehind, -## length(mass))) -## bufidx[idxrange[1]:idxrange[2]] <- 1:(diff(idxrange)+1) -## ## Avoid the problem reported above for the sequential buffering: -## ## add an additional bin for which we remove the value afterwards. -## additionalBin <- 0 -## if ((idxrange[2] + 1) < length(brks)) { -## additionalBin <- 1 -## } -## ## Re-fill buffer. -## binRes <- binYonX(mz, int, -## breaks = brks[idxrange[1]:(idxrange[2] + 1 + -## additionalBin)], -## fromIdx = fromIdx, -## toIdx = toIdx, -## baseValue = ifelse(impute == "none", yes = 0, -## no = NA), -## sortedX = TRUE, -## returnIndex = TRUE -## ) -## if (length(toIdx) == 1) -## binRes <- list(binRes) -## if (additionalBin == 1) { -## binRes <- lapply(binRes, function(z) { -## len <- length(z$x) -## return(list(x = z$x[-len], y = z$y[-len], -## index = z$index[-len])) -## }) -## } -## bufMax <- do.call(cbind, lapply(binRes, function(z) return(z$index))) -## binVals <- lapply(binRes, function(z) { -## return(imputeLinInterpol(z$y, method = impute, -## noInterpolAtEnds = TRUE, -## distance = distance, -## baseValue = baseValue)) -## }) -## buf <- do.call(cbind, binVals) -## } -## ymat <- buf[bufidx[i:(i+steps-1)],,drop=FALSE] -## ysums <- colMax(ymat) -## yfilt <- filtfft(ysums, filt) -## gmax <- max(yfilt) -## for (j in seq(length = max)) { -## maxy <- which.max(yfilt) -## noise <- mean(ysums[ysums > 0]) -## ##noise <- mean(yfilt[yfilt >= 0]) -## sn <- yfilt[maxy]/noise -## if (yfilt[maxy] > 0 && yfilt[maxy] > snthresh*noise && ysums[maxy] > 0) { -## peakrange <- descendZero(yfilt, maxy) -## intmat <- ymat[, peakrange[1]:peakrange[2], drop = FALSE] -## mzmat <- matrix(mz[bufMax[bufidx[i:(i+steps-1)], -## peakrange[1]:peakrange[2]]], -## nrow = steps) -## which.intMax <- which.colMax(intmat) -## mzmat <- mzmat[which.intMax] -## if (all(is.na(mzmat))) { -## yfilt[peakrange[1]:peakrange[2]] <- 0 -## next -## } -## mzrange <- range(mzmat, na.rm = TRUE) -## massmean <- 
weighted.mean(mzmat, intmat[which.intMax], na.rm = TRUE) -## ## This case (the only non-na m/z had intensity 0) was reported -## ## by Gregory Alan Barding "binlin processing" -## if(any(is.na(massmean))) { -## massmean <- mean(mzmat, na.rm = TRUE) -## } - -## pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) / -## (peakrange[2] - peakrange[1]) -## into <- pwid*sum(ysums[peakrange[1]:peakrange[2]]) -## intf <- pwid*sum(yfilt[peakrange[1]:peakrange[2]]) -## maxo <- max(ysums[peakrange[1]:peakrange[2]]) -## maxf <- yfilt[maxy] -## yfilt[peakrange[1]:peakrange[2]] <- 0 -## num <- num + 1 -## ## Double the size of the output matrix if it's full -## if (num > nrow(rmat)) { -## nrmat <- matrix(nrow = 2*nrow(rmat), ncol = ncol(rmat)) -## nrmat[seq(length = nrow(rmat)),] = rmat -## rmat <- nrmat -## } -## rmat[num,] <- c(massmean, mzrange[1], mzrange[2], maxy, peakrange, -## into, intf, maxo, maxf, j, sn) -## } else -## break -## } -## } -## colnames(rmat) <- cnames -## rmat <- rmat[seq(length = num),] -## max <- max-1 + max*(steps-1) + max*ceiling(mzdiff/binSize) -## if (index) -## mzdiff <- mzdiff/binSize -## else { -## rmat[,"rt"] <- scantime[rmat[,"rt"]] -## rmat[,"rtmin"] <- scantime[rmat[,"rtmin"]] -## rmat[,"rtmax"] <- scantime[rmat[,"rtmax"]] -## } -## ## Select for each unique mzmin, mzmax, rtmin, rtmax the largest peak and report that. -## uorder <- order(rmat[,"into"], decreasing=TRUE) -## uindex <- rectUnique(rmat[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE], -## uorder, mzdiff) -## rmat <- rmat[uindex,,drop=FALSE] -## return(rmat) -## } - - -## ############################################################ -## ## The code of this function is basically the same than of the original -## ## findPeaks.matchedFilter method in xcms with the following differences: -## ## o Create the full 'profile matrix' (i.e. the m/z binned matrix) once -## ## instead of repeatedly creating a "buffer" of 100 m/z values. 
-## ## o Append the identified peaks to a list instead of generating a matrix -## ## with a fixed set of rows which is doubled in its size each time more -## ## peaks are identified than there are rows in the matrix. -## .matchedFilter_no_iter <- function(mz, -## int, -## scantime, -## valsPerSpect, -## binSize = 0.1, -## impute = "none", -## baseValue, -## distance, -## fwhm = 30, -## sigma = fwhm/2.3548, -## max = 5, -## snthresh = 10, -## steps = 2, -## mzdiff = 0.8 - binSize * steps, -## index = FALSE -## ){ -## ## Map arguments to findPeaks.matchedFilter arguments. -## step <- binSize -## profMeths <- c("profBinM", "profBinLinM", "profBinLinBaseM", "profIntLinM") -## names(profMeths) <- c("none", "lin", "linbase", "intlin") -## impute <- match.arg(impute, names(profMeths)) -## profFun <- profMeths[impute] - -## ## Input argument checking. -## if (missing(mz) | missing(int) | missing(scantime) | missing(valsPerSpect)) -## stop("Arguments 'mz', 'int', 'scantime' and 'valsPerSpect'", -## " are required!") -## if (length(mz) != length(int) | length(valsPerSpect) != length(scantime) -## | length(mz) != sum(valsPerSpect)) -## stop("Lengths of 'mz', 'int' and of 'scantime','valsPerSpect'", -## " have to match. Also, 'length(mz)' should be equal to", -## " 'sum(valsPerSpect)'.") -## ## Calculate a the "scanindex" from the number of values per spectrum: -## scanindex <- valueCount2ScanIndex(valsPerSpect) - -## ## Create the full profile matrix. -## mrange <- range(mz) -## mass <- seq(floor(mrange[1]/step)*step, ceiling(mrange[2]/step)*step, by = step) -## ## Calculate the /real/ bin size (as in xcms.c code). 
-## bin_size <- (max(mass) - min(mass)) / (length(mass) - 1) -## ## bufsize <- min(100, length(mass)) -## bufsize <- length(mass) -## ## Define profparam: -## profp <- list() -## if (missing(baseValue)) -## baseValue <- numeric() -## if (length(baseValue) != 0) -## profp$baselevel <- baseValue -## if (missing(distance)) -## distance <- numeric() -## if (length(distance) != 0) -## profp$basespace <- distance * bin_size -## ## This returns a matrix, ncol equals the number of spectra, nrow the bufsize. -## buf <- do.call(profFun, args = list(mz, int, scanindex, bufsize, mass[1], -## mass[bufsize], TRUE, profp)) - -## ## The full matrix, nrow is the total number of (binned) m/z values. -## bufMax <- profMaxIdxM(mz, int, scanindex, bufsize, mass[1], mass[bufsize], -## TRUE, profp) -## ## bufidx <- integer(length(mass)) -## ## idxrange <- c(1, bufsize) -## ## bufidx[idxrange[1]:idxrange[2]] <- 1:bufsize -## bufidx <- 1L:length(mass) -## lookahead <- steps-1 -## lookbehind <- 1 - -## N <- nextn(length(scantime)) -## xrange <- range(scantime) -## x <- c(0:(N/2), -(ceiling(N/2-1)):-1)*(xrange[2]-xrange[1])/(length(scantime)-1) - -## filt <- -attr(eval(deriv3(~ 1/(sigma*sqrt(2*pi))*exp(-x^2/(2*sigma^2)), "x")), "hessian") -## filt <- filt/sqrt(sum(filt^2)) -## filt <- fft(filt, inverse = TRUE)/length(filt) - -## cnames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intf", -## "maxo", "maxf", "i", "sn") -## num <- 0 - -## ResList <- list() - -## ## Can not do much here, lapply/apply won't work because of the 'steps' parameter. -## ## That's looping through the masses, i.e. rows of the profile matrix. 
-## for (i in seq(length = (length(mass)-steps+1))) { - -## ymat <- buf[bufidx[i:(i+steps-1)], , drop = FALSE] -## ysums <- colMax(ymat) -## yfilt <- filtfft(ysums, filt) -## gmax <- max(yfilt) -## for (j in seq(length = max)) { -## maxy <- which.max(yfilt) -## noise <- mean(ysums[ysums > 0]) -## ##noise <- mean(yfilt[yfilt >= 0]) -## sn <- yfilt[maxy]/noise -## if (yfilt[maxy] > 0 && yfilt[maxy] > snthresh*noise && ysums[maxy] > 0) { -## peakrange <- descendZero(yfilt, maxy) -## intmat <- ymat[, peakrange[1]:peakrange[2], drop = FALSE] -## mzmat <- matrix(mz[bufMax[bufidx[i:(i+steps-1)], -## peakrange[1]:peakrange[2]]], -## nrow = steps) -## which.intMax <- which.colMax(intmat) -## mzmat <- mzmat[which.intMax] -## if (all(is.na(mzmat))) { -## yfilt[peakrange[1]:peakrange[2]] <- 0 -## next -## } -## mzrange <- range(mzmat, na.rm = TRUE) -## massmean <- weighted.mean(mzmat, intmat[which.intMax], na.rm = TRUE) -## ## This case (the only non-na m/z had intensity 0) was reported -## ## by Gregory Alan Barding "binlin processing" -## if(any(is.na(massmean))) { -## massmean <- mean(mzmat, na.rm = TRUE) -## } - -## pwid <- (scantime[peakrange[2]] - scantime[peakrange[1]]) / -## (peakrange[2] - peakrange[1]) -## into <- pwid*sum(ysums[peakrange[1]:peakrange[2]]) -## intf <- pwid*sum(yfilt[peakrange[1]:peakrange[2]]) -## maxo <- max(ysums[peakrange[1]:peakrange[2]]) -## maxf <- yfilt[maxy] -## yfilt[peakrange[1]:peakrange[2]] <- 0 -## num <- num + 1 -## ResList[[num]] <- c(massmean, mzrange[1], mzrange[2], maxy, peakrange, -## into, intf, maxo, maxf, j, sn) -## } else -## break -## } -## } -## if (length(ResList) == 0) { -## rmat <- matrix(nrow = 0, ncol = length(cnames)) -## colnames(rmat) <- cnames -## return(rmat) -## } -## rmat <- do.call(rbind, ResList) -## if (is.null(dim(rmat))) { -## rmat <- matrix(rmat, nrow = 1) -## } -## colnames(rmat) <- cnames -## max <- max-1 + max*(steps-1) + max*ceiling(mzdiff/step) -## if (index) -## mzdiff <- mzdiff/step -## else { -## 
rmat[, "rt"] <- scantime[rmat[, "rt"]] -## rmat[, "rtmin"] <- scantime[rmat[, "rtmin"]] -## rmat[, "rtmax"] <- scantime[rmat[, "rtmax"]] -## } -## ## Select for each unique mzmin, mzmax, rtmin, rtmax the largest peak and report that. -## uorder <- order(rmat[, "into"], decreasing = TRUE) -## uindex <- rectUnique(rmat[, c("mzmin", "mzmax", "rtmin", "rtmax"), -## drop = FALSE], -## uorder, mzdiff) -## rmat <- rmat[uindex,,drop = FALSE] -## return(rmat) -## } - ############################################################ ## The code of this function is basically the same than of the original ## findPeaks.matchedFilter method in xcms with the following differences: @@ -1708,9 +1864,9 @@ do_detectFeatures_matchedFilter <- function(mz, ## MSW ## ##' @title Core API function for single-spectrum non-chromatography MS data -##' feature detection +##' peak detection ##' -##' @description This function performs feature detection in mass spectrometry +##' @description This function performs peak detection in mass spectrometry ##' direct injection spectrum using a wavelet based algorithm. ##' ##' @details This is a wrapper around the peak picker in Bioconductor's @@ -1719,33 +1875,33 @@ do_detectFeatures_matchedFilter <- function(mz, ##' \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. See the ##' \emph{xcmsDirect} vignette for more information. ##' -##' @inheritParams do_detectFeatures_centWave -##' @inheritParams featureDetection-centWave +##' @inheritParams do_findChromPeaks_centWave +##' @inheritParams findChromPeaks-centWave ##' @param ... Additional parameters to be passed to the ##' \code{\link[MassSpecWavelet]{peakDetectionCWT}} function. 
##' ##' @return -##' A matrix, each row representing an identified feature, with columns: +##' A matrix, each row representing an identified peak, with columns: ##' \describe{ -##' \item{mz}{m/z value of the feature at the centroid position.} -##' \item{mzmin}{Minimum m/z of the feature.} -##' \item{mzmax}{Maximum m/z of the feature.} +##' \item{mz}{m/z value of the peak at the centroid position.} +##' \item{mzmin}{Minimum m/z of the peak.} +##' \item{mzmax}{Maximum m/z of the peak.} ##' \item{rt}{Always \code{-1}.} ##' \item{rtmin}{Always \code{-1}.} ##' \item{rtmax}{Always \code{-1}.} -##' \item{into}{Integrated (original) intensity of the feature.} -##' \item{maxo}{Maximum intensity of the feature.} +##' \item{into}{Integrated (original) intensity of the peak.} +##' \item{maxo}{Maximum intensity of the peak.} ##' \item{intf}{Always \code{NA}.} -##' \item{maxf}{Maximum MSW-filter response of the feature.} +##' \item{maxf}{Maximum MSW-filter response of the peak.} ##' \item{sn}{Signal to noise ratio.} ##' } ##' -##' @family core feature detection functions +##' @family core peak detection functions ##' @seealso ##' \code{\link{MSW}} for the standard user interface ##' method. \code{\link[MassSpecWavelet]{peakDetectionCWT}} from the ##' \code{MassSpecWavelet} package. ##' @author Joachim Kutzera, Steffen Neumann, Johannes Rainer -do_detectFeatures_MSW <- function(mz, int, snthresh = 3, +do_findPeaks_MSW <- function(mz, int, snthresh = 3, verboseColumns = FALSE, ...) { ## Input argument checking. if (missing(int)) @@ -1964,16 +2120,20 @@ do_detectFeatures_MSW <- function(mz, int, snthresh = 3, ############################################################ ## MS1 +## This one might be too cumbersome to do it for plain vectors. It would be ideal +## for MSnExp objects though. 
## -do_detectFeatures_MS1 <- function() { -} +## do_findChromPeaks_MS1 <- function(mz, int, scantime, valsPerSpect) { +## ## Checks: do I have +## } + ## ## Original code: TODO REMOVE ME once method is validated. ## do_predictIsotopeROIs <- function(object, ## xcmsPeaks, ppm=25, ## maxcharge=3, maxiso=5, mzIntervalExtension=TRUE) { ## if(nrow(xcmsPeaks) == 0){ -## warning("Warning: There are no features (parameter >xcmsPeaks<) for the prediction of isotope ROIs !\n") +## warning("Warning: There are no peaks (parameter >xcmsPeaks<) for the prediction of isotope ROIs !\n") ## return(list()) ## } ## if(class(xcmsPeaks) != "xcmsPeaks") @@ -2032,7 +2192,7 @@ do_detectFeatures_MS1 <- function() { ## } ## Tuned from the original code. -##' @param features. \code{matrix} or \code{data.frame} with features for which +##' @param peaks. \code{matrix} or \code{data.frame} with peaks for which ##' isotopes should be predicted. Required columns are \code{"mz"}, ##' \code{"mzmin"}, \code{"mzmax"}, \code{"scmin"}, \code{"scmax"}, ##' \code{"intb"} and \code{"scale"}. @@ -2041,18 +2201,18 @@ do_detectFeatures_MS1 <- function() { ##' \code{"mzmax"}, \code{"scmin"}, \code{"scmax"}, \code{"length"} (always -1), ##' \code{"intensity"} (always -1) and \code{"scale"}. ##' @noRd -do_define_isotopes <- function(features., maxCharge = 3, maxIso = 5, +do_define_isotopes <- function(peaks., maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE) { req_cols <- c("mz", "mzmin", "mzmax", "scmin", "scmax", "scale") - if (is.null(dim(features.))) - stop("'features.' has to be a matrix or data.frame!") - if (!all(req_cols %in% colnames(features.))) { - not_there <- req_cols[!(req_cols %in% colnames(features.))] - stop("'features.' lacks required columns ", + if (is.null(dim(peaks.))) + stop("'peaks.' has to be a matrix or data.frame!") + if (!all(req_cols %in% colnames(peaks.))) { + not_there <- req_cols[!(req_cols %in% colnames(peaks.))] + stop("'peaks.' 
lacks required columns ", paste0("'", not_there, "'", collapse = ","), "!") } - if (is.data.frame(features.)) - features. <- as.matrix(features.) + if (is.data.frame(peaks.)) + peaks. <- as.matrix(peaks.) isotopeDistance <- 1.0033548378 charges <- 1:maxCharge @@ -2060,8 +2220,8 @@ do_define_isotopes <- function(features., maxCharge = 3, maxIso = 5, isotopePopulationMz <- unique(as.numeric(matrix(isos, ncol = 1) %*% (isotopeDistance / charges))) - ## split the features into a list. - roiL <- split(features.[, req_cols, drop = FALSE], f = 1:nrow(features.)) + ## split the peaks into a list. + roiL <- split(peaks.[, req_cols, drop = FALSE], f = 1:nrow(peaks.)) newRois <- lapply(roiL, function(z) { if (mzIntervalExtension) @@ -2081,22 +2241,22 @@ do_define_isotopes <- function(features., maxCharge = 3, maxIso = 5, return(do.call(rbind, newRois)) } -##' param @features. see do_define_isotopes +##' param @peaks. see do_define_isotopes ##' @param polarity character(1) defining the polarity, either \code{"positive"} ##' or \code{"negative"}. ##' @return see do_define_isotopes. ##' @noRd -do_define_adducts <- function(features., polarity = "positive") { +do_define_adducts <- function(peaks., polarity = "positive") { req_cols <- c("mz", "mzmin", "mzmax", "scmin", "scmax", "scale") - if (is.null(dim(features.))) - stop("'features' has to be a matrix or data.frame!") - if (!all(req_cols %in% colnames(features.))) { - not_there <- req_cols[!(req_cols %in% colnames(features.))] - stop("'features' lacks required columns ", + if (is.null(dim(peaks.))) + stop("'peaks.' has to be a matrix or data.frame!") + if (!all(req_cols %in% colnames(peaks.))) { + not_there <- req_cols[!(req_cols %in% colnames(peaks.))] + stop("'peaks.' lacks required columns ", paste0("'", not_there, "'", collapse = ","), "!") } - if (is.data.frame(features.)) - features. <- as.matrix(features.) + if (is.data.frame(peaks.)) + peaks. <- as.matrix(peaks.) 
## considered adduct distances ## reference: Huang N.; Siegel M.M.1; Kruppa G.H.; Laukien F.H.; J Am Soc ## Mass Spectrom 1999, 10, 1166–1173; Automation of a Fourier transform ion @@ -2235,7 +2395,7 @@ do_define_adducts <- function(features., polarity = "positive") { ) req_cols <- c("mz", "mzmin", "mzmax", "scmin", "scmax", "scale") - roiL <- split(features.[, req_cols, drop = FALSE], f = 1:nrow(features.)) + roiL <- split(peaks.[, req_cols, drop = FALSE], f = 1:nrow(peaks.)) newRois <- lapply(roiL, function(z) { mzDiff <- unlist(lapply(adductPopulationMz, function(x) { @@ -2295,44 +2455,44 @@ do_findKalmanROI <- function(mz, int, scantime, valsPerSpect, } ############################################################ -## do_detectFeatyres_centWaveWithPredIsoROIs +## do_findChromPeaks_centWaveWithPredIsoROIs ## 1) Run a centWave. -## 2) Predict isotope ROIs for the identified features. +## 2) Predict isotope ROIs for the identified peaks. ## 3) centWave on the predicted isotope ROIs. -## 4) combine both lists of identified features removing overlapping ones by -## keeping the feature with the largest signal intensity. -##' @title Core API function for two-step centWave feature detection with feature isotopes +## 4) combine both lists of identified peaks removing overlapping ones by +## keeping the peak with the largest signal intensity. +##' @title Core API function for two-step centWave peak detection with isotopes ##' -##' @description The \code{do_detectFeatures_centWaveWithPredIsoROIs} performs a -##' two-step centWave based feature detection: features are identified using -##' centWave followed by a prediction of the location of the identified features' -##' isotopes in the mz-retention time space. 
These locations are fed as +##' @description The \code{do_findChromPeaks_centWaveWithPredIsoROIs} performs a +##' two-step centWave based peak detection: chromatographic peaks are identified +##' using centWave followed by a prediction of the location of the identified +##' peaks' isotopes in the mz-retention time space. These locations are fed as ##' \emph{regions of interest} (ROIs) to a subsequent centWave run. All non -##' overlapping features from these two feature detection runs are reported as -##' the final list of identified features. +##' overlapping peaks from these two peak detection runs are reported as +##' the final list of identified peaks. ##' ##' @details For more details on the centWave algorithm see ##' \code{\link{centWave}}. ##' -##' @inheritParams featureDetection-centWave -##' @inheritParams featureDetection-centWaveWithPredIsoROIs -##' @inheritParams do_detectFeatures_centWave +##' @inheritParams findChromPeaks-centWave +##' @inheritParams findChromPeaks-centWaveWithPredIsoROIs +##' @inheritParams do_findChromPeaks_centWave ##' -##' @family core feature detection functions +##' @family core peak detection functions ##' @return -##' A matrix, each row representing an identified feature. All non-overlapping -##' features identified in both centWave runs are reported. +##' A matrix, each row representing an identified chromatographic peak. All +##' non-overlapping peaks identified in both centWave runs are reported. 
##' The matrix columns are: ##' \describe{ -##' \item{mz}{Intensity weighted mean of m/z values of the feature across scans.} -##' \item{mzmin}{Minimum m/z of the feature.} -##' \item{mzmax}{Maximum m/z of the feature.} -##' \item{rt}{Retention time of the feature's midpoint.} -##' \item{rtmin}{Minimum retention time of the feature.} -##' \item{rtmax}{Maximum retention time of the feature.} -##' \item{into}{Integrated (original) intensity of the feature.} -##' \item{intb}{Per-feature baseline corrected integrated feature intensity.} -##' \item{maxo}{Maximum intensity of the feature.} +##' \item{mz}{Intensity weighted mean of m/z values of the peaks across scans.} +##' \item{mzmin}{Minimum m/z of the peaks.} +##' \item{mzmax}{Maximum m/z of the peaks.} +##' \item{rt}{Retention time of the peak's midpoint.} +##' \item{rtmin}{Minimum retention time of the peak.} +##' \item{rtmax}{Maximum retention time of the peak.} +##' \item{into}{Integrated (original) intensity of the peak.} +##' \item{intb}{Per-peak baseline corrected integrated peak intensity.} +##' \item{maxo}{Maximum intensity of the peak.} ##' \item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, ##' \code{sd} being the standard deviation of local chromatographic noise.} ##' \item{egauss}{RMSE of Gaussian fit.} @@ -2344,14 +2504,14 @@ do_findKalmanROI <- function(mz, int, scantime, valsPerSpect, ##' \item{h}{Gaussian parameter h.} ##' \item{f}{Region number of the m/z ROI where the peak was localized.} ##' \item{dppm}{m/z deviation of mass trace across scanns in ppk.} -##' \item{scale}{Scale on which the feature was localized.} +##' \item{scale}{Scale on which the peak was localized.} ##' \item{scpos}{Peak position found by wavelet analysis (scan number).} ##' \item{scmin}{Left peak limit found by wavelet analysis (scan number).} ##' \item{scmax}{Right peak limit found by wavelet analysis (scan numer).} ##' } -##' @rdname do_detectFeatures_centWaveWithPredIsoROIs +##' @rdname 
do_findChromPeaks_centWaveWithPredIsoROIs ##' @author Hendrik Treutler, Johannes Rainer -do_detectFeatures_centWaveWithPredIsoROIs <- +do_findChromPeaks_centWaveWithPredIsoROIs <- function(mz, int, scantime, valsPerSpect, ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, fitgauss = FALSE, noise = 0, @@ -2360,11 +2520,11 @@ do_detectFeatures_centWaveWithPredIsoROIs <- maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE, polarity = "unknown") { ## Input argument checking: most of it will be done in - ## do_detectFeatures_centWave + ## do_findChromPeaks_centWave polarity <- match.arg(polarity, c("positive", "negative", "unknown")) ## 1) First centWave - feats_1 <- do_detectFeatures_centWave(mz = mz, int = int, + feats_1 <- do_findChromPeaks_centWave(mz = mz, int = int, scantime = scantime, valsPerSpect = valsPerSpect, ppm = ppm, @@ -2379,7 +2539,7 @@ do_detectFeatures_centWaveWithPredIsoROIs <- roiList = roiList, firstBaselineCheck = firstBaselineCheck, roiScales = roiScales) - return(do_detectFeatures_addPredIsoROIs(mz = mz, int = int, + return(do_findChromPeaks_addPredIsoROIs(mz = mz, int = int, scantime = scantime, valsPerSpect = valsPerSpect, ppm = ppm, @@ -2392,63 +2552,63 @@ do_detectFeatures_centWaveWithPredIsoROIs <- fitgauss = fitgauss, noise = noise, verboseColumns = verboseColumns, - features. = feats_1, + peaks. = feats_1, maxCharge = maxCharge, maxIso = maxIso, mzIntervalExtension = mzIntervalExtension, polarity = polarity)) } -##' @description The \code{do_detectFeatures_centWaveAddPredIsoROIs} performs -##' centWave based feature detection based in regions of interest (ROIs) -##' representing predicted isotopes for the features submitted with argument -##' \code{features.}. 
The function returns a matrix with the identified features -##' consisting of all input features and features representing predicted isotopes +##' @description The \code{do_findChromPeaks_centWaveAddPredIsoROIs} performs +##' centWave based peak detection based in regions of interest (ROIs) +##' representing predicted isotopes for the peaks submitted with argument +##' \code{peaks.}. The function returns a matrix with the identified peaks +##' consisting of all input peaks and peaks representing predicted isotopes ##' of these (if found by the centWave algorithm). ##' -##' @param features. A matrix or \code{xcmsPeaks} object such as one returned by -##' a call to \code{link{do_detectFeatures_centWave}} or +##' @param peaks. A matrix or \code{xcmsPeaks} object such as one returned by +##' a call to \code{link{do_findChromPeaks_centWave}} or ##' \code{link{findPeaks.centWave}} (both with \code{verboseColumns = TRUE}) -##' with the features for which isotopes should be predicted and used for an -##' additional feature detectoin using the centWave method. Required columns are: +##' with the peaks for which isotopes should be predicted and used for an +##' additional peak detectoin using the centWave method. Required columns are: ##' \code{"mz"}, \code{"mzmin"}, \code{"mzmax"}, \code{"scmin"}, \code{"scmax"}, ##' \code{"scale"} and \code{"into"}. ##' -##' @param snthresh For \code{do_detectFeatures_addPredIsoROIs}: +##' @param snthresh For \code{do_findChromPeaks_addPredIsoROIs}: ##' numeric(1) defining the signal to noise threshold for the centWave algorithm. -##' For \code{do_detectFeatures_centWaveWithPredIsoROIs}: numeric(1) defining the +##' For \code{do_findChromPeaks_centWaveWithPredIsoROIs}: numeric(1) defining the ##' signal to noise threshold for the initial (first) centWave run. 
##' -##' @inheritParams featureDetection-centWave -##' @inheritParams do_detectFeatures_centWave +##' @inheritParams findChromPeaks-centWave +##' @inheritParams do_findChromPeaks_centWave ##' -##' @rdname do_detectFeatures_centWaveWithPredIsoROIs -do_detectFeatures_addPredIsoROIs <- +##' @rdname do_findChromPeaks_centWaveWithPredIsoROIs +do_findChromPeaks_addPredIsoROIs <- function(mz, int, scantime, valsPerSpect, ppm = 25, peakwidth = c(20, 50), snthresh = 6.25, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, fitgauss = FALSE, noise = 0, - verboseColumns = FALSE, features. = NULL, + verboseColumns = FALSE, peaks. = NULL, maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE, polarity = "unknown") { ## Input argument checking: most of it will be done in - ## do_detectFeatures_centWave + ## do_findChromPeaks_centWave polarity <- match.arg(polarity, c("positive", "negative", "unknown")) ## These variables might at some point be added as function args. addNewIsotopeROIs <- TRUE addNewAdductROIs <- FALSE ## 2) predict isotope and/or adduct ROIs - f_mod <- features. + f_mod <- peaks. ## Extend the mzmin and mzmax if needed. - tittle <- features.[, "mz"] * (ppm / 2) / 1E6 - expand_mz <- (features.[, "mzmax"] - features.[, "mzmin"]) < (tittle * 2) + tittle <- peaks.[, "mz"] * (ppm / 2) / 1E6 + expand_mz <- (peaks.[, "mzmax"] - peaks.[, "mzmin"]) < (tittle * 2) if (any(expand_mz)) { - f_mod[expand_mz, "mzmin"] <- features.[expand_mz, "mz"] - + f_mod[expand_mz, "mzmin"] <- peaks.[expand_mz, "mz"] - tittle[expand_mz] - f_mod[expand_mz, "mzmax"] <- features.[expand_mz, "mz"] + tittle[expand_mz] + f_mod[expand_mz, "mzmax"] <- peaks.[expand_mz, "mz"] + tittle[expand_mz] } ## Add predicted ROIs if (addNewIsotopeROIs) { - iso_ROIs <- do_define_isotopes(features. = f_mod, + iso_ROIs <- do_define_isotopes(peaks. 
= f_mod, maxCharge = maxCharge, maxIso = maxIso, mzIntervalExtension = mzIntervalExtension) @@ -2458,7 +2618,7 @@ do_detectFeatures_addPredIsoROIs <- "length", "intensity", "scale") } if (addNewAdductROIs) { - add_ROIs <- do_define_adducts(features. = f_mod, polarity = polarity) + add_ROIs <- do_define_adducts(peaks. = f_mod, polarity = polarity) } else { add_ROIs <- matrix(nrow = 0, ncol = 8) colnames(iso_ROIs) <- c("mz", "mzmin", "mzmax", "scmin", "scmax", @@ -2467,7 +2627,7 @@ do_detectFeatures_addPredIsoROIs <- newROIs <- rbind(iso_ROIs, add_ROIs) rm(f_mod) if (nrow(newROIs) == 0) - return(features.) + return(peaks.) ## Remove ROIs that are out of mz range: mz_range <- range(mz) newROIs <- newROIs[newROIs[, "mzmin"] >= mz_range[1] & @@ -2485,14 +2645,14 @@ do_detectFeatures_addPredIsoROIs <- newROIs <- newROIs[keep_me, , drop = FALSE] if (nrow(newROIs) == 0) { - warning("No isotope or adduct ROIs for the identified features with a ", + warning("No isotope or adduct ROIs for the identified peaks with a ", "valid signal found!") - return(features.) + return(peaks.) } - + ## 3) centWave using the identified ROIs. 
roiL <- split(as.data.frame(newROIs), f = 1:nrow(newROIs)) - feats_2 <- do_detectFeatures_centWave(mz = mz, int = int, + feats_2 <- do_findChromPeaks_centWave(mz = mz, int = int, scantime = scantime, valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, @@ -2513,42 +2673,200 @@ do_detectFeatures_addPredIsoROIs <- "rtmin", "rtmax")])) if (any(any_na)) feats_2 <- feats_2[!any_na, , drop = FALSE] + no_mz_width <- (feats_2[, "mzmax"] - feats_2[, "mzmin"]) == 0 + no_rt_width <- (feats_2[, "rtmax"] - feats_2[, "rtmin"]) == 0 ## remove empty area - no_area <- (feats_2[, "mzmax"] - feats_2[, "mzmin"]) == 0 || - (feats_2[, "rtmax"] - feats_2[, "rtmin"]) == 0 + ## no_area <- (feats_2[, "mzmax"] - feats_2[, "mzmin"]) == 0 || + ## (feats_2[, "rtmax"] - feats_2[, "rtmin"]) == 0 + no_area <- no_mz_width || no_rt_width if (any(no_area)) feats_2 <- feats_2[!no_area, , drop = FALSE] } + ## 4) Check and remove ROIs overlapping with peaks. + if (nrow(feats_2) > 0) { + ## Comparing each ROI with each peak; slightly modified from the original + ## code in which we prevent calling apply followed by two lapply. 
+ removeROIs <- rep(FALSE, nrow(feats_2)) + removeFeats <- rep(FALSE, nrow(peaks.)) + overlapProportionThreshold <- 0.01 + for (i in 1:nrow(feats_2)) { + ## Compare ROI i with all peaks (peaks) and check if its + ## overlapping + ## mz + roiMzCenter <- (feats_2[i, "mzmin"] + feats_2[i, "mzmax"]) / 2 + peakMzCenter <- (peaks.[, "mzmin"] + peaks.[, "mzmax"]) / 2 + roiMzRadius <- (feats_2[i, "mzmax"] - feats_2[i, "mzmin"]) / 2 + peakMzRadius <- (peaks.[, "mzmax"] - peaks.[, "mzmin"]) / 2 + overlappingMz <- abs(peakMzCenter - roiMzCenter) <= + (roiMzRadius + peakMzRadius) + ## rt + roiRtCenter <- (feats_2[i, "rtmin"] + feats_2[i, "rtmax"]) / 2 + peakRtCenter <- (peaks.[, "rtmin"] + peaks.[, "rtmax"]) / 2 + roiRtRadius <- (feats_2[i, "rtmax"] - feats_2[i, "rtmin"]) / 2 + peakRtRadius <- (peaks.[, "rtmax"] - peaks.[, "rtmin"]) / 2 + overlappingRt <- abs(peakRtCenter - roiRtCenter) <= + (roiRtRadius + peakRtRadius) + is_overlapping <- overlappingMz & overlappingRt + ## Now determine whether we remove the ROI or the peak, depending + ## on the raw signal intensity. + if (any(is_overlapping)) { + if (any(peaks.[is_overlapping, "into"] > feats_2[i, "into"])) { + removeROIs[i] <- TRUE + } else { + removeFeats[is_overlapping] <- TRUE + } + } + } + feats_2 <- feats_2[!removeROIs, , drop = FALSE] + peaks. <- peaks.[!removeFeats, , drop = FALSE] + } + if (!verboseColumns) + peaks. <- peaks.[ , c("mz", "mzmin", "mzmax", "rt", "rtmin", + "rtmax", "into", "intb", "maxo", "sn")] + if (nrow(feats_2) == 0) + return(peaks.) + else + return(rbind(peaks., feats_2)) +} + +do_findChromPeaks_addPredIsoROIs_mod <- + function(mz, int, scantime, valsPerSpect, ppm = 25, peakwidth = c(20, 50), + snthresh = 6.25, prefilter = c(3, 100), mzCenterFun = "wMean", + integrate = 1, mzdiff = -0.001, fitgauss = FALSE, noise = 0, + verboseColumns = FALSE, peaks. 
= NULL, + maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE, + polarity = "unknown") { + ## Input argument checking: most of it will be done in + ## do_findChromPeaks_centWave + polarity <- match.arg(polarity, c("positive", "negative", "unknown")) + + ## These variables might at some point be added as function args. + addNewIsotopeROIs <- TRUE + addNewAdductROIs <- FALSE + ## 2) predict isotope and/or adduct ROIs + f_mod <- peaks. + ## Extend the mzmin and mzmax if needed. + tittle <- peaks.[, "mz"] * (ppm / 2) / 1E6 + expand_mz <- (peaks.[, "mzmax"] - peaks.[, "mzmin"]) < (tittle * 2) + if (any(expand_mz)) { + f_mod[expand_mz, "mzmin"] <- peaks.[expand_mz, "mz"] - + tittle[expand_mz] + f_mod[expand_mz, "mzmax"] <- peaks.[expand_mz, "mz"] + tittle[expand_mz] + } + ## Add predicted ROIs + if (addNewIsotopeROIs) { + iso_ROIs <- do_define_isotopes(peaks. = f_mod, + maxCharge = maxCharge, + maxIso = maxIso, + mzIntervalExtension = mzIntervalExtension) + } else { + iso_ROIs <- matrix(nrow = 0, ncol = 8) + colnames(iso_ROIs) <- c("mz", "mzmin", "mzmax", "scmin", "scmax", + "length", "intensity", "scale") + } + if (addNewAdductROIs) { + add_ROIs <- do_define_adducts(peaks. = f_mod, polarity = polarity) + } else { + add_ROIs <- matrix(nrow = 0, ncol = 8) + colnames(iso_ROIs) <- c("mz", "mzmin", "mzmax", "scmin", "scmax", + "length", "intensity", "scale") + } + newROIs <- rbind(iso_ROIs, add_ROIs) + rm(f_mod) + if (nrow(newROIs) == 0) + return(peaks.) 
+ ## Remove ROIs that are out of mz range: + mz_range <- range(mz) + newROIs <- newROIs[newROIs[, "mzmin"] >= mz_range[1] & + newROIs[, "mzmax"] <= mz_range[2], , drop = FALSE] + ## Remove ROIs with too low signal: + keep_me <- logical(nrow(newROIs)) + scanindex <- as.integer(valueCount2ScanIndex(valsPerSpect)) + for (i in 1:nrow(newROIs)) { + vals <- .Call("getEIC", mz, int, scanindex, + as.double(newROIs[i, c("mzmin", "mzmax")]), + as.integer(newROIs[i, c("scmin", "scmax")]), + as.integer(length(scantime)), PACKAGE ='xcms' ) + keep_me[i] <- sum(vals$intensity, na.rm = TRUE) >= 10 + } + newROIs <- newROIs[keep_me, , drop = FALSE] + + if (nrow(newROIs) == 0) { + warning("No isotope or adduct ROIs for the identified peaks with a ", + "valid signal found!") + return(peaks.) + } + cat("No. of input peaks: ", nrow(peaks.), "\n") + + ## 3) centWave using the identified ROIs. + roiL <- split(as.data.frame(newROIs), f = 1:nrow(newROIs)) + cat("Identified iso ROIs: ", length(roiL), "\n") + feats_2 <- do_findChromPeaks_centWave(mz = mz, int = int, + scantime = scantime, + valsPerSpect = valsPerSpect, + ppm = ppm, peakwidth = peakwidth, + snthresh = snthresh, + prefilter = prefilter, + mzCenterFun = mzCenterFun, + integrate = integrate, + mzdiff = mzdiff, fitgauss = fitgauss, + noise = noise, + verboseColumns = verboseColumns, + roiList = roiL, + firstBaselineCheck = FALSE, + roiScales = newROIs[, "scale"]) + cat("No. of chrom. peaks found in ROIs: ", nrow(feats_2), "\n") + ## Clean up of the results: + if (nrow(feats_2) > 0) { + ## remove NaNs + any_na <- is.na(rowSums(feats_2[, c("mz", "mzmin", "mzmax", "rt", + "rtmin", "rtmax")])) + if (any(any_na)) + feats_2 <- feats_2[!any_na, , drop = FALSE] + no_mz_width <- (feats_2[, "mzmax"] - feats_2[, "mzmin"]) == 0 + no_rt_width <- (feats_2[, "rtmax"] - feats_2[, "rtmin"]) == 0 + cat("No. of peaks with NA values: ", sum(any_na), "\n") + cat("No. of peaks without mz width: ", sum(no_mz_width), "\n") + cat("No. 
of peaks without rt width: ", sum(no_rt_width), "\n") + ## remove empty area + ## no_area <- (feats_2[, "mzmax"] - feats_2[, "mzmin"]) == 0 || + ## (feats_2[, "rtmax"] - feats_2[, "rtmin"]) == 0 + ## no_area <- no_mz_width || no_rt_width + no_area <- no_mz_width + if (any(no_area)) + feats_2 <- feats_2[!no_area, , drop = FALSE] + } + cat("After removing NAs or empty are peaks: ", nrow(feats_2), "\n") ## 4) Check and remove ROIs overlapping with peaks. if (nrow(feats_2) > 0) { ## Comparing each ROI with each peak; slightly modified from the original ## code in which we prevent calling apply followed by two lapply. removeROIs <- rep(FALSE, nrow(feats_2)) - removeFeats <- rep(FALSE, nrow(features.)) + removeFeats <- rep(FALSE, nrow(peaks.)) overlapProportionThreshold <- 0.01 for (i in 1:nrow(feats_2)) { - ## Compare ROI i with all features (peaks) and check if its + ## Compare ROI i with all peaks (peaks) and check if its ## overlapping ## mz roiMzCenter <- (feats_2[i, "mzmin"] + feats_2[i, "mzmax"]) / 2 - peakMzCenter <- (features.[, "mzmin"] + features.[, "mzmax"]) / 2 + peakMzCenter <- (peaks.[, "mzmin"] + peaks.[, "mzmax"]) / 2 roiMzRadius <- (feats_2[i, "mzmax"] - feats_2[i, "mzmin"]) / 2 - peakMzRadius <- (features.[, "mzmax"] - features.[, "mzmin"]) / 2 + peakMzRadius <- (peaks.[, "mzmax"] - peaks.[, "mzmin"]) / 2 overlappingMz <- abs(peakMzCenter - roiMzCenter) <= (roiMzRadius + peakMzRadius) ## rt roiRtCenter <- (feats_2[i, "rtmin"] + feats_2[i, "rtmax"]) / 2 - peakRtCenter <- (features.[, "rtmin"] + features.[, "rtmax"]) / 2 + peakRtCenter <- (peaks.[, "rtmin"] + peaks.[, "rtmax"]) / 2 roiRtRadius <- (feats_2[i, "rtmax"] - feats_2[i, "rtmin"]) / 2 - peakRtRadius <- (features.[, "rtmax"] - features.[, "rtmin"]) / 2 + peakRtRadius <- (peaks.[, "rtmax"] - peaks.[, "rtmin"]) / 2 overlappingRt <- abs(peakRtCenter - roiRtCenter) <= (roiRtRadius + peakRtRadius) is_overlapping <- overlappingMz & overlappingRt ## Now determine whether we remove the ROI or the peak, 
depending ## on the raw signal intensity. if (any(is_overlapping)) { - if (any(features.[is_overlapping, "into"] > feats_2[i, "into"])) { + if (any(peaks.[is_overlapping, "into"] > feats_2[i, "into"])) { removeROIs[i] <- TRUE } else { removeFeats[is_overlapping] <- TRUE @@ -2556,13 +2874,17 @@ do_detectFeatures_addPredIsoROIs <- } } feats_2 <- feats_2[!removeROIs, , drop = FALSE] - features. <- features.[!removeFeats, , drop = FALSE] + peaks. <- peaks.[!removeFeats, , drop = FALSE] } + cat("After removing overlapping peaks: ", nrow(feats_2), "\n") + cat("After removing overlapping peaks (peaks.): ", nrow(peaks.), "\n") if (!verboseColumns) - features. <- features.[ , c("mz", "mzmin", "mzmax", "rt", "rtmin", - "rtmax", "into", "intb", "maxo", "sn")] + peaks. <- peaks.[ , c("mz", "mzmin", "mzmax", "rt", "rtmin", + "rtmax", "into", "intb", "maxo", "sn")] + ## For now just return the new ones. + return(feats_2) if (nrow(feats_2) == 0) - return(features.) + return(peaks.) else - return(rbind(features., feats_2)) + return(rbind(peaks., feats_2)) } diff --git a/R/do_groupChromPeaks-functions.R b/R/do_groupChromPeaks-functions.R new file mode 100644 index 000000000..c118a2e57 --- /dev/null +++ b/R/do_groupChromPeaks-functions.R @@ -0,0 +1,574 @@ +## Correspondence functions. +#' @include functions-Params.R + +##' @title Core API function for peak density based chromatographic peak +##' grouping +##' +##' @description The \code{do_groupChromPeaks_density} function performs +##' chromatographic peak grouping based on the density (distribution) of peaks, +##' found in different samples, along the retention time axis in slices of +##' overlapping mz ranges. +##' +##' @details For overlapping slices along the mz dimension, the function +##' calculates the density distribution of identified peaks along the +##' retention time axis and groups peaks from the same or different samples +##' that are close to each other. See [Smith 2006] for more details. 
+##' +##' @note The default settings might not be appropriate for all LC/GC-MS setups, +##' especially the \code{bw} and \code{binSize} parameter should be adjusted +##' accordingly. +##' +##' @param peaks A \code{matrix} or \code{data.frame} with the mz values and +##' retention times of the identified chromatographic peaks in all samples of an +##' experiment. Required columns are \code{"mz"}, \code{"rt"} and +##' \code{"sample"}. The latter should contain \code{numeric} values representing +##' the index of the sample in which the peak was found. +##' +##' @inheritParams groupChromPeaks-density +##' +##' @return A \code{list} with elements \code{"featureDefinitions"} and +##' \code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row +##' representing a (mz-rt) feature (i.e. a peak group) with columns: +##' \describe{ +##' \item{"mzmed"}{median of the peaks' apex mz values.} +##' \item{"mzmin"}{smallest mz value of all peaks' apex within the feature.} +##' \item{"mzmax"}{largest mz value of all peaks' apex within the feature.} +##' \item{"rtmed"}{the median of the peaks' retention times.} +##' \item{"rtmin"}{the smallest retention time of the peaks in the group.} +##' \item{"rtmax"}{the largest retention time of the peaks in the group.} +##' \item{"npeaks"}{the total number of peaks assigned to the feature. +##' Note that this number can be larger than the total number of samples, since +##' multiple peaks from the same sample could be assigned to a feature.} +##' } +##' \code{"peakIndex"} is a \code{list} with the indices of all peaks in a +##' feature in the \code{peaks} input matrix. +##' +##' @family core peak grouping algorithms +##' +##' @references +##' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and +##' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite +##' Profiling Using Nonlinear Peak Alignment, Matching, and Identification" +##' \emph{Anal. Chem.} 2006, 78:779-787. 
##'
##' @author Colin Smith, Johannes Rainer
##'
##' @examples
##' ## Load the test data set
##' library(faahKO)
##' data(faahko)
##'
##' ## Extract the matrix with the identified peaks from the xcmsSet:
##' fts <- peaks(faahko)
##'
##' ## Perform the peak grouping with default settings:
##' res <- do_groupChromPeaks_density(fts, sampleGroups = sampclass(faahko))
##'
##' ## The feature definitions:
##' head(res$featureDefinitions)
##'
##' ## The assignment of peaks from the input matrix to the features
##' head(res$peakIndex)
do_groupChromPeaks_density <- function(peaks, sampleGroups,
                                       bw = 30, minFraction = 0.5,
                                       minSamples = 1,
                                       binSize = 0.25, maxFeatures = 50) {
    if (missing(sampleGroups))
        stop("Parameter 'sampleGroups' is missing! This should be a vector of ",
             "length equal to the number of samples specifying the group ",
             "assignment of the samples.")
    if (missing(peaks))
        stop("Parameter 'peaks' is missing!")
    ## FIX: was '!is.matrix(peaks) | is.data.frame(peaks)', which rejected
    ## every data.frame input; the negation has to cover the whole
    ## disjunction.
    if (!(is.matrix(peaks) || is.data.frame(peaks)))
        stop("'peaks' has to be a 'matrix' or a 'data.frame'!")
    ## Check that we've got all required columns
    .reqCols <- c("mz", "rt", "sample")
    if (!all(.reqCols %in% colnames(peaks)))
        stop("Required columns ",
             paste0("'", .reqCols[!.reqCols %in% colnames(peaks)], "'",
                    collapse = ", "), " not found in 'peaks' parameter")

    sampleGroups <- as.character(sampleGroups)
    sampleGroupNames <- unique(sampleGroups)
    sampleGroupTable <- table(sampleGroups)
    nSampleGroups <- length(sampleGroupTable)

    ## Check that sample indices in 'peaks' match the provided groups.
    if (max(peaks[, "sample"]) > length(sampleGroups))
        stop("Sample indices in 'peaks' are larger than there are sample",
             " groups specified with 'sampleGroups'!")

    ## Order peaks matrix by mz
    peakOrder <- order(peaks[, "mz"])
    peaks <- peaks[peakOrder, .reqCols, drop = FALSE]
    rownames(peaks) <- NULL
    rtRange <- range(peaks[, "rt"])

    ## Define the mass slices and the index in the peaks matrix with an mz
    ## value >= mass[i].
    mass <- seq(peaks[1, "mz"], peaks[nrow(peaks), "mz"] + binSize,
                by = binSize / 2)
    masspos <- findEqualGreaterM(peaks[, "mz"], mass)

    ## Pre-allocate result containers; doubled on demand in the loop below.
    groupmat <- matrix(nrow = 512, ncol = 7 + nSampleGroups)
    groupindex <- vector("list", 512)

    densFrom <- rtRange[1] - 3 * bw
    densTo <- rtRange[2] + 3 * bw
    densN <- max(512, 2^(ceiling(log2(diff(rtRange) / (bw / 2)))))
    endIdx <- 0
    num <- 0
    gcount <- integer(nSampleGroups)
    message("Processing ", length(mass) - 1, " mz slices ... ",
            appendLF = FALSE)
    for (i in seq_len(length(mass) - 2)) {
        ## That's identifying overlapping mz slices.
        startIdx <- masspos[i]
        endIdx <- masspos[i + 2] - 1
        if (endIdx - startIdx < 0)
            next
        curMat <- peaks[startIdx:endIdx, , drop = FALSE]
        den <- density(curMat[, "rt"], bw = bw, from = densFrom, to = densTo,
                       n = densN)
        maxden <- max(den$y)
        deny <- den$y
        snum <- 0
        ## Peel off density maxima until they drop below 1/20 of the overall
        ## maximum or maxFeatures groups were defined for this slice.
        while (deny[maxy <- which.max(deny)] > maxden / 20 &&
               snum < maxFeatures) {
            grange <- descendMin(deny, maxy)
            deny[grange[1]:grange[2]] <- 0
            gidx <- which(curMat[, "rt"] >= den$x[grange[1]] &
                          curMat[, "rt"] <= den$x[grange[2]])
            ## Determine the sample group of the samples in which the peaks
            ## were detected and check if they correspond to the required
            ## limits.
            tt <- table(sampleGroups[unique(curMat[gidx, "sample"])])
            if (!any(tt / sampleGroupTable[names(tt)] >= minFraction &
                     tt >= minSamples))
                next
            snum <- snum + 1
            num <- num + 1
            ## Double the size of the output containers if they're full
            if (num > nrow(groupmat)) {
                groupmat <- rbind(groupmat,
                                  matrix(nrow = nrow(groupmat),
                                         ncol = ncol(groupmat)))
                groupindex <- c(groupindex,
                                vector("list", length(groupindex)))
            }
            gcount <- rep(0, length(sampleGroupNames))
            names(gcount) <- sampleGroupNames
            gcount[names(tt)] <- as.numeric(tt)
            groupmat[num, 1] <- median(curMat[gidx, "mz"])
            groupmat[num, 2:3] <- range(curMat[gidx, "mz"])
            groupmat[num, 4] <- median(curMat[gidx, "rt"])
            groupmat[num, 5:6] <- range(curMat[gidx, "rt"])
            groupmat[num, 7] <- length(gidx)
            groupmat[num, 7 + seq_along(gcount)] <- gcount
            groupindex[[num]] <- sort(peakOrder[(startIdx:endIdx)[gidx]])
        }
    }
    message("OK")

    colnames(groupmat) <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin",
                            "rtmax", "npeaks", sampleGroupNames)

    groupmat <- groupmat[seq_len(num), , drop = FALSE]
    groupindex <- groupindex[seq_len(num)]

    ## Remove groups that overlap with more "well-behaved" groups
    numsamp <- rowSums(groupmat[, (match("npeaks",
                                         colnames(groupmat)) + 1):ncol(groupmat),
                                drop = FALSE])
    uorder <- order(-numsamp, groupmat[, "npeaks"])

    uindex <- rectUnique(groupmat[, c("mzmin", "mzmax", "rtmin", "rtmax"),
                                  drop = FALSE],
                         uorder)

    return(list(featureDefinitions = groupmat[uindex, , drop = FALSE],
                peakIndex = groupindex[uindex]))
}
## Experimental variant of do_groupChromPeaks_density that processes the mz
## slices in parallel via BiocParallel::bplapply - just to check if we could
## squeeze a little bit more out using parallel processing.
do_groupChromPeaks_density_par <- function(peaks, sampleGroups,
                                           bw = 30, minFraction = 0.5,
                                           minSamples = 1, binSize = 0.25,
                                           maxFeatures = 50) {
    if (missing(sampleGroups))
        stop("Parameter 'sampleGroups' is missing! This should be a vector of ",
             "length equal to the number of samples specifying the group ",
             "assignment of the samples.")
    if (missing(peaks))
        stop("Parameter 'peaks' is missing!")
    ## FIX: was '!is.matrix(peaks) | is.data.frame(peaks)', which rejected
    ## every data.frame input; negate the whole disjunction.
    if (!(is.matrix(peaks) || is.data.frame(peaks)))
        stop("Peaks has to be a 'matrix' or a 'data.frame'!")
    ## Check that we've got all required columns
    .reqCols <- c("mz", "rt", "sample")
    if (!all(.reqCols %in% colnames(peaks)))
        stop("Required columns ",
             paste0("'", .reqCols[!.reqCols %in% colnames(peaks)], "'",
                    collapse = ", "), " not found in 'peaks' parameter")

    sampleGroups <- as.character(sampleGroups)
    sampleGroupNames <- unique(sampleGroups)
    sampleGroupTable <- table(sampleGroups)
    nSampleGroups <- length(sampleGroupTable)

    ## Order peaks matrix by mz
    peakOrder <- order(peaks[, "mz"])
    peaks <- peaks[peakOrder, .reqCols, drop = FALSE]
    rownames(peaks) <- NULL
    rtRange <- range(peaks[, "rt"])

    ## Define the mass slices and the index in the peaks matrix with an mz
    ## value >= mass[i].
    mass <- seq(peaks[1, "mz"], peaks[nrow(peaks), "mz"] + binSize,
                by = binSize / 2)
    masspos <- findEqualGreaterM(peaks[, "mz"], mass)

    ## NOTE: the original also pre-allocated 'groupmat'/'groupindex' here;
    ## both were dead (unconditionally rebuilt from the bplapply result
    ## below) and have been removed.

    ## Create the list of peak data subsets (one per overlapping mz slice).
    ftsL <- vector("list", length(mass))
    for (i in seq_len(length(mass) - 2)) {
        startIdx <- masspos[i]
        endIdx <- masspos[i + 2] - 1
        ftsL[[i]] <- cbind(peaks[startIdx:endIdx, , drop = FALSE],
                           idx = startIdx:endIdx)
    }
    ftsL <- ftsL[lengths(ftsL) > 0]
    ## Here we can run bplapply:
    res <- bplapply(ftsL, function(z, rtr, bw, maxF, sampleGrps,
                                   sampleGroupTbl, minFr, minSmpls,
                                   sampleGroupNms, peakOrdr) {
        den <- density(z[, "rt"], bw = bw, from = rtr[1] - 3 * bw,
                       to = rtr[2] + 3 * bw,
                       n = max(512, 2^(ceiling(log2(diff(rtr) / (bw / 2))))))
        maxden <- max(den$y)
        deny <- den$y
        snum <- 0
        tmpL <- vector("list", maxF)
        tmpL2 <- tmpL
        while (deny[maxy <- which.max(deny)] > maxden / 20 && snum < maxF) {
            grange <- xcms:::descendMin(deny, maxy)
            deny[grange[1]:grange[2]] <- 0
            gidx <- which(z[, "rt"] >= den$x[grange[1]] &
                          z[, "rt"] <= den$x[grange[2]])
            ## Determine the sample group of the samples in which the peaks
            ## were detected and check if they correspond to the required
            ## limits.
            tt <- table(sampleGrps[unique(z[gidx, "sample"])])
            if (!any(tt / sampleGroupTbl[names(tt)] >= minFr &
                     tt >= minSmpls))
                next
            snum <- snum + 1
            gcount <- rep(0, length(sampleGroupNms))
            names(gcount) <- sampleGroupNms
            gcount[names(tt)] <- as.numeric(tt)

            tmpL[[snum]] <- c(median(z[gidx, "mz"]),
                              range(z[gidx, "mz"]),
                              median(z[gidx, "rt"]),
                              range(z[gidx, "rt"]),
                              length(gidx),
                              gcount)
            tmpL2[[snum]] <- sort(peakOrdr[z[, "idx"][gidx]])
        }
        tmpL <- tmpL[lengths(tmpL) > 0]
        tmpL2 <- tmpL2[lengths(tmpL2) > 0]
        if (length(tmpL))
            return(list(grps = do.call(rbind, tmpL), idx = tmpL2))
    }, rtr = rtRange, bw = bw, maxF = maxFeatures, sampleGrps = sampleGroups,
    sampleGroupTbl = sampleGroupTable, minFr = minFraction,
    minSmpls = minSamples, sampleGroupNms = sampleGroupNames,
    peakOrdr = peakOrder)

    ## Now we have to process that list of results.
    res <- res[lengths(res) > 0]
    groupmat <- do.call(rbind, lapply(res, function(z) z[["grps"]]))
    groupidx <- unlist(lapply(res, function(z) z[["idx"]]), recursive = FALSE)

    colnames(groupmat) <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin",
                            "rtmax", "npeaks", sampleGroupNames)

    ## Remove groups that overlap with more "well-behaved" groups
    numsamp <- rowSums(groupmat[, (match("npeaks",
                                         colnames(groupmat)) + 1):ncol(groupmat),
                                drop = FALSE])
    uorder <- order(-numsamp, groupmat[, "npeaks"])

    uindex <- rectUnique(groupmat[, c("mzmin", "mzmax", "rtmin", "rtmax"),
                                  drop = FALSE],
                         uorder)

    return(list(featureDefinitions = groupmat[uindex, , drop = FALSE],
                peakIndex = groupidx[uindex]))
}
##' @title Core API function for peak grouping using mzClust
##'
##' @description The \code{do_groupPeaks_mzClust} function performs high
##' resolution correspondence on single spectra samples.
##'
##' @inheritParams groupChromPeaks-density
##' @inheritParams do_groupChromPeaks_density
##' @inheritParams groupChromPeaks-mzClust
##'
##' @return A \code{list} with elements \code{"featureDefinitions"} and
##' \code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row
##' representing an (mz-rt) feature (i.e. peak group) with columns:
##' \describe{
##' \item{"mzmed"}{median of the peaks' apex mz values.}
##' \item{"mzmin"}{smallest mz value of all peaks' apex within the feature.}
##' \item{"mzmax"}{largest mz value of all peaks' apex within the feature.}
##' \item{"rtmed"}{always \code{-1}.}
##' \item{"rtmin"}{always \code{-1}.}
##' \item{"rtmax"}{always \code{-1}.}
##' \item{"npeaks"}{the total number of peaks assigned to the feature.
##' Note that this number can be larger than the total number of samples, since
##' multiple peaks from the same sample could be assigned to a group.}
##' }
##' \code{"peakIndex"} is a \code{list} with the indices of all peaks in a
##' peak group in the \code{peaks} input matrix.
##'
##' @family core peak grouping algorithms
##'
##' @references Saira A. Kazmi, Samiran Ghosh, Dong-Guk Shin, Dennis W. Hill
##' and David F. Grant\cr \emph{Alignment of high resolution mass spectra:
##' development of a heuristic approach for metabolomics}.\cr Metabolomics,
##' Vol. 2, No. 2, 75-83 (2006)
do_groupPeaks_mzClust <- function(peaks, sampleGroups, ppm = 20,
                                  absMz = 0, minFraction = 0.5,
                                  minSamples = 1) {
    if (missing(sampleGroups))
        stop("Parameter 'sampleGroups' is missing! This should be a vector of ",
             "length equal to the number of samples specifying the group ",
             "assignment of the samples.")
    if (missing(peaks))
        stop("Parameter 'peaks' is missing!")
    ## FIX: was '!is.matrix(peaks) | is.data.frame(peaks)', which rejected
    ## every data.frame input; negate the whole disjunction.
    if (!(is.matrix(peaks) || is.data.frame(peaks)))
        stop("Peaks has to be a 'matrix' or a 'data.frame'!")
    ## Check that we've got all required columns
    .reqCols <- c("mz", "sample")
    if (!all(.reqCols %in% colnames(peaks)))
        stop("Required columns ",
             paste0("'", .reqCols[!.reqCols %in% colnames(peaks)], "'",
                    collapse = ", "), " not found in 'peaks' parameter")
    if (!is.factor(sampleGroups))
        sampleGroups <- factor(sampleGroups, levels = unique(sampleGroups))
    sampleGroupNames <- levels(sampleGroups)
    sampleGroupTable <- table(sampleGroups)
    nSampleGroups <- length(sampleGroupTable)

    ## Check that sample indices in 'peaks' match the provided groups.
    if (max(peaks[, "sample"]) > length(sampleGroups))
        stop("Sample indices in 'peaks' are larger than there are sample",
             " groups specified with 'sampleGroups'!")

    ## The actual clustering is done by the internal mzClustGeneric helper.
    grps <- mzClustGeneric(peaks[, .reqCols, drop = FALSE],
                           sampclass = sampleGroups,
                           mzppm = ppm,
                           mzabs = absMz,
                           minsamp = minSamples,
                           minfrac = minFraction)
    grpmat <- grps$mat
    ## mzClustGeneric returns a plain vector for a single group; coerce it
    ## back to a one-row matrix so the cbind below works.
    if (is.null(nrow(grpmat))) {
        matColNames <- names(grpmat)
        grpmat <- matrix(grpmat, ncol = length(grpmat), byrow = FALSE)
        colnames(grpmat) <- matColNames
    }
    ## There is no retention time dimension in direct-infusion data; fill the
    ## rt columns with -1 to keep the result format consistent.
    rts <- rep(-1, nrow(grpmat))
    cns <- colnames(grpmat)
    grpmat <- cbind(grpmat[, 1:3, drop = FALSE], rts, rts, rts,
                    grpmat[, 4:ncol(grpmat), drop = FALSE])
    colnames(grpmat) <- c(cns[1:3], c("rtmed", "rtmin", "rtmax"),
                          cns[4:length(cns)])
    return(list(featureDefinitions = grpmat, peakIndex = grps$idx))
}
##' @title Core API function for chromatic peak grouping using a nearest
##' neighbor approach
##'
##' @description The \code{do_groupChromPeaks_nearest} function groups peaks
##' across samples by creating a master peak list and assigning corresponding
##' peaks from all samples to each peak group (i.e. feature). The method is
##' inspired by the correspondence algorithm of mzMine [Katajamaa 2006].
##'
##' @inheritParams do_groupChromPeaks_density
##' @inheritParams groupChromPeaks-nearest
##'
##' @return A \code{list} with elements \code{"featureDefinitions"} and
##' \code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row
##' representing an (mz-rt) feature (i.e. peak group) with columns:
##' \describe{
##' \item{"mzmed"}{median of the peaks' apex mz values.}
##' \item{"mzmin"}{smallest mz value of all peaks' apex within the feature.}
##' \item{"mzmax"}{largest mz value of all peaks' apex within the feature.}
##' \item{"rtmed"}{the median of the peaks' retention times.}
##' \item{"rtmin"}{the smallest retention time of the peaks in the feature.}
##' \item{"rtmax"}{the largest retention time of the peaks in the feature.}
##' \item{"npeaks"}{the total number of peaks assigned to the feature.}
##' }
##' \code{"peakIndex"} is a \code{list} with the indices of all peaks in a
##' feature in the \code{peaks} input matrix.
##'
##' @family core peak grouping algorithms
##'
##' @references Katajamaa M, Miettinen J, Oresic M: MZmine: Toolbox for
##' processing and visualization of mass spectrometry based molecular profile
##' data. \emph{Bioinformatics} 2006, 22:634-636.
do_groupChromPeaks_nearest <- function(peaks, sampleGroups, mzVsRtBalance = 10,
                                       absMz = 0.2, absRt = 15, kNN = 10) {
    if (missing(sampleGroups))
        stop("Parameter 'sampleGroups' is missing! This should be a vector of ",
             "length equal to the number of samples specifying the group ",
             "assignment of the samples.")
    if (missing(peaks))
        stop("Parameter 'peaks' is missing!")
    ## FIX: was '!is.matrix(peaks) | is.data.frame(peaks)', which rejected
    ## every data.frame input; negate the whole disjunction.
    if (!(is.matrix(peaks) || is.data.frame(peaks)))
        stop("Peaks has to be a 'matrix' or a 'data.frame'!")
    ## Check that we've got all required columns
    .reqCols <- c("mz", "rt", "sample")
    if (!all(.reqCols %in% colnames(peaks)))
        stop("Required columns ",
             paste0("'", .reqCols[!.reqCols %in% colnames(peaks)], "'",
                    collapse = ", "), " not found in 'peaks' parameter")
    if (!is.factor(sampleGroups))
        sampleGroups <- factor(sampleGroups, levels = unique(sampleGroups))
    sampleGroupNames <- levels(sampleGroups)
    sampleGroupTable <- table(sampleGroups)
    nSampleGroups <- length(sampleGroupTable)

    ## Mapping to the old xcms variable names:
    ## sampleGroups == classlabel, nSampleGroups == gcount, peaks == peakmat
    peaks <- peaks[, .reqCols, drop = FALSE]

    parameters <- list(mzVsRTBalance = mzVsRtBalance, mzcheck = absMz,
                       rtcheck = absRt, knn = kNN)

    ## Start the master peak list from the sample with the most peaks.
    ptable <- table(peaks[, "sample"])
    pord <- ptable[order(ptable, decreasing = TRUE)]
    sid <- as.numeric(names(pord))
    pn <- as.numeric(pord)

    ## environment - probably not a good idea for parallel processing - we
    ## would like to have data copying there (or better just provide the data
    ## chunk it needs to process).
    mplenv <- new.env(parent = .GlobalEnv)
    mplenv$mplist <- matrix(0, pn[1], length(sid))
    mplenv$mplist[, sid[1]] <- which(peaks[, "sample"] == sid[1])
    mplenv$mplistmean <- data.frame(peaks[which(peaks[, "sample"] == sid[1]),
                                          c("mz", "rt")])
    mplenv$peakmat <- peaks

    ## NOTE(review): assumes at least two samples; sid[2:length(sid)] would
    ## misbehave for a single-sample input - confirm with callers.
    sapply(sid[2:length(sid)], function(sample, mplenv) {
        message("Processing sample number ", sample, " ... ", appendLF = FALSE)
        ## Calculate for each master list row the mean mz and rt of the peaks
        ## assigned to it so far.
        for (mml in seq(mplenv$mplist[, 1])) {
            mplenv$mplistmean[mml, "mz"] <-
                mean(mplenv$peakmat[mplenv$mplist[mml, ], "mz"])
            mplenv$mplistmean[mml, "rt"] <-
                mean(mplenv$peakmat[mplenv$mplist[mml, ], "rt"])
        }

        mplenv$peakIdxList <- data.frame(
            peakidx = which(mplenv$peakmat[, "sample"] == sample),
            isJoinedPeak = FALSE
        )
        if (length(mplenv$peakIdxList$peakidx) == 0)
            message("Warning: No peaks in sample number ", sample)

        ## Score every peak of this sample against the master list rows.
        scoreList <- sapply(mplenv$peakIdxList$peakidx,
                            function(currPeak, para, mplenv) {
                                xcms:::patternVsRowScore(currPeak, para, mplenv)
                            }, parameters, mplenv, simplify = FALSE)
        scoreList <- do.call(rbind, scoreList)

        ## Browse scores in order of goodness-of-fit.
        ## NOTE(review): this sorts ascending although the comment in the
        ## original claims "descending" - kept as-is, confirm intent.
        scoreListcurr <- scoreList[order(scoreList[, "score"]), ]
        if (nrow(scoreListcurr) > 0) {
            for (scoreIter in 1:nrow(scoreListcurr)) {

                iterPeak <- scoreListcurr[scoreIter, "peak"]
                iterRow <- scoreListcurr[scoreIter, "mpListRow"]

                ## Check if master list row is already assigned with peak
                if (scoreListcurr[scoreIter, "isJoinedRow"] == TRUE)
                    next

                ## Check if peak is already assigned to some master list row
                if (scoreListcurr[scoreIter, "isJoinedPeak"] == TRUE)
                    next

                ## Assign peak to master peak list row
                mplenv$mplist[iterRow, sample] <- iterPeak

                ## Mark row and peak as joined
                setTrue <- which(scoreListcurr[, "mpListRow"] == iterRow)
                scoreListcurr[setTrue, "isJoinedRow"] <- TRUE
                setTrue <- which(scoreListcurr[, "peak"] == iterPeak)
                scoreListcurr[setTrue, "isJoinedPeak"] <- TRUE
                mplenv$peakIdxList[which(mplenv$peakIdxList$peakidx == iterPeak),
                                   "isJoinedPeak"] <- TRUE
            }
        }
        ## Unassigned peaks get a new master list row each.
        notJoinedPeaks <- mplenv$peakIdxList[
            which(mplenv$peakIdxList$isJoinedPeak == FALSE), "peakidx"]
        for (notJoinedPeak in notJoinedPeaks) {
            mplenv$mplist <- rbind(mplenv$mplist,
                                   matrix(0, 1, dim(mplenv$mplist)[2]))
            mplenv$mplist[length(mplenv$mplist[, 1]), sample] <- notJoinedPeak
        }

        ## Clear "Joined" information from all master peaklist rows
        rm(list = "peakIdxList", envir = mplenv)
        message("OK")
    }, mplenv)

    ## Summarize the master peak list into the feature definition matrix.
    groupmat <- matrix(0, nrow(mplenv$mplist), 7 + nSampleGroups)
    colnames(groupmat) <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin",
                            "rtmax", "npeaks", sampleGroupNames)
    groupindex <- vector("list", nrow(mplenv$mplist))
    ## Variable to count samples for a peak
    sampCounts <- rep_len(0, nSampleGroups)
    names(sampCounts) <- sampleGroupNames
    for (i in 1:nrow(mplenv$mplist)) {
        groupmat[i, "mzmed"] <- median(peaks[mplenv$mplist[i, ], "mz"])
        groupmat[i, c("mzmin", "mzmax")] <-
            range(peaks[mplenv$mplist[i, ], "mz"])
        groupmat[i, "rtmed"] <- median(peaks[mplenv$mplist[i, ], "rt"])
        groupmat[i, c("rtmin", "rtmax")] <-
            range(peaks[mplenv$mplist[i, ], "rt"])

        ## FIX: count non-zero entries of the master list row directly; the
        ## original 'length(which(peaks[mplenv$mplist[i, ]] > 0))' indexed the
        ## peaks matrix linearly and only yielded the same count because mz
        ## values are always > 0.
        groupmat[i, "npeaks"] <- sum(mplenv$mplist[i, ] > 0)

        ## Summarize the number of samples per group the peak was found in.
        sampCounts[] <- 0
        tbl <- table(sampleGroups[peaks[mplenv$mplist[i, ], "sample"]])
        sampCounts[names(tbl)] <- as.numeric(tbl)
        groupmat[i, 7 + seq_len(nSampleGroups)] <- sampCounts
        groupindex[[i]] <- mplenv$mplist[i, (which(mplenv$mplist[i, ] > 0))]
    }

    return(list(featureDefinitions = groupmat, peakIndex = groupindex))
}

#' @include DataClasses.R
.SUPPORTED_AGG_FUN_CHROM <- c("sum", "max", "min", "mean")
names(.SUPPORTED_AGG_FUN_CHROM) <-
    c("Total ion chromatogram (TIC).", "Base peak chromatogram (BPC).",
      "Intensity representing the minimum intensity across the mz range.",
      "Intensity representing the mean intensity across the mz range.")
#' @title Validation function for Chromatogram objects
#'
#' @description This function can be used instead of the \code{validObject} to
#' check if the chromatogram is valid, without having to call the validity
#' method on all super classes.
#'
#' @param object A \code{Chromatogram} object.
#'
#' @return \code{TRUE} if the \code{object} is valid and the error messages
#' otherwise (i.e. a \code{character}).
#'
#' @author Johannes Rainer
#'
#' @noRd
validChromatogram <- function(object) {
    msg <- character()
    if (length(object@rtime) != length(object@intensity))
        msg <- c(msg, "Length of 'rt' and 'intensity' have to match!")
    if (is.unsorted(object@rtime))
        msg <- c(msg, paste0("'rtime' has to be increasingly ordered!"))
    if (length(object@mz) > 0 && length(object@mz) != 2)
        msg <- c(msg, paste0("'mz' is supposed to contain the ",
                             "minimum and maximum mz values for the ",
                             "chromatogram."))
    ## FIX: 'is.unsorted' returns NA if exactly one of the two mz values is
    ## NA, which made the 'if' below throw an error. Skip the order check
    ## whenever any value is missing (the original only skipped when all
    ## were NA).
    if (!anyNA(object@mz) && is.unsorted(object@mz))
        msg <- c(msg, "'mz' has to be increasingly ordered!")
    if (length(object@filterMz) > 0 && length(object@filterMz) != 2)
        msg <- c(msg, paste0("'filterMz' is supposed to contain the ",
                             "minimum and maximum mz values of the filter",
                             " used to create the chromatogram."))
    if (length(object@precursorMz) > 0 && length(object@precursorMz) != 2)
        msg <- c(msg, paste0("'precursorMz' is supposed to be a numeric of",
                             " length 2."))
    if (length(object@productMz) > 0 && length(object@productMz) != 2)
        msg <- c(msg, paste0("'productMz' is supposed to be a numeric of",
                             " length 2."))
    if (length(object@fromFile) > 1 || any(object@fromFile < 0))
        msg <- c(msg, paste0("'fromFile' is supposed to be a single ",
                             "positive integer!"))
    if (length(object@aggregationFun) > 1)
        msg <- c(msg, "Length of 'aggregationFun' has to be 1!")
    if (length(object@aggregationFun)) {
        if (!object@aggregationFun %in% .SUPPORTED_AGG_FUN_CHROM)
            msg <- c(msg, paste0("Invalid value for 'aggregationFun'! only ",
                                 paste0("'", .SUPPORTED_AGG_FUN_CHROM, "'",
                                        collapse = ","), " are allowed!"))
    }
    if (length(msg) == 0) TRUE
    else msg
}
#' @description \code{Chromatogram}: create an instance of the
#' \code{Chromatogram} class.
#'
#' @param rtime \code{numeric} with the retention times (length has to be equal
#' to the length of \code{intensity}).
#'
#' @param intensity \code{numeric} with the intensity values (length has to be
#' equal to the length of \code{rtime}).
#'
#' @param mz \code{numeric(2)} representing the mz value range (min, max)
#' on which the chromatogram was created. This is supposed to contain the
#' \emph{real} range of mz values in contrast to the \code{filterMz} below.
#' If not applicable use \code{mzrange = c(0, 0)}.
#'
#' @param filterMz \code{numeric(2)} representing the mz value range (min,
#' max) that was used to filter the original object on mz dimension. If not
#' applicable use \code{filterMz = c(0, 0)}.
#'
#' @param precursorMz \code{numeric(2)} for SRM/MRM transitions.
#' Represents the mz of the precursor ion. See details for more information.
#'
#' @param productMz \code{numeric(2)} for SRM/MRM transitions.
#' Represents the mz of the product. See details for more information.
#'
#' @param fromFile \code{integer(1)} the index of the file within the
#' \code{\link{OnDiskMSnExp}} or \code{\link{XCMSnExp}} from which the
#' chromatogram was extracted.
#'
#' @param aggregationFun \code{character} string specifying the function that
#' was used to aggregate intensity values for the same retention time across
#' the mz range. Supported are \code{"sum"} (total ion chromatogram),
#' \code{"max"} (base peak chromatogram), \code{"min"} and \code{"mean"}.
#'
#' @slot .__classVersion__,rtime,intensity,mz,filterMz,precursorMz,productMz,fromFile,aggregationFun See corresponding parameter above.
#'
#' @rdname Chromatogram-class
Chromatogram <- function(rtime = numeric(), intensity = numeric(),
                         mz = c(NA_real_, NA_real_),
                         filterMz = c(NA_real_, NA_real_),
                         precursorMz = c(NA_real_, NA_real_),
                         productMz = c(NA_real_, NA_real_),
                         fromFile = integer(),
                         aggregationFun = character()) {
    ## The validity method requires increasing retention times; re-order
    ## both vectors by retention time if needed (issue #145).
    if (is.unsorted(rtime)) {
        ord <- order(rtime)
        rtime <- rtime[ord]
        intensity <- intensity[ord]
    }
    ## All mz-like slots are stored as (min, max) pairs.
    new("Chromatogram", rtime = rtime, intensity = intensity,
        mz = range(mz), filterMz = range(filterMz),
        precursorMz = range(precursorMz), productMz = range(productMz),
        fromFile = as.integer(fromFile), aggregationFun = aggregationFun)
}
#' @title Plot Chromatogram objects
#'
#' @description \code{plotChromatogram} creates a chromatogram plot for a
#' single \code{Chromatogram} object or a \code{list} of
#' \code{\link{Chromatogram}} objects (one line for each
#' \code{\link{Chromatogram}}/sample).
#'
#' @param x For \code{plotChromatogram}: \code{list} of
#' \code{\link{Chromatogram}} objects, such as extracted from an
#' \code{\link{XCMSnExp}} object by the \code{\link{extractChromatograms}}
#' method.
#' For \code{highlightChromPeaks}: \code{XCMSnExp} object with the detected
#' peaks.
#'
#' @param rt For \code{plotChromatogram}: \code{numeric(2)}, optional parameter
#' to subset each \code{Chromatogram} by retention time prior to plotting.
#' For \code{highlightChromPeaks}: \code{numeric(2)} with the retention time
#' range from which peaks should be extracted and plotted.
#'
#' @param col color definition for each line/sample; recycled from
#' \code{col[1]} if the length does not match the number of samples.
#'
#' @param lty the line type. See \code{\link[graphics]{plot}} for details.
#'
#' @param type the plotting type. See \code{\link[graphics]{plot}} for details.
#'
#' @param xlab \code{character(1)} with the label for the x-axis.
#'
#' @param ylab \code{character(1)} with the label for the y-axis.
#'
#' @param main The title for the plot; if \code{main = NULL} the mz range of
#' the \code{Chromatogram} object(s) will be used as the title.
#'
#' @param ... additional parameters to the \code{\link{matplot}} or \code{plot}
#' function.
#'
#' @seealso \code{\link{extractChromatograms}} for how to extract a list of
#' \code{\link{Chromatogram}} objects from an \code{\link{XCMSnExp}} object.
#'
#' @author Johannes Rainer
plotChromatogram <- function(x, rt, col = "#00000060",
                             lty = 1, type = "l", xlab = "retention time",
                             ylab = "intensity", main = NULL, ...) {
    if (!is.list(x) && !is(x, "Chromatogram"))
        stop("'x' should be a Chromatogram object or a list of Chromatogram",
             " objects.")
    if (is(x, "Chromatogram"))
        x <- list(x)
    ## Each element has to be a Chromatogram or NA.
    isOK <- lapply(x, function(z) {
        if (is(z, "Chromatogram")) {
            return(TRUE)
        } else {
            if (is.na(z))
                return(TRUE)
        }
        FALSE
    })
    if (any(!unlist(isOK)))
        stop("if 'x' is a list it should only contain Chromatogram objects")
    ## Subset the Chromatogram objects if rt provided.
    if (!missing(rt)) {
        rt <- range(rt)
        x <- lapply(x, function(z) {
            if (is(z, "Chromatogram"))
                filterRt(z, rt = rt)
        })
    }
    if (length(col) != length(x)) {
        col <- rep(col[1], length(x))
    }
    ## If main is NULL use the mz range.
    if (is.null(main)) {
        mzr <- range(lapply(x, mz), na.rm = TRUE, finite = TRUE)
        main <- paste0(format(mzr, digits = 7), collapse = " - ")
    }
    ## Number of measurements we've got per chromatogram. This can be
    ## different between samples, from none (if not a single measurement in
    ## the rt/mz) to the number of data points that were actually measured.
    lens <- unique(lengths(x))
    max_len <- max(lens)
    max_len_vec <- rep_len(NA, max_len)
    ## Generate the matrix of rt values, columns are samples, rows retention
    ## time values. Fill each column with NAs up to the maximum number of
    ## values we've got in a sample/file.
    rts <- do.call(cbind, lapply(x, function(z) {
        cur_len <- length(z)
        if (cur_len == 0)
            max_len_vec
        else {
            max_len_vec[seq_len(cur_len)] <- rtime(z)
            max_len_vec
        }
    }))
    ## Same for the intensities.
    ints <- do.call(cbind, lapply(x, function(z) {
        cur_len <- length(z)
        if (length(z) == 0)
            max_len_vec
        else {
            max_len_vec[seq_len(cur_len)] <- intensity(z)
            max_len_vec
        }
    }))
    ## Define the x and y limits (only used for the empty fallback plot).
    ## FIX: the original's dangling 'else' attached to the inner 'if', so
    ## x_lim was never derived from the data when rts contained values, and
    ## the all-NA/missing-rt path called range() on an all-NA matrix.
    x_lim <- c(0, 1)
    y_lim <- c(0, 1)
    if (all(is.na(rts))) {
        if (!missing(rt))
            x_lim <- range(rt)
    } else {
        x_lim <- range(rts, na.rm = TRUE, finite = TRUE)
    }
    if (!all(is.na(ints)))
        y_lim <- range(ints, na.rm = TRUE, finite = TRUE)
    ## Identify columns that have only NAs in either intensity or rt - these
    ## will not be plotted.
    keepCol <- which(apply(ints, MARGIN = 2, function(z) any(!is.na(z))) |
                     apply(rts, MARGIN = 2, function(z) any(!is.na(z))))
    ## Finally plot the data.
    if (length(keepCol)) {
        matplot(x = rts[, keepCol, drop = FALSE],
                y = ints[, keepCol, drop = FALSE], type = type, lty = lty,
                col = col[keepCol], xlab = xlab, ylab = ylab, main = main,
                ...)
    } else
        plot(x = 3, y = 3, pch = NA, xlab = xlab, ylab = ylab, main = main,
             xlim = x_lim, ylim = y_lim)
}



#' @description The \code{highlightChromPeaks} function adds chromatographic
#' peak definitions to an existing plot, such as one created by the
#' \code{plotChromatograms} function.
#'
#' @param mz \code{numeric(2)} with the mz range from which the peaks should
#' be extracted and plotted.
#'
#' @param border colors to be used to color the border of the rectangles. Has
#' to be equal to the number of samples in \code{x}.
#'
#' @param lwd \code{numeric(1)} defining the width of the line/border.
#'
#' @rdname plotChromatogram
## NOTE(review): the default border "00000040" lacks the leading '#' of an
## RGBA color spec - confirm whether "#00000040" was intended.
highlightChromPeaks <- function(x, rt, mz,
                                border = rep("00000040", length(fileNames(x))),
                                lwd = 1, col = NA, type = c("rect", "point"),
                                ...)
{ + type <- match.arg(type) + if (missing(rt)) + rt <- c(-Inf, Inf) + if (missing(mz)) + mz <- c(-Inf, Inf) + if (!is(x, "XCMSnExp")) + stop("'x' has to be a XCMSnExp object") + if (!hasChromPeaks(x)) + stop("'x' does not contain any detected peaks") + pks <- chromPeaks(x, rt = rt, mz = mz, ppm = 0) + if (length(col) != length(fileNames(x))) + col <- rep(col[1], length(fileNames(x))) + if (length(border) != length(fileNames(x))) + border <- rep(border[1], length(fileNames(x))) + if (length(pks)) { + if (type == "rect") + rect(xleft = pks[, "rtmin"], xright = pks[, "rtmax"], + ybottom = rep(0, nrow(pks)), ytop = pks[, "maxo"], + border = border[pks[, "sample"]], lwd = lwd, + col = col[pks[, "sample"]]) + if (type == "point") { + if (any(is.na(col))) + col <- border + ## Draw a star at the position defined by the "rt" column + points(x = pks[, "rt"], y = pks[, "maxo"], + col = col[pks[, "sample"]], ...) + } + } +} + diff --git a/R/functions-IO.R b/R/functions-IO.R index 031c2c1fb..140f58805 100644 --- a/R/functions-IO.R +++ b/R/functions-IO.R @@ -13,7 +13,7 @@ isCdfFile <- function(x) { res <- sapply(patts, function(z) { grep(z, x, ignore.case = TRUE) }) - return(any(unlist(res))) + any(unlist(res)) } ############################################################ @@ -21,15 +21,30 @@ isCdfFile <- function(x) { ## ## Just guessing whether the file is a mzML file based on its ending. isMzMLFile <- function(x) { - fileEnds <- c("mzxml", "mzml", "mzdata") + fileEnds <- c("mzml") ## check for endings and and ending followed by a . (e.g. mzML.gz) patts <- paste0("\\.", fileEnds, "($|\\.)") res <- sapply(patts, function(z) { grep(z, x, ignore.case = TRUE) }) - return(any(unlist(res))) + any(unlist(res)) } +############################################################ +## isRampFile +## +## Files that have to be read using the Ramp backend. +isRampFile <- function(x) { + fileEnds <- c("mzxml", "mzdata") + ## check for endings and and ending followed by a . (e.g. 
mzML.gz) + patts <- paste0("\\.", fileEnds, "($|\\.)") + res <- sapply(patts, function(z) { + grep(z, x, ignore.case = TRUE) + }) + any(unlist(res)) +} + + ############################################################ ## readRawData ## @@ -40,8 +55,6 @@ isMzMLFile <- function(x) { ##' @param x The file name. ##' @param includeMSn logical(1) indicating whether MS level > 1 should be loaded ##' too. Only supported for mzML files. -##' @param backendMzML Default backend to be used for mzML files. Can b wither -##' \code{"Ramp"} or \code{"pwiz"}. ##' @param dropEmptyScans Scans/spectra without peaks are not returned if ##' \code{dropEmptyScans = TRUE}. If \code{FALSE} all spectra from the input ##' file are returned. This is to be consistent with the code before @@ -49,20 +62,20 @@ isMzMLFile <- function(x) { ##' https://github.com/sneumann/xcms/issues/67). ##' @return A \code{list} with rt, tic, scanindex, mz and intensity. ##' @noRd -readRawData <- function(x, includeMSn = FALSE, backendMzML = "Ramp", - dropEmptyScans = TRUE) { +readRawData <- function(x, includeMSn = FALSE, dropEmptyScans = TRUE) { ## def_backend <- "Ramp" ## Eventually use pwiz... 
header_cols <- c("retentionTime", "acquisitionNum", "totIonCurrent") - if (isCdfFile(x)) { + backend <- NA + if (isCdfFile(x)) backend <- "netCDF" - } else { - if (isMzMLFile(x)) { - backend <- backendMzML - header_cols <- c(header_cols, "polarity") - } else { - stop("Unknown file type.") - } + if (isRampFile(x)) + backend <- "Ramp" + if (isMzMLFile(x)) { + backend <- "pwiz" + header_cols <- c(header_cols, "polarity") } + if (is.na(backend)) + stop("Unsupported file type.") msd <- mzR::openMSfile(x, backend = backend) on.exit(if(!is.null(msd)) mzR::close(msd)) ## That's due to issue https://github.com/lgatto/MSnbase/issues/151 @@ -139,5 +152,6 @@ readRawData <- function(x, includeMSn = FALSE, backendMzML = "Ramp", } mzR::close(msd) mzR <- NULL + gc() resList } diff --git a/R/functions-MsFeatureData.R b/R/functions-MsFeatureData.R index 16f9758ce..969530841 100644 --- a/R/functions-MsFeatureData.R +++ b/R/functions-MsFeatureData.R @@ -1,143 +1,165 @@ ## Functions for MsFeatureData classes. #' @include DataClasses.R +##' Validates a 'chromPeaks' matrix or data.frame and ensures that it contains all +##' required columns and that all columns are of numeric data type. +##' @return \code{TRUE} or a \code{character} with the error message. +##' @noRd +.validChromPeaksMatrix <- function(x) { + msg <- character() + if (length(x)) { + if (!(is.matrix(x) | is.data.frame(x))) + return(paste0("'chromPeaks' has to be a matrix or a data.frame!")) + hasReqCols <- .REQ_PEAKS_COLS %in% colnames(x) + if (any(!hasReqCols)) + return(paste0("Required columns ", + paste0("'", .REQ_PEAKS_COLS[!hasReqCols], + "'", collapse = ", "), " not", + " present in 'chromPeaks' matrix!")) + ## Check data.types - all have to be numeric. 
+        typeOK <- apply(x[, .REQ_PEAKS_COLS, drop = FALSE], MARGIN = 2,
+                        is.numeric)
+        if (any(!typeOK))
+            return(paste0("Values in column(s) ",
+                          paste0("'", names(typeOK)[!typeOK], "'", collapse = ", "),
+                          " of the 'chromPeaks' matrix are not numeric!"))
+    }
+    return(TRUE)
+}
+
+
 ##' @description Performs a validation check of all elements within the object:
-##' 1) Allowed are: features (matrix), featureGroups (DataFrame) and
+##' 1) Allowed are: chromPeaks (matrix), featureDefinitions (DataFrame) and
 ##' adjustedRtime (list).
 ##' @author Johannes Rainer
 ##' @return \code{TRUE} if object is valid, or a message with the error message.
 ##' @noRd
 validateMsFeatureData <- function(x) {
-    msg <- validMsg(NULL, NULL)
+    msg <- character()
     ks <- ls(x)
     if (length(ks)) {
-        validKeys <- ks %in% c("features", "featureGroups", "adjustedRtime")
+        validKeys <- ks %in% c("chromPeaks", "featureDefinitions",
+                               "adjustedRtime")
         if (!all(validKeys)) {
-            msg <- validMsg(msg, paste0("Only elements named 'features', ",
-                                        "'featureGroups' and 'adjustedRtime' ",
-                                        "are allowed but I got ",
-                                        paste0("'", ks[!validKeys],"'")))
+            msg <- c(msg, paste0("Only elements named 'chromPeaks', ",
+                                 "'featureDefinitions' and 'adjustedRtime' ",
+                                 "are allowed but I got ",
+                                 paste0("'", ks[!validKeys],"'")))
         }
-        haveFts <- any(ks == "features")
+        haveFts <- any(ks == "chromPeaks")
         if (haveFts) {
-            if (is.matrix(x$features)) {
-                hasReqCols <- .XCMS_REQ_FEATS_COLS %in% colnames(x$features)
-                if (any(!hasReqCols))
-                    msg <- validMsg(msg,
-                                    paste0("Required columns ",
-                                           paste0("'", .XCMS_REQ_FEATS_COLS[!hasReqCols],
-                                                  "'", collapse = ", "), " not",
-                                           " present in 'features' matrix!"))
-            } else {
-                msg <- validMsg(msg, paste0("The 'features' element has to be ",
-                                            "of type 'matrix' and not '",
-                                            class(x$features), "'!"))
-            }
+            OK <- .validChromPeaksMatrix(x$chromPeaks)
+            if (is.character(OK))
+                msg <- c(msg, OK)
         }
-        haveFGs <- any(ks == "featureGroups")
+        haveFGs <- any(ks == "featureDefinitions")
         if (haveFGs) {
- if (is(x$featureGroups, "DataFrame")) { + if (is(x$featureDefinitions, "DataFrame")) { ## Check required columns. - hasReqCols <- .XCMS_REQ_FEATG_COLS %in% colnames(x$featureGroups) + hasReqCols <- .REQ_PEAKG_COLS %in% colnames(x$featureDefinitions) if (any(!hasReqCols)) { - msg <- validMsg(msg, - paste0("Required columns ", - paste0("'", .XCMS_REQ_FEATG_COLS[!hasReqCols], - "'", collapse = ", "), " not", - " present in 'featureGroups'!")) + msg <- c(msg, + paste0("Required columns ", + paste0("'", .REQ_PEAKG_COLS[!hasReqCols], + "'", collapse = ", "), " not", + " present in 'featureDefinitions'!")) } else { ## Check content! - if (!is(x$featureGroups$featureidx, "list")) - msg <- validMsg(msg, - paste0("Column 'featureidx' in '", - "featureGroups' is not a list!")) - for (col in .XCMS_REQ_FEATG_COLS[-length(.XCMS_REQ_FEATG_COLS)]) { - if (!is.numeric(x$featureGroups[, col])) - msg <- validMsg(msg, paste0("Column '", col, "' has", - " to be numeric!")) + if (!is(x$featureDefinitions$peakidx, "list")) + msg <- c(msg, + paste0("Column 'peakidx' in '", + "featureDefinitions' is not a list!")) + for (col in .REQ_PEAKG_COLS[-length(.REQ_PEAKG_COLS)]) { + if (!is.numeric(x$featureDefinitions[, col])) + msg <- c(msg, paste0("Column '", col, "' has", + " to be numeric!")) } if (haveFts) { - ## Check that indices are within 1:nrow(x$features) - if (!all(unlist(x$featureGroups$featureidx) %in% - 1:nrow(x$features))) - msg <- validMsg(msg, - paste0("Some of the indices in column", - " 'featureidx' of element ", - "'featureGroups' do not match ", - "rows of the 'features' matrix!")) + ## Check that indices are within 1:nrow(x$chromPeaks) + if (!all(unlist(x$featureDefinitions$peakidx) %in% + 1:nrow(x$chromPeaks))) + msg <- c(msg, + paste0("Some of the indices in column", + " 'peakidx' of element ", + "'featureDefinitions' do not match ", + "rows of the 'chromPeaks' matrix!")) } } } else { - msg <- validMsg(msg, paste0("The 'featureGroups' element has to", - " be of type 
'DataFrame' and not '", - class(x$featureGroups), "'!")) + msg <- c(msg, paste0("The 'featureDefinitions' element has to", + " be of type 'DataFrame' and not '", + class(x$featureDefinitions), "'!")) } if (!haveFts) { - msg <- validMsg(msg, paste0("Can not have element 'featureGroups'", - " without element 'features'!")) + msg <- c(msg, paste0("Can not have element 'featureDefinitions'", + " without element 'chromPeaks'!")) } } haveRts <- any(ks == "adjustedRtime") if (haveRts) { - if (!haveFts) - msg <- validMsg(msg, paste0("Can not have element 'adjustedRtime'", - " without element 'features'!")) + ## Not true, since obiwarp works without peaks. + ## if (!haveFts) + ## msg <- c(msg, paste0("Can not have element 'adjustedRtime'", + ## " without element 'chromPeaks'!")) ## adjustedRtime has to be a list of numerics. if (!is.list(x$adjustedRtime)) { - msg <- validMsg(msg, paste0("The 'alignedRtime' element has to ", - "be of type 'list' and not '", - class(x$adjustedRtime), "'!")) + msg <- c(msg, paste0("The 'alignedRtime' element has to ", + "be of type 'list' and not '", + class(x$adjustedRtime), "'!")) } else { areNum <- unlist(lapply(x$adjustedRtime, function(z) { return(is.numeric(z)) })) if (!all(areNum)) - msg <- validMsg(msg, paste0("The 'alignedRtime' element has", - " to be a list of numeric ", - "vectors!")) + msg <- c(msg, paste0("The 'alignedRtime' element has", + " to be a list of numeric ", + "vectors!")) } } } - if (is.null(msg)) - return(TRUE) - else return(msg) + ## if (length(msg) == 0) + ## return(TRUE) + ## else return(msg) + return(msg) } -##' @description Filter features and sync them with with the present -##' filterGroups, i.e. update their featureidx column or remove them. +##' @description Filter chromPeaks and sync them with with the present +##' filterGroups, i.e. update their peakidx column or remove them. ##' ##' @param x A \code{MsFeatureData} or an \code{XCMSnExp} object. 
-##' @param idx \code{numeric} with the indices of the features to keep. +##' @param idx \code{numeric} with the indices of the chromatographic peaks to +##' keep. ##' ##' @return A \code{MsFeatureData}. ##' @author Johannes Rainer ##' @noRd -.filterFeatures <- function(x, idx) { +.filterChromPeaks <- function(x, idx) { if (missing(idx)) return(x) - if (!hasDetectedFeatures(x)) + if (!hasChromPeaks(x)) return(x) - fts <- features(x) + fts <- chromPeaks(x) idx <- sort(idx) if (!all(idx %in% 1:nrow(fts))) - stop("All indices in 'idx' have to be within 1 and nrow of the feature", + stop("All indices in 'idx' have to be within 1 and nrow of the peak", " matrix.") new_e <- new("MsFeatureData") - features(new_e) <- fts[idx, , drop = FALSE] - if (hasAlignedFeatures(x)) { - af <- featureGroups(x) + chromPeaks(new_e) <- fts[idx, , drop = FALSE] + if (hasFeatures(x)) { + af <- featureDefinitions(x) af <- split(af, 1:nrow(af)) afL <- lapply(af, function(z) { - if(all(z$featureidx[[1]] %in% idx)) { - z$featureidx <- list(match(z$featureidx[[1]], idx)) + if(all(z$peakidx[[1]] %in% idx)) { + z$peakidx <- list(match(z$peakidx[[1]], idx)) return(z) } else { return(NULL) } }) af <- do.call(rbind, afL) - featureGroups(new_e) <- af + if (length(af) > 0) + featureDefinitions(new_e) <- af } if (hasAdjustedRtime(x)) { if (is(x, "XCMSnExp")) diff --git a/R/functions-OnDiskMSnExp.R b/R/functions-OnDiskMSnExp.R index 1ba5784b2..a5ea487f6 100644 --- a/R/functions-OnDiskMSnExp.R +++ b/R/functions-OnDiskMSnExp.R @@ -1,44 +1,51 @@ ## Functions for MSnbase's OnDiskMSnExp objects -#' @include do_detectFeatures-functions.R DataClasses.R +#' @include do_findChromPeaks-functions.R DataClasses.R ##' @param x an OnDiskMSnExp representing the whole experiment. -##' @param method The feature detection method to be used. Can be "centWave" etc. +##' @param method The (chromatographic) peak detection method to be used. Can be +##' "centWave" etc. 
##' @param param A class extending Param containing all parameters for the -##' feature detection method. +##' peak detection method. ##' ##' @return a list of length 2, \code{peaks} containing a matrix with the -##' identified peaks and \code{date} the time stamp when the feature detection +##' identified peaks and \code{date} the time stamp when the peak detection ##' was started. ##' @noRd -detectFeatures_OnDiskMSnExp <- function(object, method = "centWave", +findChromPeaks_OnDiskMSnExp <- function(object, method = "centWave", param) { if (missing(param)) stop("'param' has to be specified!") ## pass the spectra to the _Spectrum_list function - return(detectFeatures_Spectrum_list(x = spectra(object), method = method, + ## Since we're calling this function already with bplapply ensure that + ## the spectra call is not firing its own parallel processing! + return(findChromPeaks_Spectrum_list(x = spectra(object, + BPPARAM = SerialParam()), + method = method, param = param, rt = rtime(object))) } -##' Run the feature detection on a list of Spectrum1 objects from the same +##' Run the peak detection on a list of Spectrum1 objects from the same ##' file ##' ##' @param x A list of Spectrum1 objects of a sample. -##' @param method The feature detection method to be used. Can be "centWave" etc. +##' @param method The peak detection method to be used. Can be "centWave" etc. ##' @param param A class extending Param containing all parameters for the -##' feature detection method. +##' peak detection method. ##' @param rt Numeric with the retention times for the spectra. If not provided ##' it is extracted from the spectra. ##' @return a list of length 2, \code{peaks} containing a matrix with the -##' identified peaks and \code{date} the time stamp when the feature detection +##' identified peaks and \code{date} the time stamp when the peak detection ##' was started. 
##' @author Johannes Rainer ##' @noRd -detectFeatures_Spectrum_list <- function(x, method = "centWave", param, rt) { +findChromPeaks_Spectrum_list <- function(x, method = "centWave", param, rt) { method <- match.arg(method, c("centWave", "massifquant", "matchedFilter", "MSW", "centWaveWithPredIsoROIs")) - method <- paste0("do_detectFeatures_", method) + method <- paste0("do_findChromPeaks_", method) + if (method == "MSW") + method <- paste0("do_findPeaks_", method) if (missing(param)) stop("'param' has to be specified!") ## Check if the spectra are orderd by rt. @@ -48,29 +55,35 @@ detectFeatures_Spectrum_list <- function(x, method = "centWave", param, rt) { stop("Spectra are not ordered by retention time!") mzs <- lapply(x, mz) procDat <- date() - return(list(peaks = do.call(method, - args = c(list(mz = unlist(mzs, - use.names = FALSE), - int = unlist(lapply(x, intensity), - use.names = FALSE), - valsPerSpect = lengths(mzs, FALSE), - scantime = rt), - as(param, "list"))), - date = procDat)) + res <- do.call(method, args = c(list(mz = unlist(mzs, + use.names = FALSE), + int = unlist(lapply(x, intensity), + use.names = FALSE), + valsPerSpect = lengths(mzs, FALSE), + scantime = rt), + as(param, "list"))) + ## Ensure that we call the garbage collector to eventually clean unused stuff + rm(mzs) + rm(x) + rm(rt) + gc() + return(list(peaks = res, date = procDat)) } ## That's a special case since we don't expect to have rt available for this. 
-detectFeatures_MSW_OnDiskMSnExp <- function(object, method = "MSW", +findPeaks_MSW_OnDiskMSnExp <- function(object, method = "MSW", param) { if (missing(param)) stop("'param' has to be specified!") ## pass the spectra to the _Spectrum_list function - return(detectFeatures_MSW_Spectrum_list(x = spectra(object), method = method, - param = param)) + return(findPeaks_MSW_Spectrum_list(x = spectra(object, + BPPARAM = SerialParam()), + method = method, + param = param)) } -detectFeatures_MSW_Spectrum_list <- function(x, method = "MSW", param) { +findPeaks_MSW_Spectrum_list <- function(x, method = "MSW", param) { method <- match.arg(method, c("MSW")) - method <- paste0("do_detectFeatures_", method) + method <- paste0("do_findPeaks_", method) if (missing(param)) stop("'param' has to be specified!") mzs <- lapply(x, mz) @@ -118,9 +131,9 @@ detectFeatures_MSW_Spectrum_list <- function(x, method = "MSW", param) { } ##' @description Processes the result list returned by an lapply/bplapply to -##' detectFeatures_Spectrum_list or detectFeatures_OnDiskMSnExp and returns a +##' findChromPeaks_Spectrum_list or findChromPeaks_OnDiskMSnExp and returns a ##' list with two elements: \code{$peaks} the peaks matrix of identified -##' features and \code{$procHist} a list of ProcessHistory objects (empty if +##' peaks and \code{$procHist} a list of ProcessHistory objects (empty if ##' \code{getProcHist = FALSE}). ##' @param x See description above. ##' @param getProcHist Wheter ProcessHistory objects should be returned too. @@ -143,14 +156,183 @@ detectFeatures_MSW_Spectrum_list <- function(x, method = "MSW", param) { pks[[i]] <- cbind(x[[i]]$peaks, sample = rep.int(i, n_pks)) } if (getProcHist) - phList[[i]] <- ProcessHistory(info. = paste0("Feature detection in '", - basename(fnames[i]), - "': ", n_pks, - " features identified."), - date. = x[[i]]$date, - type. = .PROCSTEP.FEATURE.DETECTION, - fileIndex. = i - ) + phList[[i]] <- ProcessHistory( + info. 
= paste0("Chromatographic peak detection in '",
+                basename(fnames[i]), "': ", n_pks,
+                " peaks identified."),
+                date. = x[[i]]$date,
+                type. = .PROCSTEP.PEAK.DETECTION,
+                fileIndex. = i
+            )
     }
     return(list(peaks = pks, procHist = phList))
 }
+
+
+##' Calculate adjusted retention times by aligning each sample against a center
+##' sample.
+##' @param object An \code{OnDiskMSnExp}.
+##' @param param An \code{ObiwarpParam}.
+##' @return The function returns a \code{list} of adjusted retention times
+##' grouped by file.
+##' @noRd
+.obiwarp <- function(object, param) {
+    if (missing(object))
+        stop("'object' is mandatory!")
+    if (missing(param))
+        param <- ObiwarpParam()
+    nSamples <- length(fileNames(object))
+    if (nSamples <= 1)
+        stop("Can not perform a retention time correction on less than two files.")
+
+    ## centerSample
+    if (length(centerSample(param))) {
+        if (!(centerSample(param) %in% 1:nSamples))
+            stop("'centerSample' has to be a single integer between 1 and ",
+                 nSamples, "!")
+    } else {
+        centerSample(param) <- floor(median(1:nSamples))
+    }
+    message("Sample number ", centerSample(param), " used as center sample.")
+
+    ## Get the profile matrix of the center sample:
+    ## Using the (hidden) parameter returnBreaks to return also the breaks of
+    ## the bins of the profile matrix. I can use them to align the matrices
+    ## later.
+    ## NOTE: it might be even better to just re-use the breaks from the center
+    ## sample for the profile matrix generation of all following samples.
+    suppressMessages(
+        profCtr <- profMat(object, method = "bin", step = binSize(param),
+                           fileIndex = centerSample(param),
+                           returnBreaks = TRUE)[[1]]
+    )
+    ## Now split the object by file
+    objL <- splitByFile(object, f = factor(seq_len(nSamples)))
+    objL <- objL[-centerSample(param)]
+    centerObject <- filterFile(object, file = centerSample(param))
+    ## Now we can bplapply here!
+ res <- bplapply(objL, function(z, cntr, cntrPr, parms) { + message("Aligning ", basename(fileNames(z)), " against ", + basename(fileNames(cntr)), " ... ", appendLF = FALSE) + ## Get the profile matrix for the current file. + suppressMessages( + curP <- profMat(z, method = "bin", step = binSize(parms), + returnBreaks = TRUE)[[1]] + ) + ## --------------------------------------- + ## 1)Check the scan times of both objects: + scantime1 <- unname(rtime(cntr)) + scantime2 <- unname(rtime(z)) + ## median difference between spectras' scan time. + mstdiff <- median(c(diff(scantime1), diff(scantime2))) + + ## rtup1 <- seq_along(scantime1) + ## rtup2 <- seq_along(scantime2) + + mst1 <- which(diff(scantime1) > 5 * mstdiff)[1] + if (!is.na(mst1)) { + scantime1 <- scantime1[seq_len((mst1 - 1))] + message("Found gaps in scan times of the center sample: cut ", + "scantime-vector at ", scantime1[mst1]," seconds.") + } + mst2 <- which(diff(scantime2) > 5 * mstdiff)[1] + if(!is.na(mst2)) { + scantime2 <- scantime2[seq_len((mst2 - 1))] + message("Found gaps in scan time of file ", basename(fileNames(z)), + ": cut scantime-vector at ", scantime2[mst2]," seconds.") + } + ## Drift of measured scan times - expected to be largest at the end. + rtmaxdiff <- abs(diff(c(scantime1[length(scantime1)], + scantime2[length(scantime2)]))) + ## If the drift is larger than the threshold, cut the matrix up to the + ## max allowed difference. 
+ if(rtmaxdiff > (5 * mstdiff)){ + rtmax <- min(scantime1[length(scantime1)], + scantime2[length(scantime2)]) + scantime1 <- scantime1[scantime1 <= rtmax] + scantime2 <- scantime2[scantime2 <= rtmax] + } + valscantime1 <- length(scantime1) + valscantime2 <- length(scantime2) + ## Finally, restrict the profile matrix to columns 1:valscantime + if (ncol(cntrPr$profMat) > valscantime1) { + cntrPr$profMat <- cntrPr$profMat[, -c((valscantime1 + 1): + ncol(cntrPr$profMat))] + } + if(ncol(curP$profMat) > valscantime2) { + curP$profMat <- curP$profMat[, -c((valscantime2 + 1): + ncol(curP$profMat))] + } + ## --------------------------------- + ## 2) Now match the breaks/mz range. + ## The -1 below is because the breaks define the upper and lower + ## boundary. Have to do it that way to be in line with the orignal + ## code... would be better to use the breaks as is. + mzr1 <- c(cntrPr$breaks[1], cntrPr$breaks[length(cntrPr$breaks) - 1]) + mzr2 <- c(curP$breaks[1], curP$breaks[length(curP$breaks) - 1]) + mzmin <- min(c(mzr1[1], mzr2[1])) + mzmax <- max(c(mzr1[2], mzr2[2])) + mzs <- seq(mzmin, mzmax, by = binSize(parms)) + ## Eventually add empty rows at the beginning + if (mzmin < mzr1[1]) { + tmp <- matrix(0, (length(seq(mzmin, mzr1[1], binSize(parms))) - 1), + ncol = ncol(cntrPr$profMat)) + cntrPr$profMat <- rbind(tmp, cntrPr$profMat) + } + ## Eventually add empty rows at the end + if (mzmax > mzr1[2]) { + tmp <- matrix(0, (length(seq(mzr1[2], mzmax, binSize(parms))) - 1), + ncol = ncol(cntrPr$profMat)) + cntrPr$profMat <- rbind(cntrPr$profMat, tmp) + } + ## Eventually add empty rows at the beginning + if (mzmin < mzr2[1]) { + tmp <- matrix(0, (length(seq(mzmin, mzr2[1], binSize(parms))) - 1), + ncol = ncol(curP$profMat)) + curP$profMat <- rbind(tmp, curP$profMat) + } + ## Eventually add empty rows at the end + if (mzmax > mzr2[2]) { + tmp <- matrix(0, (length(seq(mzr2[2], mzmax, binSize(parms))) - 1), + ncol = ncol(curP$profMat)) + curP$profMat <- rbind(curP$profMat, tmp) 
+ } + ## A final check of the data. + mzvals <- length(mzs) + cntrVals <- length(cntrPr$profMat) + curVals <- length(curP$profMat) + if ((mzvals * valscantime1) != cntrVals | (mzvals * valscantime2) != curVals + | cntrVals != curVals) + stop("Dimensions of profile matrices of files ", + basename(fileNames(cntr)), " and ", basename(fileNames(z)), + " do not match!") + ## Done with preparatory stuff - now I can perform the alignment. + rtadj <- .Call("R_set_from_xcms", valscantime1, scantime1, mzvals, mzs, + cntrPr$profMat, valscantime2, scantime2, mzvals, mzs, + curP$profMat, response(parms), distFun(parms), + gapInit(parms), gapExtend(parms), factorDiag(parms), + factorGap(parms), as.numeric(localAlignment(parms)), + initPenalty(parms)) + if (length(rtime(z)) > valscantime2) { + ## Adding the raw retention times if we were unable to align all of + ## them. + rtadj <- c(rtadj, rtime(z)[(valscantime2 + 1):length(rtime(z))]) + warning(basename(fileNames(z)), " :could only align up to a ", + "retention time of ", rtime(z)[valscantime2], " seconds. ", + "After that raw retention times are reported.") + } + message("OK") + return(rtadj) + ## Related to issue #122: try to resemble the rounding done in the + ## recor.obiwarp method. + ## return(round(rtadj, 2)) + }, cntr = centerObject, cntrPr = profCtr, parms = param) + ## Add also the rtime of the center sample: + adjRt <- vector("list", nSamples) + adjRt[centerSample(param)] <- list(unname(rtime(centerObject))) + ## Add the result. + idxs <- 1:nSamples + idxs <- idxs[idxs != centerSample(param)] + adjRt[idxs] <- res + return(adjRt) +} diff --git a/R/functions-Params.R b/R/functions-Params.R index d016038bf..5cd15d4ba 100644 --- a/R/functions-Params.R +++ b/R/functions-Params.R @@ -35,14 +35,49 @@ } } +## Just get the name of the algorithm for each Parameter class. 
+.param2string <- function(x) { + if (is(x, "CentWaveParam")) + return("centWave") + if (is(x, "MatchedFilterParam")) + return("matchedFilter") + if (is(x, "MassifquantParam")) + return("massifquant") + if (is(x, "MSWParam")) + return("MSW") + if (is(x, "CentWavePredIsoParam")) + return("centWave with predicted isotope ROIs") + if (is(x, "PeakDensityParam")) + return("chromatographic peak density") + if (is(x, "MzClustParam")) + return("mzClust") + if (is(x, "NearestPeaksParam")) + return("nearest peaks") + if (is(x, "PeakGroupsParam")) + return("peak groups") + if (is(x, "ObiwarpParam")) + return("obiwarp") + return("unknown") +} + +############################################################ +## GenericParam +#' @return The \code{GenericParam} function returns a \code{GenericParam} object. +#' @param fun \code{character} representing the name of the function. +#' @param args \code{list} (ideally named) with the arguments to the function. +#' @rdname GenericParam +GenericParam <- function(fun = character(), args = list()) { + return(new("GenericParam", fun = fun, args = args)) +} + ############################################################ ## CentWaveParam ##' @return The \code{CentWaveParam} function returns a \code{CentWaveParam} -##' class instance with all of the settings specified for feature detection by -##' the centWave method. +##' class instance with all of the settings specified for chromatographic peak +##' detection by the centWave method. 
##' -##' @rdname featureDetection-centWave +##' @rdname findChromPeaks-centWave CentWaveParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, mzdiff = -0.001, fitgauss = FALSE, @@ -62,9 +97,9 @@ CentWaveParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, ##' @return The \code{MatchedFilterParam} function returns a ##' \code{MatchedFilterParam} class instance with all of the settings specified -##' for feature detection by the centWave method. +##' for chromatographic detection by the \emph{matchedFilter} method. ##' -##' @rdname featureDetection-matchedFilter +##' @rdname findChromPeaks-matchedFilter MatchedFilterParam <- function(binSize = 0.1, impute = "none", baseValue = numeric(), distance = numeric(), fwhm = 30, sigma = fwhm / 2.3548, @@ -75,15 +110,27 @@ MatchedFilterParam <- function(binSize = 0.1, impute = "none", sigma = sigma, max = max, snthresh = snthresh, steps = steps, mzdiff = mzdiff, index = index)) } +#' Convert the impute method to the old-style method name (e.g. for profMat +#' calls) +#' @noRd +.impute2method <- function(x) { + if (impute(x) == "none") + return("bin") + if (impute(x) == "lin") + return("binlin") + if (impute(x) == "linbase") + return("binlinbase") + return("intlin") +} ############################################################ ## MassifquantParam ##' @return The \code{MassifquantParam} function returns a \code{MassifquantParam} -##' class instance with all of the settings specified for feature detection by -##' the centWave method. +##' class instance with all of the settings specified for chromatographic peak +##' detection by the \emph{massifquant} method. 
##' -##' @rdname featureDetection-massifquant +##' @rdname findChromPeaks-massifquant MassifquantParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, mzdiff = -0.001, fitgauss = FALSE, @@ -102,7 +149,7 @@ MassifquantParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, ############################################################ ## MSWParam -##' @inheritParams featureDetection-centWave +##' @inheritParams findChromPeaks-centWave ##' ##' @param scales Numeric defining the scales of the continuous wavelet ##' transform (CWT). @@ -136,10 +183,10 @@ MassifquantParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, ##' \code{MassSpecWavelet} package. ##' ##' @return The \code{MSWParam} function returns a \code{MSWParam} -##' class instance with all of the settings specified for feature detection by -##' the centWave method. +##' class instance with all of the settings specified for peak detection by +##' the \emph{MSW} method. ##' -##' @rdname featureDetection-MSW +##' @rdname findPeaks-MSW MSWParam <- function(snthresh = 3, verboseColumns = FALSE, scales = c(1, seq(2, 30, 2), seq(32, 64, 4)), nearbyPeak = TRUE, peakScaleRange = 5, @@ -161,10 +208,10 @@ MSWParam <- function(snthresh = 3, verboseColumns = FALSE, ##' @return The \code{CentWavePredIsoParam} function returns a ##' \code{CentWavePredIsoParam} class instance with all of the settings -##' specified for the two-step centWave-based feature detection considering also -##' feature isotopes. +##' specified for the two-step centWave-based peak detection considering also +##' isotopes. 
##' -##' @rdname featureDetection-centWaveWithPredIsoROIs +##' @rdname findChromPeaks-centWaveWithPredIsoROIs CentWavePredIsoParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, mzdiff = -0.001, fitgauss = FALSE, @@ -182,3 +229,102 @@ CentWavePredIsoParam <- function(ppm = 25, peakwidth = c(20, 50), snthresh = 10, maxCharge = as.integer(maxCharge), mzIntervalExtension = mzIntervalExtension, polarity = polarity)) } + + +############################################################ +## PeakDensityParam + +##' @return The \code{PeakDensityParam} function returns a +##' \code{PeakDensityParam} class instance with all of the settings +##' specified for chromatographic peak alignment based on peak densities. +##' +##' @rdname groupChromPeaks-density +PeakDensityParam <- function(sampleGroups = numeric(), bw = 30, + minFraction = 0.5, minSamples = 1, + binSize = 0.25, maxFeatures = 50) { + return(new("PeakDensityParam", sampleGroups = sampleGroups, bw = bw, + minFraction = minFraction, minSamples = minSamples, + binSize = binSize, maxFeatures = maxFeatures)) +} + +############################################################ +## MzClustParam + +##' @return The \code{MzClustParam} function returns a +##' \code{MzClustParam} class instance with all of the settings +##' specified for high resolution single spectra peak alignment. +##' +##' @rdname groupChromPeaks-mzClust +MzClustParam <- function(sampleGroups = numeric(), ppm = 20, absMz = 0, + minFraction = 0.5, minSamples = 1) { + return(new("MzClustParam", sampleGroups = sampleGroups, ppm = ppm, + absMz = absMz, minFraction = minFraction, + minSamples = minSamples)) +} + + +############################################################ +## NearestPeaksParam + +##' @return The \code{NearestPeaksParam} function returns a +##' \code{NearestPeaksParam} class instance with all of the settings +##' specified for peak alignment based on peak proximity. 
+##' +##' @rdname groupChromPeaks-nearest +NearestPeaksParam <- function(sampleGroups = numeric(), mzVsRtBalance = 10, + absMz = 0.2, absRt = 15, kNN = 10) { + return(new("NearestPeaksParam", sampleGroups = sampleGroups, + mzVsRtBalance = mzVsRtBalance, absMz = absMz, absRt = absRt, + kNN = kNN)) +} + + +############################################################ +## PeakGroupsParam + +##' @return The \code{PeakGroupsParam} function returns a +##' \code{PeakGroupsParam} class instance with all of the settings +##' specified for retention time adjustment based on \emph{house keeping} +##' features/peak groups. +##' +##' @rdname adjustRtime-peakGroups +PeakGroupsParam <- function(minFraction = 0.9, extraPeaks = 1, + smooth = "loess", span = 0.2, + family = "gaussian", + peakGroupsMatrix = matrix(nrow = 0, ncol = 0)) { + return(new("PeakGroupsParam", minFraction = minFraction, + extraPeaks = extraPeaks, smooth = smooth, span = span, + family = family, peakGroupsMatrix = peakGroupsMatrix)) +} + + +############################################################ +## ObiwarpParam + +##' @return The \code{ObiwarpParam} function returns a +##' \code{ObiwarpParam} class instance with all of the settings +##' specified for obiwarp retention time adjustment and alignment. 
+##' +##' @rdname adjustRtime-obiwarp +ObiwarpParam <- function(binSize = 1, centerSample = integer(), response = 1L, + distFun = "cor_opt", gapInit = numeric(), + gapExtend = numeric(), factorDiag = 2, factorGap = 1, + localAlignment = FALSE, initPenalty = 0) { + return(new("ObiwarpParam", binSize = binSize, + centerSample = as.integer(centerSample), + response = as.integer(response), distFun = distFun, + gapInit = gapInit, gapExtend = gapExtend, factorDiag = factorDiag, + factorGap = factorGap, localAlignment = localAlignment, + initPenalty = initPenalty)) +} + +############################################################ +## FillChromPeaksParam + +#' @return The \code{FillChromPeaksParam} function returns a +#' \code{FillChromPeaksParam} object. +#' @rdname fillChromPeaks +FillChromPeaksParam <- function(expandMz = 0, expandRt = 0, ppm = 0) { + return(new("FillChromPeaksParam", expandMz = expandMz, expandRt = expandRt, + ppm = ppm)) +} diff --git a/R/functions-ProcessHistory.R b/R/functions-ProcessHistory.R index e6843be09..00867241b 100644 --- a/R/functions-ProcessHistory.R +++ b/R/functions-ProcessHistory.R @@ -2,6 +2,15 @@ ## Functions for ProcessHistory objects #' @include DataClasses.R +#' @description \code{processHistoryTypes} returns the available \emph{types} of +#' process histories. These can be passed with argument \code{type} to the +#' \code{processHistory} method to extract specific process step(s). +#' +#' @rdname XCMSnExp-class +processHistoryTypes <- function() { + .PROCSTEPS +} + ############################################################ ## Constructor ProcessHistory <- function(type., date., info., error., fileIndex.) { diff --git a/R/functions-XCMSnExp.R b/R/functions-XCMSnExp.R index d7e021652..e756540bf 100644 --- a/R/functions-XCMSnExp.R +++ b/R/functions-XCMSnExp.R @@ -1,53 +1,61 @@ -#' @include DataClasses.R - -##' Takes a XCMSnExp and drops ProcessHistory steps from the @.processHistory -##' slot matching the provided type. 
-##' -##' @return The XCMSnExp input object with selected ProcessHistory steps dropped. -##' @noRd -dropProcessHistories <- function(x, type) { - ## ## Drop processing history steps by type. - ## if (!missing(type)) { - ## toRem <- unlist(lapply(processHistory(x), function(z) { - ## return(processType(z) %in% type) - ## })) - ## if (any(toRem)) - ## x@.processHistory <- processHistory(x)[!toRem] - ## } - x@.processHistory <- dropProcessHistoriesList(processHistory(x), type = type) +#' @include DataClasses.R functions-utils.R + +#' @description Takes a XCMSnExp and drops ProcessHistory steps from the +#' @.processHistory slot matching the provided type. +#' +#' @param num which should be dropped? If \code{-1} all matching will be dropped, +#' otherwise just the most recent num. +#' +#' @return The XCMSnExp input object with selected ProcessHistory steps dropped. +#' +#' @noRd +dropProcessHistories <- function(x, type, num = -1) { + x@.processHistory <- dropProcessHistoriesList(processHistory(x), + type = type, num = num) return(x) } -dropProcessHistoriesList <- function(x, type) { +dropProcessHistoriesList <- function(x, type, num = -1) { if (!missing(type)) { toRem <- unlist(lapply(x, function(z) { return(processType(z) %in% type) })) - if (any(toRem)) - x <- x[!toRem] + if (any(toRem)) { + if (num < 0) { + x <- x[!toRem] + } else { + idx <- which(toRem) + idx <- tail(idx, n = num) + if (length(idx)) + x <- x[-idx] + } + } } return(x) } -##' Convert an XCMSnExp to an xcmsSet. -##' @noRd +#' Convert an XCMSnExp to an xcmsSet. 
+#' +#' @noRd .XCMSnExp2xcmsSet <- function(from) { if (any(msLevel(from) > 1)) stop("Coercing an XCMSnExp with MS level > 1 is not yet supported!") xs <- new("xcmsSet") - ## @peaks <- features - if (hasDetectedFeatures(from)) - xs@peaks <- features(from) - ## @groups <- part of featureGroups - ## @groupidx <- featureGroups(x)$featureidx - if (hasAlignedFeatures(from)){ - fgs <- featureGroups(from) - xs@groups <- as.matrix(fgs[, -ncol(fgs)]) - xs@groupidx <- fgs$featureidx + ## @peaks <- chromPeaks + if (hasChromPeaks(from)) + xs@peaks <- chromPeaks(from) + ## @groups <- part of featureDefinitions + ## @groupidx <- featureDefinitions(x)$peakidx + if (hasFeatures(from)){ + fgs <- featureDefinitions(from) + xs@groups <- S4Vectors::as.matrix(fgs[, -ncol(fgs)]) + rownames(xs@groups) <- NULL + xs@groupidx <- fgs$peakidx } ## @rt combination from rtime(x) and adjustedRtime(x) rts <- list() - rts$raw <- rtime(from, bySample = TRUE) + ## Ensure we're getting the raw rt + rts$raw <- rtime(from, bySample = TRUE, adjusted = FALSE) if (hasAdjustedRtime(from)) rts$corrected <- adjustedRtime(from, bySample = TRUE) else @@ -64,7 +72,7 @@ dropProcessHistoriesList <- function(x, type) { profStep <- 0.1 profParam <- list() ## If we've got any MatchedFilterParam we can take the values from there - ph <- processHistory(from, type = .PROCSTEP.FEATURE.DETECTION) + ph <- processHistory(from, type = .PROCSTEP.PEAK.DETECTION) if (length(ph)) { if (is(ph[[1]], "XProcessHistory")) { prm <- processParam(ph[[1]]) @@ -97,6 +105,10 @@ dropProcessHistoriesList <- function(x, type) { ## and if this is used at all. ## @filled ... not yet. + if (any(chromPeaks(from)[, "is_filled"] == 1)) { + fld <- which(chromPeaks(from)[, "is_filled"] == 1) + xs@filled <- as.integer(fld) + } ## @dataCorrection (numeric) ? in xcmsSet function, if lockMassFreq. 
## @progressInfo skip ## @progressCallback skip @@ -108,18 +120,1035 @@ dropProcessHistoriesList <- function(x, type) { return(xs) } -##' @title Extract spectra subsets -##' -##' Extract subsets of spectra matching the given mz and retention time ranges. -##' @noRd -.spectraSubsets <- function(x, rtrange, mzrange) { - ## o Should allow to provide single ranges, but also matrices of rtrange - ## and/or mzranges. - ## o Perform the data fetching by file. - ## o Return the (mz subsetted) spectra by file and by ranges. - ## o Since data should be processed on a by-file basis representation of the - ## result as a list (files) of list(ranges) of list(spectra) seems to be - ## best. - ## SEE runit.XCMSnExp.R, +#' @description Extract a \code{data.frame} of retention time, mz and intensity +#' values from each file/sample in the provided rt-mz range. +#' +#' @note Ideally, \code{x} should be an \code{OnDiskMSnExp} object as subsetting +#' of a \code{XCMSnExp} object is more costly (removing of preprocessing +#' results, restoring data etc). If retention times reported in the +#' featureData are replaced by adjusted retention times, these are set +#' in the Spectrum objects as retention time. +#' +#' @param x An \code{OnDiskMSnExp} object. +#' +#' @param rt \code{numeric(2)} with the retention time range from which the +#' data should be extracted. +#' +#' @param mz \code{numeric(2)} with the mz range. +#' +#' @param return A \code{list} with length equal to the number of files and +#' each element being a \code{data.frame} with the extracted values. +#' +#' @noRd +#' +#' @author Johannes Rainer +.extractMsData <- function(x, rt, mz) { + if (!missing(rt)) { + rt <- range(rt, na.rm = TRUE) + if (length(rt) != 2) + stop("'rt' has to be a numeric of length 2!") + } + if (!missing(mz)) { + mz <- range(mz, na.rm = TRUE) + if (length(mz) != 2) + stop("'mz' has to be a numeric of length 2!") + fmzr <- mz + } else fmzr <- c(0, 0) + ## Subset the object based on rt and mz range. 
+ subs <- filterMz(filterRt(x, rt = rt), mz = mz) + if (length(subs) == 0) { + ## Return a list with empty data.frames + empty_df <- data.frame(rt = numeric(), mz = numeric(), i = integer()) + return(lapply(1:length(fileNames(x)), FUN = function(z){empty_df})) + } + suppressWarnings( + dfs <- spectrapply(subs, FUN = function(z) { + if (!z@peaksCount) + return(data.frame(rt = numeric(), mz = numeric(), + i = integer())) + data.frame(rt = rep_len(z@rt, length(z@mz)), + mz = z@mz, i = z@intensity) + }) + ) + fns <- fileNames(x) + fromF <- base::match(fileNames(subs), fns) + + ## Now I want to rbind the spectrum data frames per file + L <- split(dfs, f = fromFile(subs)) + L <- lapply(L, do.call, what = rbind) + ## Put them into a vector same length that we have files. + res <- vector(mode = "list", length = length(fns)) + res[fromF] <- L + return(res) } + +#' @description This function extracts chromatograms efficiently for multiple +#' rt and mz ranges by loading the data per file only once and performing +#' the mz subsetting on the already loaded Spectrum1 classes. +#' +#' @note Ensure that x is an OnDiskMSnExp and not an e.g. XCMSnExp object. +#' Subsetting etc an XCMSnExp might take longer. +#' +#' @param rt \code{matrix} with two columns and number of rows corresponding to +#' the number of ranges to extract. +#' +#' @param mz \code{matrix} with two columns and number of rows corresponding to +#' the number of ranges to extract. nrow of rt and mz have to match. +#' +#' @param x OnDiskMSnExp object from which to extract the chromatograms. +#' +#' @param return.type either \code{"list"} or \code{"matrix"} to return the +#' result as a list or as a matrix. +#' +#' @param missingValue value to be used as intensity if no signal was measured +#' for a given rt. +#' +#' @return A \code{list} or \code{matrix} with the \code{Chromatogram} objects. 
+#' If no data was present for the specified \code{rtrange} and +#' \code{mzrange} the function returns a \code{list} of length \code{0}. +#' The \code{list} is arranged first by ranges and then by files, such that +#' \code{result[[1]]} returns a \code{list} of \code{Chromatogram} objects +#' for the same rt/mz range. +#' For \code{return.type = "matrix"} a \code{matrix} is returned with rows +#' corresponding to ranges and columns to files/samples. \code{result[, 1]} +#' will thus return a \code{list} of \code{Chromatogram} objects for the +#' first sample/file, while \code{result[1, ]} returns a \code{list} of +#' \code{Chromatogram} objects for the same rt/mz range for all files. +#' +#' @author Johannes Rainer +#' +#' @noRd +.extractMultipleChromatograms <- function(x, rt, mz, aggregationFun = "sum", + BPPARAM = bpparam(), + return.type = c("list", "matrix"), + missingValue = NA_real_) { + return.type <- match.arg(return.type) + missingValue <- as.numeric(missingValue) + if (!any(.SUPPORTED_AGG_FUN_CHROM == aggregationFun)) + stop("'aggregationFun' should be one of ", + paste0("'", .SUPPORTED_AGG_FUN_CHROM, "'", collapse = ", ")) + ## Ensure we're working on MS1 only! + x <- filterMsLevel(x, 1) + if (length(x) == 0) + return(list()) + nranges <- 1 + if (missing(rt)) + rt <- matrix(c(-Inf, Inf), nrow = 1) + if (missing(mz)) + mz <- matrix(c(-Inf, Inf), nrow = 1) + if (!missing(rt)) { + if (ncol(rt) != 2) + stop("'rt' has to be a matrix with two columns") + ## Replicate if nrow rt is 1 to match nrow of mz. + if (nrow(rt) == 1) + rt <- matrix(rep(rt, nrow(mz)), ncol = 2, byrow = TRUE) + } + if (!missing(mz)) { + if (ncol(mz) != 2) + stop("'mz' has to be a matrix with two coliumns") + if (nrow(mz) == 1) + mz <- matrix(rep(mz, nrow(rt)), ncol = 2, byrow = TRUE) + } + if (nrow(rt) != nrow(mz)) + stop("dimensions of 'rt' and 'mz' have to match") + ## Identify indices of all spectra that are within the rt ranges. 
+ rtimes <- rtime(x) + + ## 1) Subset x keeping all spectra that fall into any of the provided rt + ## ranges. + keep_idx <- unlist(apply(rt, MARGIN = 1, function(z) + which(rtimes >= z[1] & rtimes <= z[2])), use.names = FALSE) + keep_idx <- sort(unique(as.integer(keep_idx))) + if (length(keep_idx) == 0) + return(list()) + subs <- x[keep_idx] + + ## 2) Call the final subsetting on each file separately. + subs_by_file <- splitByFile(subs, f = factor(seq_along(fileNames(subs)))) + suppressWarnings( + res <- bpmapply( + subs_by_file, + seq_along(fileNames(subs)), + FUN = function(cur_sample, cur_file, rtm, mzm, aggFun) { + ## Load all spectra for that file. applies also any proc steps + sps <- spectra(cur_sample) + rts <- rtime(cur_sample) + cur_res <- vector("list", nrow(rtm)) + ## Loop through rt and mz. + for (i in 1:nrow(rtm)) { + ## - Select all spectra within that range and call a + ## function on them that does first filterMz and then + ## aggregate the values per spectrum. + in_rt <- rts >= rtm[i, 1] & rts <= rtm[i, 2] + ## Return an empty Chromatogram if there is no spectrum/scan + ## within the retention time range. + if (!any(in_rt)) { + cur_res[[i]] <- Chromatogram( + filterMz = mzm[i, ], + fromFile = as.integer(cur_file), + aggregationFun = aggFun) + next + } + cur_sps <- lapply( + sps[in_rt], + function(spct, filter_mz, aggFun) { + spct <- filterMz(spct, filter_mz) + ## Now aggregate the values. + if (!spct@peaksCount) + return(c(NA_real_, NA_real_, missingValue)) + return(c(range(spct@mz, na.rm = TRUE, finite = TRUE), + do.call( + aggFun, + list(spct@intensity, na.rm = TRUE)))) + }, filter_mz = mzm[i, ], aggFun = aggFun) + ## Now build the Chromatogram class. + allVals <- unlist(cur_sps, use.names = FALSE) + idx <- seq(3, length(allVals), by = 3) + ## Or should we drop the names completely? + ints <- allVals[idx] + names(ints) <- names(cur_sps) + ## Don't return a Chromatogram object if no values. 
+ if (!all(is.na(ints))) { + cur_res[[i]] <- Chromatogram( + rtime = rts[in_rt], + intensity = ints, + mz = range(allVals[-idx], na.rm = TRUE, + finite = TRUE), + filterMz = mzm[i, ], + fromFile = as.integer(cur_file), + aggregationFun = aggFun) + } else { + ## If no measurement if non-NA, still report the NAs and + ## use the filter mz as mz. + cur_res[[i]] <- Chromatogram( + rtime = rts[in_rt], + intensity = ints, + mz = mzm[i, ], + filterMz = mzm[i, ], + fromFile = as.integer(cur_file), + aggregationFun = aggFun) + } + } + cur_res + }, MoreArgs = list(rtm = rt, mzm = mz, aggFun = aggregationFun), + BPPARAM = BPPARAM, SIMPLIFY = FALSE) + ) + ## Ensure that the lists have the same length than there are samples! + fns <- fileNames(x) + fromF <- base::match(fileNames(subs), fns) + + ## If we've got some files in which we don't have any signal in any range, + ## fill it with empty Chromatograms. This ensures that the result has + ## ALWAYS the same length than there are samples. + if (length(res) != length(fns)) { + res_all_files <- vector(mode = "list", length = length(fns)) + res_all_files[fromF] <- res + empties <- which(lengths(res_all_files) == 0) + ## fill these + for (i in 1:length(empties)) { + empty_list <- vector(mode = "list", length = nrow(rt)) + for(j in 1:nrow(rt)) { + empty_list[j] <- Chromatogram(filterMz = mz[i, ], + fromFile = as.integer(i), + aggregationFun = aggregationFun) + } + res_all_files[[empties[i]]] <- empty_list + } + res <- res_all_files + } + ## Now I need to re-arrange the result. + if (return.type == "list") { + ## Got [[file]][[range]], but want to have [[range]][[file]] + final_res <- vector("list", nrow(rt)) + for (i in 1:nrow(rt)) { + final_res[[i]] <- lapply(res, FUN = `[[`, i) + } + if (nrow(rt) == 1) + final_res <- final_res[[1]] + } + if (return.type == "matrix") { + final_res <- do.call(cbind, res) + } + final_res +} + + +## #' @description Integrates the intensities for chromatograpic peak(s). 
This is +## #' supposed to be called by the fillChromPeaks method. +## #' +## #' @note This reads the full data first and does the subsetting later in R. +## #' +## #' @param object An \code{XCMSnExp} object representing a single sample. +## #' +## #' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, +## #' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. +## #' +## #' @noRd +## .getPeakInt2 <- function(object, peakArea) { +## if (length(fileNames(object)) != 1) +## stop("'object' should be an XCMSnExp for a single file!") +## res <- numeric(nrow(peakArea)) +## spctr <- spectra(object, BPPARAM = SerialParam()) +## mzs <- lapply(spctr, mz) +## valsPerSpect <- lengths(mzs) +## ints <- unlist(lapply(spctr, intensity), use.names = FALSE) +## rm(spctr) +## mzs <- unlist(mzs, use.names = FALSE) +## rtim <- rtime(object) +## for (i in 1:length(res)) { +## rtr <- peakArea[i, c("rtmin", "rtmax")] +## mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim, +## valsPerSpect = valsPerSpect, rtrange = rtr, +## mzrange = peakArea[i, c("mzmin", "mzmax")]) +## if (length(mtx)) { +## if (!all(is.na(mtx[, 3]))) { +## ## How to calculate the area: (1)sum of all intensities / (2)by +## ## the number of data points (REAL ones, considering also NAs) +## ## and multiplied with the (3)rt width. +## ## (1) sum(mtx[, 3], na.rm = TRUE) +## ## (2) sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1 ; if we used +## ## nrow(mtx) here, which would correspond to the non-NA +## ## intensities within the rt range we don't get the same results +## ## as e.g. centWave. +## ## (3) rtr[2] - rtr[1] +## res[i] <- sum(mtx[, 3], na.rm = TRUE) * +## ((rtr[2] - rtr[1]) / +## (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)) +## } else { +## res[i] <- NA_real_ +## } +## } else { +## res[i] <- NA_real_ +## } +## } +## return(unname(res)) +## } + +## #' @description Integrates the intensities for chromatograpic peak(s). This is +## #' supposed to be called by the fillChromPeaks method. 
+## #' +## #' @note This reads the full data first and does the subsetting later in R. This +## #' function uses the C getEIC function. +## #' +## #' @param object An \code{XCMSnExp} object representing a single sample. +## #' +## #' @param peakArea A \code{matrix} with the peak definition, i.e. \code{"rtmin"}, +## #' \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. +## #' +## #' @noRd +## .getPeakInt3 <- function(object, peakArea) { +## if (length(fileNames(object)) != 1) +## stop("'object' should be an XCMSnExp for a single file!") +## if (nrow(peakArea) == 0) { +## return(numeric()) +## } +## res <- matrix(ncol = 4, nrow = nrow(peakArea)) +## res <- numeric(nrow(peakArea)) +## spctr <- spectra(object, BPPARAM = SerialParam()) +## mzs <- lapply(spctr, mz) +## valsPerSpect <- lengths(mzs) +## scanindex <- valueCount2ScanIndex(valsPerSpect) ## Index vector for C calls +## ints <- unlist(lapply(spctr, intensity), use.names = FALSE) +## rm(spctr) +## mzs <- unlist(mzs, use.names = FALSE) +## rtim <- rtime(object) +## for (i in 1:length(res)) { +## rtr <- peakArea[i, c("rtmin", "rtmax")] +## sr <- c(min(which(rtim >= rtr[1])), max(which(rtim <= rtr[2]))) +## eic <- .Call("getEIC", mzs, ints, scanindex, +## as.double(peakArea[i, c("mzmin", "mzmax")]), +## as.integer(sr), as.integer(length(scanindex)), +## PACKAGE = "xcms") +## if (length(eic$intensity)) { +## ## How to calculate the area: (1)sum of all intensities / (2)by +## ## the number of data points (REAL ones, considering also NAs) +## ## and multiplied with the (3)rt width. +## if (!all(is.na(eic$intensity)) && !all(eic$intensity == 0)) { +## res[i] <- sum(eic$intensity, na.rm = TRUE) * +## ((rtr[2] - rtr[1]) / (length(eic$intensity) - 1)) +## } else { +## res[i] <- NA_real_ +## } +## } else { +## res[i] <- NA_real_ +## } +## } +## return(unname(res)) +## } + + +#' @description Integrates the intensities for chromatograpic peak(s). This is +#' supposed to be called by the fillChromPeaks method. 
+#' +#' @note This reads the full data first and does the subsetting later in R. +#' +#' @param object An \code{XCMSnExp} object representing a single sample. +#' +#' @param peakArea A \code{matrix} with the peak definition, i.e. +#' \code{"rtmin"}, \code{"rtmax"}, \code{"mzmin"} and \code{"mzmax"}. +#' +#' @param sample_idx \code{integer(1)} with the index of the sample in the +#' object. +#' +#' @param mzCenterFun Name of the function to be used to calculate the mz value. +#' Defaults to \code{weighted.mean}, i.e. the intensity weighted mean mz. +#' +#' @param cn \code{character} with the names of the result matrix. +#' +#' @return A \code{matrix} with at least columns \code{"mz"}, \code{"rt"}, +#' \code{"into"} and \code{"maxo"} with the by intensity weighted mean of +#' mz, rt or the maximal intensity in the area, the integrated signal in +#' the area and the maximal signal in the area. +#' +#' @noRd +.getChromPeakData <- function(object, peakArea, sample_idx, + mzCenterFun = "weighted.mean", + cn = c("mz", "rt", "into", "maxo", "sample")) { + if (length(fileNames(object)) != 1) + stop("'object' should be an XCMSnExp for a single file!") + ncols <- length(cn) + res <- matrix(ncol = ncols, nrow = nrow(peakArea)) + colnames(res) <- cn + res[, "sample"] <- sample_idx + res[, c("mzmin", "mzmax")] <- + peakArea[, c("mzmin", "mzmax")] + ## Load the data + message("Requesting ", nrow(res), " missing peaks from ", + basename(fileNames(object)), " ... ", appendLF = FALSE) + spctr <- spectra(object, BPPARAM = SerialParam()) + mzs <- lapply(spctr, mz) + valsPerSpect <- lengths(mzs) + ints <- unlist(lapply(spctr, intensity), use.names = FALSE) + rm(spctr) + mzs <- unlist(mzs, use.names = FALSE) + rtim <- rtime(object) + rtim_range <- range(rtim) + for (i in 1:nrow(res)) { + rtr <- peakArea[i, c("rtmin", "rtmax")] + ## Ensure that the rt region is within the rtrange of the data. 
+ rtr[1] <- max(rtr[1], rtim_range[1]) + rtr[2] <- min(rtr[2], rtim_range[2]) + mtx <- .rawMat(mz = mzs, int = ints, scantime = rtim, + valsPerSpect = valsPerSpect, rtrange = rtr, + mzrange = peakArea[i, c("mzmin", "mzmax")]) + if (length(mtx)) { + if (!all(is.na(mtx[, 3]))) { + ## How to calculate the area: (1)sum of all intensities / (2)by + ## the number of data points (REAL ones, considering also NAs) + ## and multiplied with the (3)rt width. + ## (1) sum(mtx[, 3], na.rm = TRUE) + ## (2) sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1 ; if we used + ## nrow(mtx) here, which would correspond to the non-NA + ## intensities within the rt range we don't get the same results + ## as e.g. centWave. + ## (3) rtr[2] - rtr[1] + res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) * + ((rtr[2] - rtr[1]) / + (sum(rtim >= rtr[1] & rtim <= rtr[2]) - 1)) + maxi <- which.max(mtx[, 3]) + res[i, c("rt", "maxo")] <- mtx[maxi[1], c(1, 3)] + res[i, c("rtmin", "rtmax")] <- rtr + ## Calculate the intensity weighted mean mz + meanMz <- do.call(mzCenterFun, list(mtx[, 2], mtx[, 3])) + if (is.na(meanMz)) meanMz <- mtx[maxi[1], 2] + res[i, "mz"] <- meanMz + } else { + res[i, ] <- rep(NA_real_, ncols) + } + } else { + res[i, ] <- rep(NA_real_, ncols) + } + } + message("got ", sum(!is.na(res[, "into"])), ".") + return(res) +} + +#' Same as getChromPeakData, just without retention time. +#' @note The mz and maxo are however estimated differently than for the +#' getChromPeakData: mz is the mz closest to the median mz of the feature and +#' maxo its intensity. 
+#' @noRd +.getMSWPeakData <- function(object, peakArea, sample_idx, + cn = c("mz", "rt", "into", "maxo", "sample")) { + if (length(fileNames(object)) != 1) + stop("'object' should be an XCMSnExp for a single file!") + ncols <- length(cn) + res <- matrix(ncol = ncols, nrow = nrow(peakArea)) + colnames(res) <- cn + res[, "sample"] <- sample_idx + res[, "rt"] <- -1 + res[, "rtmin"] <- -1 + res[, "rtmax"] <- -1 + res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")] + ## Load the data + message("Requesting ", nrow(res), " missing peaks from ", + basename(fileNames(object)), " ... ", appendLF = FALSE) + spctr <- spectra(object, BPPARAM = SerialParam()) + mzs <- lapply(spctr, mz) + valsPerSpect <- lengths(mzs) + ints <- unlist(lapply(spctr, intensity), use.names = FALSE) + rm(spctr) + mzs <- unlist(mzs, use.names = FALSE) + for (i in 1:nrow(res)) { + mz_area <- which(mzs >= peakArea[i, "mzmin"] & + mzs <= peakArea[i, "mzmax"]) + ## Alternative version from original code: but this can also pick up + ## mzs from outside of the range! See also comments on issue #130 + ## mz_area <- seq(which.min(abs(mzs - peakArea[i, "mzmin"])), + ## which.min(abs(mzs - peakArea[i, "mzmax"]))) + mtx <- cbind(time = -1, mz = mzs[mz_area], intensity = ints[mz_area]) + ## mtx <- xcms:::.rawMat(mz = mzs, int = ints, scantime = rtime(object), + ## valsPerSpect = valsPerSpect, + ## mzrange = peakArea[i, c("mzmin", "mzmax")]) + if (length(mtx)) { + if (!all(is.na(mtx[, 3]))) { + ## How to calculate the area: (1)sum of all intensities + res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) + ## Get the index of the mz value(s) closest to the mzmed of the + ## feature + mzDiff <- abs(mtx[, 2] - peakArea[i, "mzmed"]) + mz_idx <- which(mzDiff == min(mzDiff)) + ## Now get the one with the highest intensity. + maxi <- mz_idx[which.max(mtx[mz_idx, 3])] + ## Return these. + res[i, c("mz", "maxo")] <- mtx[maxi, 2:3] + ## ## mz should be the weighted mean! 
+ ## res[i, c("mz", "maxo")] <- c(weighted.mean(mtx[, 2], mtx[, 3]), + ## mtx[maxi[1], 3]) + } else { + res[i, ] <- rep(NA_real_, ncols) + } + } else { + res[i, ] <- rep(NA_real_, ncols) + } + } + message("got ", sum(!is.na(res[, "into"])), ".") + return(res) +} +## The same version as above, but the maxo is the maximum signal of the peak, +## and the mz the intensity weighted mean mz. +.getMSWPeakData2 <- function(object, peakArea, sample_idx, + cn = c("mz", "rt", "into", "maxo", "sample")) { + if (length(fileNames(object)) != 1) + stop("'object' should be an XCMSnExp for a single file!") + ncols <- length(cn) + res <- matrix(ncol = ncols, nrow = nrow(peakArea)) + colnames(res) <- cn + res[, "sample"] <- sample_idx + res[, "rt"] <- -1 + res[, "rtmin"] <- -1 + res[, "rtmax"] <- -1 + res[, c("mzmin", "mzmax")] <- peakArea[, c("mzmin", "mzmax")] + ## Load the data + message("Reguesting ", nrow(res), " missing peaks from ", + basename(fileNames(object)), " ... ", appendLF = FALSE) + spctr <- spectra(object, BPPARAM = SerialParam()) + mzs <- lapply(spctr, mz) + valsPerSpect <- lengths(mzs) + ints <- unlist(lapply(spctr, intensity), use.names = FALSE) + rm(spctr) + mzs <- unlist(mzs, use.names = FALSE) + for (i in 1:nrow(res)) { + mtx <- .rawMat(mz = mzs, int = ints, scantime = rtime(object), + valsPerSpect = valsPerSpect, + mzrange = peakArea[i, c("mzmin", "mzmax")]) + if (length(mtx)) { + if (!all(is.na(mtx[, 3]))) { + ## How to calculate the area: (1)sum of all intensities + res[i, "into"] <- sum(mtx[, 3], na.rm = TRUE) + res[i, c("mz", "maxo")] <- c(weighted.mean(mtx[, 2], mtx[, 3]), + max(mtx[, 3], na.rm = TRUE)) + } else { + res[i, ] <- rep(NA_real_, ncols) + } + } else { + res[i, ] <- rep(NA_real_, ncols) + } + } + message("got ", sum(!is.na(res[, "into"])), ".") + return(res) +} + +## Same as .getChromPeakData but for matchedFilter, i.e. using the profile +## matrix instead of the original signal. 
+.getChromPeakData_matchedFilter <- function(object, peakArea, sample_idx, + mzCenterFun = "weighted.mean", + param = MatchedFilterParam(), + cn = c("mz", "rt", "into", "maxo", + "sample")) { + if (length(fileNames(object)) != 1) + stop("'object' should be an XCMSnExp for a single file!") + ncols <- length(cn) + res <- matrix(ncol = ncols, nrow = nrow(peakArea)) + colnames(res) <- cn + res[, "sample"] <- sample_idx + res[, c("mzmin", "mzmax")] <- + peakArea[, c("mzmin", "mzmax")] + ## Load the data + message("Requesting ", nrow(res), " missing peaks from ", + basename(fileNames(object)), " ... ", appendLF = FALSE) + spctr <- spectra(object, BPPARAM = SerialParam()) + mzs <- lapply(spctr, mz) + vps <- lengths(mzs) + ints <- unlist(lapply(spctr, intensity), use.names = FALSE) + rm(spctr) + mzs <- unlist(mzs, use.names = FALSE) + rtim <- rtime(object) + rtim_range <- range(rtim) + ## Now, if we do have "distance" defined it get's tricky: + basespc <- NULL + if (length(distance(param)) > 0) { + mass <- seq(floor(min(mzs) / binSize(param)) * binSize(param), + ceiling(max(mzs) / binSize(param)) * binSize(param), + by = binSize(param)) + bin_size <- (max(mass) - min(mass)) / (length(mass) - 1) + basespc <- distance(param) * bin_size + } + ## Create the profile matrix: + pMat <- .createProfileMatrix(mz = mzs, int = ints, valsPerSpect = vps, + method = .impute2method(param), + step = binSize(param), + baselevel = baseValue(param), + basespace = basespc, + returnBreaks = TRUE, + baseValue = NA) # We want to return NA not 0 + # if nothing was found + brks <- pMat$breaks + pMat <- pMat$profMat ## rows are masses, cols are retention times/scans. + bin_size <- diff(brks[1:2]) + bin_half <- bin_size / 2 + ## Calculate the mean mass per bin using the breaks used for the binning. 
+ mass <- brks[-length(brks)] + bin_half ## midpoint for the breaks + mass_range <- range(mass) + + for (i in 1:nrow(res)) { + rtr <- peakArea[i, c("rtmin", "rtmax")] + mzr <- peakArea[i, c("mzmin", "mzmax")] + ## Ensure that the rt region is within the rtrange of the data. + rtr[1] <- max(rtr[1], rtim_range[1]) + rtr[2] <- min(rtr[2], rtim_range[2]) + mzr[1] <- max(mzr[1], mass_range[1]) + mzr[2] <- min(mzr[2], mass_range[2]) + ## Get the index of rt in rtim that are within the rt range rtr + ## range_rt <- c(min(which(rtim >= rtr[1])), max(which(rtim <= rtr[2]))) + ## range_rt <- c(which.min(abs(rtim - rtr[1])), + ## which.min(abs(rtim - rtr[2]))) + range_rt <- findRange(rtim, rtr, TRUE) + idx_rt <- range_rt[1]:range_rt[2] + ## Get the index of the mz in the data that are within the mz range. + ##range_mz <- c(min(which(brks >= mzr[1])) - 1, max(which(brks <= mzr[2]))) + range_mz <- findRange(mass, c(mzr[1] - bin_half, mzr[2] + bin_half), + TRUE) + idx_mz <- range_mz[1]:range_mz[2] + + if (length(idx_mz) > 0 & length(idx_rt) > 0) { + intMat <- pMat[idx_mz, idx_rt, drop = FALSE] + is_na <- is.na(intMat) + if (all(is_na)) { + res[i, ] <- rep(NA_real_, ncols) + next + } + intMat_0 <- intMat + intMat_0[is.na(intMat)] <- 0 + ## Calculate the mean mz value using a intensity weighted mean. + mz_ints <- rowSums(intMat, na.rm = TRUE) + ## mz_ints <- Biobase::rowMax(intMat) + if (length(mz_ints) != length(idx_mz)) { + ## Take from original code... + warning("weighted.mean: x and weights have to have same length!") + mz_ints <- rep(1, length(idx_mz)) + } + ## mean mz: intensity weighted mean + mean_mz <- weighted.mean(mass[idx_mz], mz_ints, na.rm = TRUE) + if (is.nan(mean_mz) || is.na(mean_mz)) + mean_mz <- mean(mzr, na.rm = TRUE) + res[i, "mz"] <- mean_mz + ## mean rt: position of the maximum intensity (along rt.) 
+ rt_ints <- colMax(intMat_0, na.rm = TRUE) + res[i, c("rt", "rtmin", "rtmax")] <- + c(rtim[idx_rt][which.max(rt_ints)], rtr) + ## maxo + res[i, "maxo"] <- max(rt_ints, na.rm = TRUE) + ## into + rt_width <- diff(rtim[range_rt])/diff(range_rt) + res[i, "into"] <- rt_width * sum(rt_ints, na.rm = TRUE) + } else + res[i, ] <- rep(NA_real_, ncols) + } + message("got ", sum(!is.na(res[, "into"])), ".") + return(res) +} + + +.hasFilledPeaks <- function(object) { + if (hasChromPeaks(object)) + if (any(colnames(chromPeaks(object)) == "is_filled")) + return(any(chromPeaks(object)[, "is_filled"] == 1)) + FALSE +} + +#' @description Simple helper function to extract the peakidx column from the +#' featureDefinitions DataFrame. The function ensures that the names of the +#' returned list correspond to the rownames of the DataFrame +#' +#' @noRd +.peakIndex <- function(object) { + if (!hasFeatures(object)) + stop("No feature definitions present. Please run groupChromPeaks first.") + idxs <- featureDefinitions(object)$peakidx + names(idxs) <- rownames(featureDefinitions(object)) + idxs +} + +#' @description \code{adjustRtimePeakGroups} returns the features (peak groups) +#' which would, depending on the provided \code{\link{PeakGroupsParam}}, be +#' selected for alignment/retention time correction. +#' +#' @note \code{adjustRtimePeakGroups} is supposed to be called \emph{before} the +#' sample alignment, but after a correspondence (peak grouping). +#' +#' @return For \code{adjustRtimePeakGroups}: a \code{matrix}, rows being +#' features, columns samples, of retention times. The features are ordered +#' by the median retention time across columns. +#' +#' @rdname adjustRtime-peakGroups +adjustRtimePeakGroups <- function(object, param = PeakGroupsParam()) { + if (!is(object, "XCMSnExp")) + stop("'object' has to be an 'XCMSnExp' object.") + if (!hasFeatures(object)) + stop("No features present. 
Please run 'groupChromPeaks' first.") + if (hasAdjustedRtime(object)) + warning("Alignment/retention time correction was already performed, ", + "returning a matrix with adjusted retention times.") + nSamples <- length(fileNames(object)) + pkGrp <- .getPeakGroupsRtMatrix( + peaks = chromPeaks(object), + peakIndex = .peakIndex(object), + nSamples = nSamples, + missingSample = nSamples - (nSamples * minFraction(param)), + extraPeaks = extraPeaks(param) + ) + colnames(pkGrp) <- basename(fileNames(object)) + pkGrp +} + +#' @title Visualization of alignment results +#' +#' @description Plot the difference between the adjusted and the raw retention +#' time (y-axis) for each file along the (adjusted or raw) retention time +#' (x-axis). If alignment was performed using the +#' \code{\link{adjustRtime-peakGroups}} method, also the features (peak +#' groups) used for the alignment are shown. +#' +#' @param object A \code{\link{XCMSnExp}} object with the alignment results. +#' +#' @param col colors to be used for the lines corresponding to the individual +#' samples. +#' +#' @param lty line type to be used for the lines of the individual samples. +#' +#' @param type plot type to be used. See help on the \code{par} function for +#' supported values. +#' +#' @param adjustedRtime logical(1) whether adjusted or raw retention times +#' should be shown on the x-axis. +#' +#' @param xlab the label for the x-axis. +#' +#' @param ylab the label for the y-axis. +#' +#' @param peakGroupsCol color to be used for the peak groups (only used if +#' alignment was performed using the \code{\link{adjustRtime-peakGroups}} +#' method. +#' +#' @param peakGroupsPch point character (\code{pch}) to be used for the peak +#' groups (only used if alignment was performed using the +#' \code{\link{adjustRtime-peakGroups}} method. 
+#'
+#' @param peakGroupsLty line type (\code{lty}) to be used to connect points for
+#' each peak group (only used if alignment was performed using the
+#' \code{\link{adjustRtime-peakGroups}} method).
+#'
+#' @param ... Additional arguments to be passed down to the \code{plot}
+#' function.
+#'
+#' @seealso \code{\link{adjustRtime}} for all retention time correction/
+#' alignment methods.
+#'
+#' @author Johannes Rainer
+#'
+#' @examples
+#' ## Below we perform first a peak detection (using the matchedFilter
+#' ## method) on some of the test files from the faahKO package followed by
+#' ## a peak grouping and retention time adjustment using the "peak groups"
+#' ## method
+#' library(faahKO)
+#' library(xcms)
+#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
+#'            full.names = TRUE)
+#'
+#' ## Reading 2 of the KO samples
+#' raw_data <- readMSData2(fls[1:2])
+#'
+#' ## Perform the peak detection using the matchedFilter method.
+#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1)
+#' res <- findChromPeaks(raw_data, param = mfp)
+#'
+#' ## Performing the peak grouping using the "peak density" method.
+#' p <- PeakDensityParam(sampleGroups = c(1, 1))
+#' res <- groupChromPeaks(res, param = p)
+#'
+#' ## Perform the retention time adjustment using peak groups found in both
+#' ## files.
+#' fgp <- PeakGroupsParam(minFraction = 1)
+#' res <- adjustRtime(res, param = fgp)
+#'
+#' ## Visualize the impact of the alignment. We show both versions of the plot,
+#' ## with the raw retention times on the x-axis (top) and with the adjusted
+#' ## retention times (bottom).
+#' par(mfrow = c(2, 1))
+#' plotAdjustedRtime(res, adjusted = FALSE)
+#' grid()
+#' plotAdjustedRtime(res)
+#' grid()
+plotAdjustedRtime <- function(object, col = "#00000080", lty = 1, type = "l",
+                              adjustedRtime = TRUE,
+                              xlab = ifelse(adjustedRtime,
+                                            yes = expression(rt[adj]),
+                                            no = expression(rt[raw])),
+                              ylab = expression(rt[adj]-rt[raw]),
+                              peakGroupsCol = "#00000060",
+                              peakGroupsPch = 16,
+                              peakGroupsLty = 3, ...) {
+    if (!is(object, "XCMSnExp"))
+        stop("'object' has to be an 'XCMSnExp' object.")
+    if (!hasAdjustedRtime(object))
+        warning("No alignment/retention time correction results present.")
+    diffRt <- rtime(object, adjusted = TRUE) - rtime(object, adjusted = FALSE)
+    diffRt <- split(diffRt, fromFile(object))
+    ## Define the rt that is shown on x-axis
+    xRt <- rtime(object, adjusted = adjustedRtime, bySample = TRUE)
+    ## Check colors.
+    if (length(col) == 1)
+        col <- rep(col, length(diffRt))
+    if (length(lty) == 1)
+        lty <- rep(lty, length(diffRt))
+    if (length(col) != length(diffRt)) {
+        warning("length of 'col' does not match the number of samples! Will ",
+                "use 'col[1]' for all samples.")
+        col <- rep(col[1], length(diffRt))
+    }
+    if (length(lty) != length(diffRt)) { ## fix: was length(lty) != length(lty)
+        warning("length of 'lty' does not match the number of samples! Will ",
+                "use 'lty[1]' for all samples.")
+        lty <- rep(lty[1], length(diffRt))
+    }
+    ## Initialize plot.
+    plot(3, 3, pch = NA, xlim = range(xRt, na.rm = TRUE),
+         ylim = range(diffRt, na.rm = TRUE), xlab = xlab, ylab = ylab, ...)
+    ## Plot all.
+    for (i in 1:length(diffRt))
+        points(x = xRt[[i]], y = diffRt[[i]], col = col[i], lty = lty[i],
+               type = type)
+    ## If alignment was performed using the peak groups method highlight also
+    ## those in the plot.
+ ph <- processHistory(object, type = .PROCSTEP.RTIME.CORRECTION) + if (length(ph)) { + ph <- ph[[length(ph)]] + if (is(ph, "XProcessHistory")) { + ## Check if we've got a PeakGroupsParam parameter class + prm <- processParam(ph) + if (is(prm, "PeakGroupsParam")) { + rm(diffRt) + rm(xRt) + rawRt <- rtime(object, adjusted = FALSE, bySample = TRUE) + adjRt <- rtime(object, adjusted = TRUE, bySample = TRUE) + pkGroup <- peakGroupsMatrix(prm) + ## Have to "adjust" these: + pkGroupAdj <- pkGroup + for (i in 1:ncol(pkGroup)) { + pkGroupAdj[, i] <- .applyRtAdjustment(pkGroup[, i], + rawRt[[i]], + adjRt[[i]]) + } + diffRt <- pkGroupAdj - pkGroup + if (adjustedRtime) + xRt <- pkGroupAdj + else + xRt <- pkGroup + ## Loop through the rows and plot points - ordered by diffRt! + for (i in 1:nrow(xRt)) { + idx <- order(diffRt[i, ]) + points(x = xRt[i, ][idx], diffRt[i, ][idx], + col = peakGroupsCol, type = "b", + pch = peakGroupsPch, lty = peakGroupsLty) + } + } + } + } +} + +#' @title Plot chromatographic peak density along the retention time axis +#' +#' @description Plot the density of chromatographic peaks along the retention +#' time axis and indicate which peaks would be grouped into the same feature +#' based using the \emph{peak density} correspondence method. Settings for +#' the \emph{peak density} method can be passed with an +#' \code{\link{PeakDensityParam}} object to parameter \code{param}. +#' +#' @details The \code{plotChromPeakDensity} function allows to evaluate +#' different settings for the \emph{peak density} on an mz slice of +#' interest (e.g. containing chromatographic peaks corresponding to a known +#' metabolite). +#' The plot shows the individual peaks that were detected within the +#' specified \code{mz} slice at their retention time (x-axis) and sample in +#' which they were detected (y-axis). The density function is plotted as a +#' black line. Parameters for the \code{density} function are taken from the +#' \code{param} object. 
Grey rectangles indicate which chromatographic peaks +#' would be grouped into a feature by the \emph{peak density} correspondence +#' method. Parameters for the algorithm are also taken from \code{param}. +#' See \code{\link{groupChromPeaks-density}} for more information about the +#' algorithm and its supported settings. +#' +#' @param object A \code{\link{XCMSnExp}} object with identified +#' chromatographic peaks. +#' +#' @param mz \code{numeric(2)} defining an mz range for which the peak density +#' should be plotted. +#' +#' @param rt \code{numeric(2)} defining an optional rt range for which the +#' peak density should be plotted. Defaults to the absolute retention time +#' range of \code{object}. +#' +#' @param param \code{\link{PeakDensityParam}} from which parameters for the +#' \emph{peak density} correspondence algorithm can be extracted. +#' +#' @param col Color to be used for the individual samples. Length has to be 1 +#' or equal to the number of samples in \code{object}. +#' +#' @param xlab \code{character(1)} with the label for the x-axis. +#' +#' @param ylab \code{character(1)} with the label for the y-axis. +#' +#' @param xlim \code{numeric(2)} representing the limits for the x-axis. +#' Defaults to the range of the \code{rt} parameter. +#' +#' @param ... Additional parameters to be passed to the \code{plot} function. +#' +#' @return The function is called for its side effect, i.e. to create a plot. +#' +#' @author Johannes Rainer +#' +#' @seealso \code{\link{groupChromPeaks-density}} for details on the +#' \emph{peak density} correspondence method and supported settings. +#' +#' @examples +#' +#' ## Below we perform first a peak detection (using the centWave +#' ## method) on some of the test files from the faahKO package. 
+#' library(faahKO) +#' library(xcms) +#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, +#' full.names = TRUE) +#' +#' ## Reading 2 of the KO samples +#' raw_data <- readMSData2(fls[1:2]) +#' +#' ## Perform the peak detection using the centWave method. +#' res <- findChromPeaks(raw_data, param = CentWaveParam(noise = 1000)) +#' +#' ## Align the samples using obiwarp +#' res <- adjustRtime(res, param = ObiwarpParam()) +#' +#' ## Plot the chromatographic peak density for a specific mz range to evaluate +#' ## different peak density correspondence settings. +#' mzr <- c(305.05, 305.15) +#' +#' plotChromPeakDensity(res, mz = mzr, param = PeakDensityParam(), pch = 16) +#' +#' ## Use a larger bandwidth +#' plotChromPeakDensity(res, mz = mzr, param = PeakDensityParam(bw = 60), +#' pch = 16) +#' ## Neighboring peaks are now fused into one. +#' +#' ## Require the chromatographic peak to be present in all samples of a group +#' plotChromPeakDensity(res, mz = mzr, pch = 16, +#' param = PeakDensityParam(minFraction = 1)) +plotChromPeakDensity <- function(object, mz, rt, param = PeakDensityParam(), + col = "#00000080", xlab = "retention time", + ylab = "sample", xlim = range(rt), ...) { + if (missing(object)) + stop("Required parameter 'object' is missing") + if (!is(object, "XCMSnExp")) + stop("'object' must be an XCMSnExp object") + if (!hasChromPeaks(object)) + stop("No chromatographic peaks present in 'object'") + if (missing(mz)) + mz <- c(-Inf, Inf) + if (missing(rt)) + rt <- range(rtime(object)) + mz <- range(mz) + rt <- range(rt) + ## Get all the data we require. + nsamples <- length(fileNames(object)) + if (length(col) != nsamples) + col <- rep_len(col[1], nsamples) + pks <- chromPeaks(object, mz = mz, rt = rt) + if (nrow(pks)) { + ## Extract parameters from the param object + bw = bw(param) + ## That's Jan Stanstrup's fix (issue #161). 
+ densN <- max(512, 2^(ceiling(log2(diff(rt) / (bw / 2))))) + sample_groups <- sampleGroups(param) + if (length(sample_groups) == 0) + sample_groups <- rep(1, nsamples) + if (length(sample_groups) != nsamples) + stop("If provided, the 'sampleGroups' parameter in the 'param' ", + "class has to have the same length than there are samples ", + "in 'object'") + sample_groups_table <- table(sample_groups) + dens_from <- rt[1] - 3 * bw + dens_to <- rt[2] + 3 * bw + dens <- density(pks[, "rt"], bw = bw, from = dens_from, to = dens_to, + n = densN) + yl <- c(0, max(dens$y)) + ypos <- seq(from = 0, to = yl[2], length.out = nsamples) + ## Plot the peaks as points. + plot(x = pks[, "rt"], y = ypos[pks[, "sample"]], xlim = xlim, + col = col[pks[, "sample"]], xlab = xlab, yaxt = "n", ylab = ylab, + main = paste0(format(mz, digits = 7), collapse = " - "), ...) + axis(side = 2, at = ypos, labels = 1:nsamples) + points(x = dens$x, y = dens$y, type = "l") + ## Estimate what would be combined to a feature + ## Code is taken from do_groupChromPeaks_density + dens_max <- max(dens$y) + dens_y <- dens$y + snum <- 0 + while(dens_y[max_y <- which.max(dens_y)] > dens_max / 20 && + snum < maxFeatures(param)) { + feat_range <- xcms:::descendMin(dens_y, max_y) + dens_y[feat_range[1]:feat_range[2]] <- 0 + feat_idx <- which(pks[, "rt"] >= dens$x[feat_range[1]] & + pks[, "rt"] <= dens$x[feat_range[2]]) + tt <- table(sample_groups[pks[feat_idx, "sample"]]) + if (!any(tt / sample_groups_table[names(tt)] >= + minFraction(param) & tt >= minSamples(param))) + next + rect(xleft = min(pks[feat_idx, "rt"]), ybottom = 0, + xright = max(pks[feat_idx, "rt"]), ytop = yl[2], + border = "#00000040", col = "#00000020") + } + } else { + plot(3, 3, pch = NA, xlim = rt, xlab = xlab, + main = paste0(format(mz, digits = 7), collapse = " - ")) + } +} + +## Plot the chromatographic peaks for a file in a two dimensional plot. +## plotChromPeakImage... 
+## @description Plots the + +## Find mz ranges with multiple peaks per sample. +## Use the density distribution for that? with a bandwidth = 0.001, check +## density method for that... diff --git a/R/functions-binning.R b/R/functions-binning.R index d5b925a4d..25ec9cb4c 100644 --- a/R/functions-binning.R +++ b/R/functions-binning.R @@ -172,7 +172,9 @@ binYonX <- function(x, y, breaks, nBins, binSize, binFromX, if (!sortedX) { message("'x' is not sorted, will sort 'x' and 'y'.") ## Sort method; see issue #180 for MSnbase - o <- order(x, method = options()$BioC$xcms$sortMethod) + ## Note: order method = "radix" is considerably faster - but there is no + ## method argument for older R versions. + o <- order(x) x <- x[o] y <- y[o] } diff --git a/R/functions-utils.R b/R/functions-utils.R index 24c3947c8..5915f5207 100644 --- a/R/functions-utils.R +++ b/R/functions-utils.R @@ -4,16 +4,21 @@ ############################################################ ## valueCount2ScanIndex ## -## @description Simple helper function that converts the number of values -## per scan/spectrum to an integer vector that can be passed to the base -## xcms functions/downstream C functions. -## -## @title Create index vector for internal C calls -## @param valCount Numeric vector representing the number of values per -## spectrum. -## @return An integer vector with the index (0-based) in the mz or intensity -## vectors indicating the start of a spectrum. -## @author Johannes Rainer +#' @title Create index vector for internal C calls +#' +#' @description Simple helper function that converts the number of values +#' per scan/spectrum to an integer vector that can be passed to the base +#' xcms functions/downstream C functions. +#' +#' @param valCount Numeric vector representing the number of values per +#' spectrum. +#' +#' @return An integer vector with the index (0-based) in the mz or intensity +#' vectors indicating the start of a spectrum. 
+#' +#' @author Johannes Rainer +#' +#' @noRd valueCount2ScanIndex <- function(valCount){ ## Convert into 0 based. valCount <- cumsum(valCount) @@ -27,24 +32,27 @@ valueCount2ScanIndex <- function(valCount){ ## code instead of the new implementations. ## This sets options. ## -##' @title Enable usage of old xcms code -##' -##' @description This function allows to enable the usage of old, partially -##' deprecated code from xcms by setting a corresponding global option. See -##' details for functions affected. -##' -##' @note Usage of old code is strongly dicouraged. This function is thought -##' to be used mainly in the transition phase from xcms to xcms version 3. -##' @details The functions/methods that will be affected by this are: -##' \itemize{ -##' \item \code{\link{do_detectFeatures_matchedFilter}} -##' } -##' @param x logical(1) to specify whether or not original -##' old code should be used in corresponding functions. If not provided the -##' function simply returns the value of the global option. -##' @return logical(1) indicating whether old code is being -##' used. -##' @author Johannes Rainer +#' @title Enable usage of old xcms code +#' +#' @description This function allows to enable the usage of old, partially +#' deprecated code from xcms by setting a corresponding global option. See +#' details for functions affected. +#' +#' @note Usage of old code is strongly dicouraged. This function is thought +#' to be used mainly in the transition phase from xcms to xcms version 3. +#' +#' @details The functions/methods that will be affected by this are: +#' \itemize{ +#' \item \code{\link{do_findChromPeaks_matchedFilter}} +#' } +#' +#' @param x logical(1) to specify whether or not original +#' old code should be used in corresponding functions. If not provided the +#' function simply returns the value of the global option. +#' +#' @return logical(1) indicating whether old code is being used. 
+#' +#' @author Johannes Rainer useOriginalCode <- function(x) { if (missing(x)) { res <- options()$BioC$xcms$useOriginalCode @@ -69,13 +77,19 @@ useOriginalCode <- function(x) { ## matchedFilter = ".matchedFilter_orig" ## ) -##' @title Copy the content from an environment to another one -##' This function copies the content of an environment into another one. -##' @param env environment from which to copy. -##' @param inheritLocks logical(1) whether the locking status should be copied -##' too -##' @return an env. -##' @noRd +#' @title Copy the content from an environment to another one +#' +#' @description This function copies the content of an environment into another +#' one. +#' +#' @param env environment from which to copy. +#' +#' @param inheritLocks logical(1) whether the locking status should be copied +#' too. +#' +#' @return an env. +#' +#' @noRd .copy_env <- function(env, inheritLocks = FALSE) { new_e <- new.env(parent = emptyenv()) eNames <- ls(env, all.names = TRUE) @@ -90,3 +104,223 @@ useOriginalCode <- function(x) { } return(new_e) } + +## #' Simulates the \code{findRange} function. +## #' @noRd +## findRangeR <- function(x, values) { +## start <- min(which(x >= values[1])) +## end <- max(which(x <= values[2])) +## return(c(start, end)) +## } + +############################################################ +## .createProfileMatrix +#' @title Create the profile matrix +#' +#' @description This function creates a \emph{profile} matrix, i.e. a rt times +#' m/z matrix of aggregated intensity values with values aggregated within +#' bins along the m/z dimension. +#' +#' @details This is somewhat the successor function for the deprecated +#' \code{profBin} methods (\code{profBinM}, \code{profBinLinM}, +#' \code{profBinLinBaseM} and \code{profIntLin}). +#' +#' @param mz Numeric representing the m/z values across all scans/spectra. +#' +#' @param int Numeric representing the intensity values across all +#' scans/spectra. 
+#' +#' @param valsPerSpect Numeric representing the number of measurements for each +#' scan/spectrum. +#' +#' @param method A character string specifying the profile matrix generation +#' method. Allowed are \code{"bin"}, \code{"binlin"}, +#' \code{"binlinbase"} and \code{"intlin"}. +#' +#' @param step Numeric specifying the size of the m/z bins. +#' +#' @param baselevel Numeric specifying the base value. +#' +#' @param basespace Numeric. +#' +#' @param mzrange. numeric(2) optionally specifying the mz value range +#' for binning. This is to adopt the old profStepPad<- method used for +#' obiwarp retention time correction that did the binning from +#' whole-number limits. +#' +#' @param returnBreaks logical(1): hack to return the breaks of the bins. +#' Setting this to TRUE causes the function to return a \code{list} with +#' elements \code{"$profMat"} and \code{"breaks"}. +#' +#' @param baseValue numeric(1) defining the value to be returned if no signal +#' was found in the corresponding bin. Defaults to 0 for backward +#' compatibility. +#' +#' @noRd +.createProfileMatrix <- function(mz, int, valsPerSpect, + method, step = 0.1, baselevel = NULL, + basespace = NULL, + mzrange. = NULL, + returnBreaks = FALSE, + baseValue = 0) { + profMeths <- c("bin", "binlin", "binlinbase", "intlin") + names(profMeths) <- c("none", "lin", "linbase", "intlin") + method <- match.arg(method, profMeths) + impute <- names(profMeths)[profMeths == method] + brks <- NULL + + if (length(mzrange.) != 2) { + mrange <- range(mz, na.rm = TRUE) + mzrange. <- c(floor(mrange[1] / step) * step, + ceiling(mrange[2] / step) * step) + } + mass <- seq(mzrange.[1], mzrange.[2], by = step) + mlength <- length(mass) + ## Calculate the "real" bin size; old xcms code oddity that that's different + ## from step. + bin_size <- (mass[mlength] - mass[1]) / (mlength - 1) + ## Define the breaks. 
+ toIdx <- cumsum(valsPerSpect) + fromIdx <- c(1L, toIdx[-length(toIdx)] + 1L) + shiftBy <- TRUE + binFromX <- min(mass) + binToX <- max(mass) + brks <- breaks_on_nBins(fromX = binFromX, toX = binToX, + nBins = mlength, shiftByHalfBinSize = TRUE) + ## for profIntLinM we have to use the old code. + if (impute == "intlin") { + profFun <- "profIntLinM" + profp <- list() + scanindex <- valueCount2ScanIndex(valsPerSpect) + buf <- do.call(profFun, args = list(mz, int, + scanindex, mlength, + mass[1], mass[mlength], + TRUE)) + } else { + ## Binning the data. + binRes <- binYonX(mz, int, + breaks = brks, + fromIdx = fromIdx, + toIdx = toIdx, + baseValue = ifelse(impute == "none", yes = baseValue, + no = NA), + sortedX = TRUE, + returnIndex = FALSE + ) + if (length(toIdx) == 1) + binRes <- list(binRes) + ## Missing value imputation. + if (impute == "linbase") { + ## need arguments distance and baseValue. + if (length(basespace) > 0) { + if (!is.numeric(basespace)) + stop("'basespace' has to be numeric!") + distance <- floor(basespace[1] / bin_size) + } else { + distance <- floor(0.075 / bin_size) + } + if (length(baselevel) > 0) { + if (!is.numeric(baselevel)) + stop("'baselevel' has to be numeric!") + baseValue <- baselevel + } else { + baseValue <- min(int, na.rm = TRUE) / 2 + } + } else { + distance <- 0 + baseValue <- 0 + } + binVals <- lapply(binRes, function(z) { + return(imputeLinInterpol(z$y, method = impute, distance = distance, + noInterpolAtEnds = TRUE, + baseValue = baseValue)) + }) + buf <- do.call(cbind, binVals) + } + if (returnBreaks) + buf <- list(profMat = buf, breaks = brks) + buf +} + +#' @description This function creates arbitrary IDs for features. +#' +#' @param x integer(1) with the number of IDs that should be generated. +#' +#' @noRd +.featureIDs <- function(x) { + sprintf(paste0("FT%0", ceiling(log10(x + 1L)), "d"), 1:x) +} + +#' @description Expands stretches of TRUE values in \code{x} by one on both +#' sides. 
+#'
+#' @note The return value for a \code{NA} is always \code{FALSE}.
+#'
+#' @param x \code{logical} vector.
+#'
+#' @author Johannes Rainer
+#'
+#' @noRd
+.grow_trues <- function(x) {
+    previous <- NA
+    x_new <- rep_len(FALSE, length(x))
+    for (i in 1:length(x)) {
+        if (is.na(x[i])) {
+            previous <- NA
+            next
+        }
+        ## If current element is TRUE
+        if (x[i]) {
+            x_new[i] <- TRUE
+            ## if last element was FALSE, set last element to TRUE
+            if (!is.na(previous) && !previous)
+                x_new[i - 1] <- TRUE
+        } else {
+            ## if previous element was TRUE, set current to TRUE.
+            if (!is.na(previous) && previous)
+                x_new[i] <- TRUE
+        }
+        previous <- x[i]
+    }
+    x_new
+}
+
+#' @title Weighted mean around maximum
+#'
+#' @description Calculate a weighted mean of the values around the value with the
+#' largest weight. \code{x} could e.g. be mz values and \code{w} the
+#' corresponding intensity values.
+#'
+#' @param x \code{numeric} vector from which the weighted mean should be
+#' calculated.
+#'
+#' @param w \code{numeric} of the same length as \code{x} with the weights.
+#'
+#' @param i \code{integer(1)} defining the number of data points left and right
+#' of the index with the largest weight that should be considered for the
+#' weighted mean calculation.
+#'
+#' @return The weighted mean value.
+#' +#' @author Johannes Rainer +#' +#' @noRd +#' +#' @examples +#' +#' mz <- c(124.0796, 124.0812, 124.0828, 124.0843, 124.0859, 124.0875, +#' 124.0890, 124.0906, 124.0922, 124.0938, 124.0953, 124.0969) +#' ints <- c(10193.8, 28438.0, 56987.6, 85107.6, 102531.6, 104262.6, +#' 89528.8, 61741.2, 33485.8, 14146.6, 5192.2, 1630.2) +#' +#' plot(mz, ints) +#' +#' ## What would be found by the max: +#' abline(v = mz[which.max(ints)], col = "grey") +#' ## What does the weighted mean around apex return: +#' abline(v = weightedMeanAroundApex(mz, ints, i = 2), col = "blue") +weightedMeanAroundApex <- function(x, w = rep(1, length(x)), i = 1) { + max_idx <- which.max(w) + seq_idx <- max(1, max_idx - i):min(length(x), max_idx + i) + weighted.mean(x[seq_idx], w[seq_idx]) +} diff --git a/R/functions-xcmsRaw.R b/R/functions-xcmsRaw.R index bd5df67d7..b2476294e 100644 --- a/R/functions-xcmsRaw.R +++ b/R/functions-xcmsRaw.R @@ -47,7 +47,7 @@ xcmsRaw <- function(filename, profstep = 1, profmethod = "bin", if (min(scanrange) < 1 | max(scanrange) > length(object@scantime)) { scanrange[1] <- max(1, scanrange[1]) scanrange[2] <- min(length(object@scantime), scanrange[2]) - message("Provided scanrange was adjusted to ", scanrange) + message("Provided scanrange was adjusted to ", scanrange[1]," - ", scanrange[2]) } if (!is.null(rawdata$acquisitionNum)) { ## defined only for mzData and mzXML @@ -539,106 +539,196 @@ remakeTIC<-function(object){ } ############################################################ -## .createProfileMatrix -##' @title Create the profile matrix -##' -##' @description This function creates a \emph{profile} matrix, i.e. a rt times -##' m/z matrix of aggregated intensity values with values aggregated within bins -##' along the m/z dimension. -##' -##' @details This is somewhat the successor function for the deprecated -##' \code{profBin} methods (\code{profBinM}, \code{profBinLinM}, -##' \code{profBinLinBaseM} and \code{profIntLin}). 
-##' -##' @param mz Numeric representing the m/z values across all scans/spectra. -##' @param int Numeric representing the intensity values across all -##' scans/spectra. -##' @param valsPerSpect Numeric representing the number of measurements for each -##' scan/spectrum. -##' @param method A character string specifying the profile matrix generation -##' method. Allowed are \code{"bin"}, \code{"binlin"}, -##' \code{"binlinbase"} and \code{"intlin"}. -##' @param step Numeric specifying the size of the m/z bins. -##' @param baselevel Numeric specifying the base value. -##' @param basespace Numeric. -##' @param mzrange. numeric(2) optionally specifying the mz value range -##' for binning. This is to adopt the old profStepPad<- method used for obiwarp -##' retention time correction that did the binning from whole-number limits. -##' @noRd -.createProfileMatrix <- function(mz, int, valsPerSpect, - method, step = 0.1, baselevel = NULL, - basespace = NULL, - mzrange. = NULL) { - profMeths <- c("bin", "binlin", "binlinbase", "intlin") - names(profMeths) <- c("none", "lin", "linbase", "intlin") - method <- match.arg(method, profMeths) - impute <- names(profMeths)[profMeths == method] - - if (length(mzrange.) != 2) { - mrange <- range(mz) - mzrange. <- c(floor(mrange[1] / step) * step, - ceiling(mrange[2] / step) * step) +## getPeaks +#' @description Replacement function for the original getPeaks method/function +#' that does no longer use the deprecated \code{profFun} functions. This +#' function uses the \code{binYonX} and \code{imputeLinInterpol} to perform +#' the binning (and missing value imputation). +#' +#' @param object An \code{xcmsRaw} object. +#' +#' @param peakrange \code{matrix} with 4 required columns \code{"mzmin"}, +#' \code{"mzmax"}, \code{"rtmin"} and \code{"rtmax"}. +#' +#' @param step \code{numeric(1)} defining the bin size for the profile matrix +#' generation. 
+#' +#' @author Johannes Rainer +#' +#' @noRd +.getPeaks_new <- function(object, peakrange, step = 0.1) { + ## Here we're avoiding the profFun call. + if (all(c("mzmin","mzmax","rtmin","rtmax") %in% colnames(peakrange))) + peakrange <- peakrange[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE] + stime <- object@scantime + + pi <- profinfo(object) + method <- pi$method + if (missing(step)) + step <- pi$step + if (step == 0) + step <- 0.1 + baselevel <- pi$baselevel + basespace <- pi$basespace + vps <- diff(c(object@scanindex, length(object@env$mz))) + + cat("method: ", method, "\n") + cat("step: ", step, "\n") + ## Create the profile matrix: + pMat <- .createProfileMatrix(mz = object@env$mz, int = object@env$intensity, + valsPerSpect = vps, + method = method, + step = step, + baselevel = baselevel, + basespace = basespace, + returnBreaks = TRUE, + baseValue = 0, + mzrange. = NULL) + brks <- pMat$breaks + pMat <- pMat$profMat ## rows are masses, cols are retention times/scans. + bin_size <- diff(brks[1:2]) + bin_half <- bin_size / 2 + ## Calculate the mean mass per bin using the breaks used for the binning. + ## Note: these define the real mass breaks as they have been used for the + ## binning. Simply using seq(floor...) as in the original code is wrong + ## because the mass bins are calculated wrongly. The bin size is != step, + ## bin size is marginally smaller and, for larger mz the correct mass + ## bin will be wrongly identified. + mass <- brks[-length(brks)] + bin_half ## midpoint for the breaks + mass_range <- range(mass) + + ## Prepare the result matrix. 
+ cnames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "maxo") + rmat <- matrix(nrow = nrow(peakrange), ncol = length(cnames)) + colnames(rmat) <- cnames + + for (i in order(peakrange[, 1])) { + imz <- findRange(mass, c(peakrange[i, 1] - bin_half, + peakrange[i, 2] + bin_half), TRUE) + iret <- findRange(stime, peakrange[i, 3:4], TRUE) + idx_imz <- imz[1]:imz[2] + idx_iret <- iret[1]:iret[2] + ## Extract the intensity matrix for the mz-rt range: rows are mz, cols + ## rt values. + ymat <- pMat[idx_imz, idx_iret, drop = FALSE] + ## Define the maximum intensity, is one value per mz. + ymax <- colMax(ymat) + iymax <- which.max(ymax) + + ## The width in rt. + pwid <- diff(stime[iret])/diff(iret) + + ## Calculate sum across rt. For each mz we get one value. + rosm <- rowSums(ymat) + limz <- length(idx_imz) + if (length(rosm) != limz) { ## that happens for some reason + warning("weighted.mean : x and w must have the same length \n") + rosm <- rep(1, limz) ## fallback to mean + } + ## mean mz: + rmat[i, 1] <- weighted.mean(mass[idx_imz], rosm) ## mz; its not the + ## position of the largest intensity! 
+ if (is.nan(rmat[i,1]) || is.na(rmat[i,1])) ## R2.11 : weighted.mean() + ## results in NA (not NaN) for zero weights + rmat[i, 1] <- mean(peakrange[i, 1:2]) + + rmat[i, 2:3] <- peakrange[i, 1:2] ## mzmin, mzmax + rmat[i, 4] <- stime[idx_iret][iymax] ## rt + rmat[i, 5:6] <- peakrange[i, 3:4] ## rtmin, rtmax + + if (peakrange[i, 3] < stime[1] || + peakrange[i, 4] > stime[length(stime)] || + is.nan(pwid)) { + warning("getPeaks: Peak m/z:", peakrange[i, 1], "-", + peakrange[i, 2], ", RT:", peakrange[i, 3], "-", + peakrange[i, 4], "is out of retention time range for ", + "this sample (", object@filepath, + "), using zero intensity value.\n") + rmat[i, 7:8] <- 0 + } else { + rmat[i, 7] <- pwid * sum(ymax) ## into + rmat[i, 8] <- ymax[iymax] ## maxo + } } - mass <- seq(mzrange.[1], mzrange.[2], by = step) - mlength <- length(mass) - ## Calculate the "real" bin size; old xcms code oddity that that's different - ## from step. - bin_size <- (mass[mlength] - mass[1]) / (mlength - 1) - ## for profIntLinM we have to use the old code. - if (impute == "intlin") { - profFun <- "profIntLinM" - profp <- list() - scanindex <- valueCount2ScanIndex(valsPerSpect) - buf <- do.call(profFun, args = list(mz, int, - scanindex, mlength, - mass[1], mass[mlength], - TRUE)) - } else { - ## Binning the data. - toIdx <- cumsum(valsPerSpect) - fromIdx <- c(1L, toIdx[-length(toIdx)] + 1L) - shiftBy <- TRUE - binFromX <- min(mass) - binToX <- max(mass) - brks <- breaks_on_nBins(fromX = binFromX, toX = binToX, - nBins = mlength, shiftByHalfBinSize = TRUE) - binRes <- binYonX(mz, int, - breaks = brks, - fromIdx = fromIdx, - toIdx = toIdx, - baseValue = ifelse(impute == "none", yes = 0, no = NA), - sortedX = TRUE, - returnIndex = FALSE - ) - if (length(toIdx) == 1) - binRes <- list(binRes) - ## Missing value imputation. - if (impute == "linbase") { - ## need arguments distance and baseValue. 
- if (length(basespace) > 0) { - if (!is.numeric(basespace)) - stop("'basespace' has to be numeric!") - distance <- floor(basespace[1] / bin_size) - } else { - distance <- floor(0.075 / bin_size) - } - if (length(baselevel) > 0) { - if (!is.numeric(baselevel)) - stop("'baselevel' has to be numeric!") - baseValue <- baselevel - } else { - baseValue <- min(int, na.rm = TRUE) / 2 - } + invisible(rmat) +} + +#' @description Original getPeaks function. This should be removed at some point +#' as it uses deprecated API. +#' @noRd +.getPeaks_orig <- function(object, peakrange, step = 0.1) { + profFun <- match.profFun(object) + if (all(c("mzmin","mzmax","rtmin","rtmax") %in% colnames(peakrange))) + peakrange <- peakrange[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE] + stime <- object@scantime + +### Create EIC buffer + ## This is NOT calculated for the full file. + mrange <- range(peakrange[,1:2]) + ## These mass bins are slightly different from the ones that are used + ## by the binning function, since within the binning function the step/bin + ## size is recalculated! 
+ mass <- seq(floor(mrange[1]/step)*step, ceiling(mrange[2]/step)*step, by = step) + bufsize <- min(100, length(mass)) + buf <- profFun(object@env$mz, object@env$intensity, object@scanindex, + bufsize, mass[1], mass[bufsize], TRUE, object@profparam) + bufidx <- integer(length(mass)) + idxrange <- c(1, bufsize) + bufidx[idxrange[1]:idxrange[2]] <- 1:bufsize + + cnames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "maxo") + rmat <- matrix(nrow = nrow(peakrange), ncol = length(cnames)) + colnames(rmat) <- cnames + + for (i in order(peakrange[,1])) { + imz <- findRange(mass, c(peakrange[i,1]-.5*step, peakrange[i,2]+.5*step), TRUE) + iret <- findRange(stime, peakrange[i,3:4], TRUE) + +### Update EIC buffer if necessary + if (bufidx[imz[2]] == 0) { + bufidx[idxrange[1]:idxrange[2]] <- 0 + idxrange <- c(max(1, imz[1]), min(bufsize+imz[1]-1, length(mass))) + bufidx[idxrange[1]:idxrange[2]] <- 1:(diff(idxrange)+1) + buf <- profFun(object@env$mz, object@env$intensity, object@scanindex, + diff(idxrange)+1, mass[idxrange[1]], mass[idxrange[2]], + TRUE, object@profparam) + } + ## Extract the intensity matrix for the mz-rt range: rows are mz, cols + ## rt values. + ymat <- buf[bufidx[imz[1]:imz[2]],iret[1]:iret[2],drop=FALSE] + ## Define the maximum intensity, is one value per mz. + ymax <- colMax(ymat) + iymax <- which.max(ymax) + + ## The width in rt. + pwid <- diff(stime[iret])/diff(iret) + + ## Calculate sum across rt. For each mz we get one value. + rosm <- rowSums(ymat) + limz <- length(imz[1]:imz[2]) + if (length(rosm) != limz) { ## that happens for some reason + warning("weighted.mean : x and w must have the same length \n") + rosm <- rep(1, limz) ## fallback to mean + } + ## mean mz: + rmat[i,1] <- weighted.mean(mass[imz[1]:imz[2]], rosm) ## mz; its not the + ## position of the largest intensity! 
+ if (is.nan(rmat[i,1]) || is.na(rmat[i,1])) ## R2.11 : weighted.mean() results in NA (not NaN) for zero weights + rmat[i,1] <- mean(peakrange[i,1:2]) + + rmat[i,2:3] <- peakrange[i,1:2] ## mzmin, mzmax + rmat[i,4] <- stime[iret[1]:iret[2]][iymax] ## rt + rmat[i,5:6] <- peakrange[i,3:4] ## rtmin, rtmax + + if (peakrange[i,3] < stime[1] || peakrange[i,4] > stime[length(stime)] || is.nan(pwid)) { + warning("getPeaks: Peak m/z:",peakrange[i,1],"-",peakrange[i,2], ", RT:",peakrange[i,3],"-",peakrange[i,4], + "is out of retention time range for this sample (",object@filepath,"), using zero intensity value.\n") + rmat[i,7:8] <- 0 } else { - distance <- 0 - baseValue <- 0 + rmat[i,7] <- pwid*sum(ymax) ## into + rmat[i,8] <- ymax[iymax] ## maxo } - binVals <- lapply(binRes, function(z) { - return(imputeLinInterpol(z$y, method = impute, distance = distance, - noInterpolAtEnds = TRUE, - baseValue = baseValue)) - }) - buf <- do.call(cbind, binVals) } - buf + invisible(rmat) + } diff --git a/R/functions-xcmsSet.R b/R/functions-xcmsSet.R index 9d14de099..aba8d0b8b 100644 --- a/R/functions-xcmsSet.R +++ b/R/functions-xcmsSet.R @@ -1,5 +1,5 @@ ## Functions for xcmsSet objects. -#' @include DataClasses.R do_detectFeatures-functions.R +#' @include DataClasses.R do_findChromPeaks-functions.R ## The "constructor" ## The "new" xcmsSet method using BiocParallel. @@ -10,9 +10,11 @@ xcmsSet <- function(files = NULL, snames = NULL, sclass = NULL, progressCallback=NULL, scanrange=NULL, BPPARAM=bpparam(), stopOnError = TRUE, ...) 
{ - if (nSlaves != 0) - warning("Use of argument 'nSlaves' is deprecated!", - " Please use 'BPPARAM' instead.") + if (nSlaves != 0) { + message("Use of argument 'nSlaves' is deprecated,", + " please use 'BPPARAM' instead.") + options(mc.cores = nSlaves) + } if (!is.logical(stopOnError)) stop("'stopOnError' has to be a logical.") ## Overwriting the stop.on.error in BPPARAM: @@ -41,16 +43,21 @@ xcmsSet <- function(files = NULL, snames = NULL, sclass = NULL, if (is.null(files)) files <- getwd() info <- file.info(files) - listed <- list.files(files[info$isdir], pattern = filepattern, - recursive = TRUE, full.names = TRUE) - files <- c(files[!info$isdir], listed) + if (any(info$isdir)) { + message("Scanning files in directory ", files[info$isdir], " ... ", + appendLF = FALSE) + listed <- list.files(files[info$isdir], pattern = filepattern, + recursive = TRUE, full.names = TRUE) + message("found ", length(listed), " files") + files <- c(files[!info$isdir], listed) + } ## try making paths absolute files_abs <- file.path(getwd(), files) exists <- file.exists(files_abs) files[exists] <- files_abs[exists] if (length(files) == 0 | all(is.na(files))) stop("No NetCDF/mzXML/mzData/mzML files were found.\n") - + if(lockMassFreq==TRUE){ ## remove the 02 files if there here lockMass.files<-grep("02.CDF", files) @@ -146,7 +153,7 @@ xcmsSet <- function(files = NULL, snames = NULL, sclass = NULL, ## Error identifying features in <>: Error:... -> info + error isOK <- bpok(res) if (all(!isOK)) - stop("Feature detection failed for all files!", + stop("Chromatographic peak detection failed for all files!", " The first error was: ", res[[1]]) if (any(!isOK)) { ## Use scantime from a working file one of the failing ones. @@ -170,24 +177,24 @@ xcmsSet <- function(files = NULL, snames = NULL, sclass = NULL, warning("Only 1 peak found in sample ", snames[i], ".") else if (nrow(pks) < 5) warning("Only ", nrow(pks), " found in sample ", snames[i], ".") - proclist[[i]] <- ProcessHistory(info. 
= paste0("Feature detection in '", + proclist[[i]] <- ProcessHistory(info. = paste0("Peak detection in '", basename(files[i]), "': ", nrow(pks), - " features identified."), + " peaks identified."), date. = res[[i]]$date, - type. = .PROCSTEP.FEATURE.DETECTION, + type. = .PROCSTEP.PEAK.DETECTION, fileIndex. = i) } else { scntlist[[i]] <- scnt peaklist[[i]] <- NULL proclist[[i]] <- ProcessHistory(info. = paste0("Error identifying", - " features in '", + " peaks in '", basename(files[i]), "': ", res[[i]]), error. = res[[i]], - type. = .PROCSTEP.FEATURE.DETECTION, + type. = .PROCSTEP.PEAK.DETECTION, fileIndex. = i) - warning("Feature detection failed in '", files[i], "':", res[[i]]) + warning("Peak detection failed in '", files[i], "':", res[[i]]) } } ## peaklist <- lapply(res, function(x) x$peaks) @@ -237,9 +244,13 @@ c.xcmsSet <- function(...) { rtraw <- c(rtraw, lcsets[[i]]@rt$raw) rtcor <- c(rtcor, lcsets[[i]]@rt$corrected) - sampidx <- seq(along = namelist[[i]]) + nsamp - peaklist[[i]][,"sample"] <- sampidx[peaklist[[i]][,"sample"]] - nsamp <- nsamp + length(namelist[[i]]) + ## Update samples only if we've got any peaks. 
Issue #133 + if (nrow(peaks(lcsets[[i]]))) { + sampidx <- seq(along = namelist[[i]]) + nsamp + peaklist[[i]][,"sample"] <- sampidx[peaklist[[i]][,"sample"]] + ## Don't increment if we don't have any peaks + nsamp <- nsamp + length(namelist[[i]]) + } if (.hasSlot(lcsets[[i]], ".processHistory")) { ph <- .getProcessHistory(lcsets[[i]]) if (length(ph) > 0) { @@ -387,33 +398,39 @@ phenoDataFromPaths <- function(paths) { ## patternVsRowScore patternVsRowScore <- function(currPeak, parameters, mplenv) { - mplistmeanCurr <- mplenv$mplistmean[,c("mz","rt")] - mplistmeanCurr[,"mz"] <- mplistmeanCurr[,"mz"] * parameters$mzVsRTBalance - peakmatCurr <- mplenv$peakmat[currPeak,c("mz","rt"),drop=FALSE] - peakmatCurr[,"mz"] <- peakmatCurr[,"mz"] * parameters$mzVsRTBalance + mplistmeanCurr <- mplenv$mplistmean[, c("mz", "rt")] + mplistmeanCurr[, "mz"] <- mplistmeanCurr[, "mz"] * parameters$mzVsRTBalance + peakmatCurr <- mplenv$peakmat[currPeak, c("mz", "rt"), drop = FALSE] + peakmatCurr[, "mz"] <- peakmatCurr[, "mz"] * parameters$mzVsRTBalance - nnDist <- nn2(mplistmeanCurr,peakmatCurr[,c("mz","rt"),drop=FALSE], - k=min(length(mplistmeanCurr[,1]),parameters$knn)) + nnDist <- nn2(mplistmeanCurr, peakmatCurr[, c("mz", "rt"), drop = FALSE], + k = min(length(mplistmeanCurr[, 1]), parameters$knn)) - scoreListcurr <- data.frame(score=numeric(0),peak=integer(0), mpListRow=integer(0), - isJoinedPeak=logical(0), isJoinedRow=logical(0)) + scoreListcurr <- data.frame(score = numeric(0), + peak = integer(0), + mpListRow = integer(0), + isJoinedPeak = logical(0), + isJoinedRow = logical(0)) rtTolerance = parameters$rtcheck - for(mplRow in 1:length(nnDist$nn.idx)){ - mplistMZ <- mplenv$mplistmean[nnDist$nn.idx[mplRow],"mz"] - mplistRT <- mplenv$mplistmean[nnDist$nn.idx[mplRow],"rt"] - - ## Calculate differences between M/Z and RT values of current peak and median of the row - diffMZ = abs(mplistMZ-mplenv$peakmat[[currPeak,"mz"]]) - diffRT = abs(mplistRT-mplenv$peakmat[[currPeak,"rt"]]) + for 
(mplRow in 1:length(nnDist$nn.idx)) { + mplistMZ <- mplenv$mplistmean[nnDist$nn.idx[mplRow], "mz"] + mplistRT <- mplenv$mplistmean[nnDist$nn.idx[mplRow], "rt"] + + ## Calculate differences between M/Z and RT values of current peak and + ## median of the row + diffMZ = abs(mplistMZ - mplenv$peakmat[[currPeak, "mz"]]) + diffRT = abs(mplistRT - mplenv$peakmat[[currPeak, "rt"]]) ## Calculate if differences within tolerancdiffRT < rtTolerance)es - if ( (diffMZ < parameters$mzcheck)& (diffRT < rtTolerance) ) { + if ( (diffMZ < parameters$mzcheck) & (diffRT < rtTolerance) ) { scoreListcurr <- rbind(scoreListcurr, - data.frame(score=nnDist$nn.dists[mplRow], - peak=currPeak, mpListRow=nnDist$nn.idx[mplRow], - isJoinedPeak=FALSE, isJoinedRow=FALSE)) + data.frame(score = nnDist$nn.dists[mplRow], + peak = currPeak, + mpListRow = nnDist$nn.idx[mplRow], + isJoinedPeak = FALSE, + isJoinedRow = FALSE)) ## goodEnough = true return(scoreListcurr) } @@ -744,33 +761,33 @@ filtfft <- function(y, filt) { ## .validProcessHistory ## Check the validity of the .processHistory slot. 
.validProcessHistory <- function(x) { - msg <- validMsg(NULL, NULL) + msg <- character() if (.hasSlot(x, ".processHistory")) { if (length(x@.processHistory) > 0) { ## All elements have to inherit from ProcessHistory if (!all(unlist(lapply(x@.processHistory, function(z) { return(inherits(z, "ProcessHistory")) })))) - msg <- validMsg(msg, paste0("All objects in slot .processHistory", - " have to be 'ProcessHistory' objects!")) + msg <- c(msg, paste0("All objects in slot .processHistory", + " have to be 'ProcessHistory' objects!")) ## Each element has to be valid vals <- lapply(x@.processHistory, validObject) for (i in seq_along(vals)) { if (!is.logical(vals[[i]])) - msg <- validMsg(msg, vals[[i]]) + msg <- c(msg, vals[[i]]) } ## The fileIndex has to be within 1:length(filepaths(x)) fidx <- 1:length(filepaths(x)) for (z in x@.processHistory) { if (length(z@fileIndex) == 0 | !(all(z@fileIndex %in% fidx))) - msg <- validMsg(msg, paste0("Value of 'fileIndex' slot of some", - " ProcessHistory objects does not", - " match the number of available", - " files!")) + msg <- c(msg, paste0("Value of 'fileIndex' slot of some", + " ProcessHistory objects does not", + " match the number of available", + " files!")) } } } - if (is.null(msg)) TRUE - else msg + if (length(msg)) msg + else TRUE } diff --git a/R/init.R b/R/init.R index 9e6529724..990acc935 100644 --- a/R/init.R +++ b/R/init.R @@ -70,17 +70,17 @@ ## getEIC method getEIC.method="getEICOld" - + ## Sort method; see issue #180 for MSnbase - sortMeth <- "auto" - if (as.numeric(R.Version()$major) >= 3 & as.numeric(R.Version()$minor) >= 3) - sortMeth <- "radix" + ## sortMeth <- "auto" + ## if (as.numeric(R.Version()$major) >= 3 & as.numeric(R.Version()$minor) >= 3) + ## sortMeth <- "radix" xcms.opt <- list(findPeaks.method=findPeaks.method, findPeaks.methods=findPeaks.methods, group.method=group.method, group.methods=group.methods, retcor.method=retcor.method, retcor.methods=retcor.methods, fillPeaks.method=fillPeaks.method, 
fillPeaks.methods=fillPeaks.methods, specDist.methods=specDist.methods, getEIC.method=getEIC.method, - useOriginalCode = FALSE, sortMethod = sortMeth) + useOriginalCode = FALSE) class(xcms.opt) <- "BioCPkg" diff --git a/R/methods-Chromatogram.R b/R/methods-Chromatogram.R new file mode 100644 index 000000000..61dae643d --- /dev/null +++ b/R/methods-Chromatogram.R @@ -0,0 +1,213 @@ +#' @include DataClasses.R functions-Chromatogram.R functions-utils.R + +setMethod("initialize", "Chromatogram", function(.Object, ...) { + classVersion(.Object)["Chromatogram"] <- "0.0.1" + callNextMethod(.Object, ...) +}) + + +#' @rdname Chromatogram-class +setMethod("show", "Chromatogram", function(object) { + cat("Object of class: ", class(object), "\n", sep = "") + if (length(object@aggregationFun)) + cat(names(.SUPPORTED_AGG_FUN_CHROM)[.SUPPORTED_AGG_FUN_CHROM == + object@aggregationFun], "\n") + cat("length of object: ", length(object@rtime), "\n", sep = "") + cat("from file: ", object@fromFile, "\n", sep = "") + cat("mz range: [", object@mz[1], ", ", object@mz[2], "]\n", sep = "") + if (length(object@rtime) > 0) { + rtr <- range(object@rtime) + cat("rt range: [", rtr[1], ", ", rtr[2], "]\n", sep = "") + } +}) + +## Methods: + +## rtime +#' @description \code{rtime} returns the retention times for the rentention time +#' - intensity pairs stored in the chromatogram. +#' +#' @param object A \code{Chromatogram} object. +#' +#' @rdname Chromatogram-class +setMethod("rtime", "Chromatogram", function(object) { + return(object@rtime) +}) + +## intensity +#' @description \code{intensity} returns the intensity for the rentention time +#' - intensity pairs stored in the chromatogram. +#' +#' @rdname Chromatogram-class +setMethod("intensity", "Chromatogram", function(object) { + return(object@intensity) +}) + +## mz +#' @description \code{mz} get the mz (range) of the chromatogram. The +#' function returns a \code{numeric(2)} with the lower and upper mz value. 
+#' +#' @param filter For \code{mz}: whether the mz range used to filter the +#' original object should be returned (\code{filter = TRUE}), or the mz +#' range calculated on the real data (\code{filter = FALSE}). +#' +#' @rdname Chromatogram-class +setMethod("mz", "Chromatogram", function(object, filter = FALSE) { + if (filter) + return(object@filterMz) + return(object@mz) +}) +## #' @rdname Chromatogram-class +## setReplaceMethod("mz", "CentWaveParam", function(object, value) { +## object@mzrange <- value +## if (validObject(object)) +## return(object) +## }) + +#' @description \code{precursorMz} get the mz of the precursor ion. The +#' function returns a \code{numeric(2)} with the lower and upper mz value. +#' +#' @rdname Chromatogram-class +setMethod("precursorMz", "Chromatogram", function(object) { + return(object@precursorMz) +}) + +#' @aliases productMz +#' +#' @description \code{productMz} get the mz of the product chromatogram/ion. The +#' function returns a \code{numeric(2)} with the lower and upper mz value. +#' +#' @rdname Chromatogram-class +setMethod("productMz", "Chromatogram", function(object) { + return(object@productMz) +}) + +## aggregationFun +#' @aliases aggregationFun +#' +#' @description \code{aggregationFun,aggregationFun<-} get or set the +#' aggregation function. +#' +#' @rdname Chromatogram-class +setMethod("aggregationFun", "Chromatogram", function(object) { + return(object@aggregationFun) +}) +## #' @rdname Chromatogram-class +## setReplaceMethod("aggregationFun", "CentWaveParam", function(object, value) { +## object@aggregationFun <- value +## if (validObject(object)) +## return(object) +## }) + +## fromFile +#' @description \code{fromFile} returns the value from the \code{fromFile} slot. 
+#' +#' @rdname Chromatogram-class +setMethod("fromFile", "Chromatogram", function(object) { + return(object@fromFile) +}) + +## length +#' @description \code{length} returns the length (number of retention time - +#' intensity pairs) of the chromatogram. +#' +#' @param x For \code{as.data.frame} and \code{length}: a \code{Chromatogram} +#' object. +#' +#' @rdname Chromatogram-class +setMethod("length", "Chromatogram", function(x) { + return(length(x@rtime)) +}) + +## as.data.frame +#' @description \code{as.data.frame} returns the \code{rtime} and +#' \code{intensity} values from the object as \code{data.frame}. +#' +#' @rdname Chromatogram-class +setMethod("as.data.frame", "Chromatogram", function(x) { + return(data.frame(rtime = x@rtime, intensity = x@intensity)) +}) + +#' @description \code{filterRt}: filters the chromatogram based on the provided +#' retention time range. +#' +#' @param rt For \code{filterRt}: \code{numeric(2)} defining the lower and +#' upper retention time for the filtering. +#' +#' @rdname Chromatogram-class +#' +#' @examples +#' +#' ## Create a simple Chromatogram object based on random values. +#' chr <- Chromatogram(intensity = abs(rnorm(1000, mean = 2000, sd = 200)), +#' rtime = sort(abs(rnorm(1000, mean = 10, sd = 5)))) +#' chr +#' +#' ## Get the intensities +#' head(intensity(chr)) +#' +#' ## Get the retention time +#' head(rtime(chr)) +#' +#' ## What is the retention time range of the object? 
+#' range(rtime(chr)) +#' +#' ## Filter the chromatogram to keep only values between 4 and 10 seconds +#' chr2 <- filterRt(chr, rt = c(4, 10)) +#' +#' range(rtime(chr2)) +setMethod("filterRt", "Chromatogram", function(object, rt) { + if (missing(rt)) + return(object) + rt <- range(rt) + ## Use which to be robust against NAs + keep_em <- which(rtime(object) >= rt[1] & rtime(object) <= rt[2]) + if (length(keep_em)) { + object@rtime <- rtime(object)[keep_em] + object@intensity <- intensity(object)[keep_em] + } else { + object@rtime <- numeric() + object@intensity <- numeric() + } + if (validObject(object)) + object +}) + +#' @description \code{clean}: \emph{cleans} a \code{Chromatogram} class by +#' removing all \code{0} and \code{NA} intensity signals (along with the +#' associates retention times). By default (if \code{all = FALSE}) \code{0} +#' values that are directly adjacent to peaks are kept too. \code{NA} +#' values are always removed. +#' +#' @param all For \code{clean}: \code{logical(1)} whether all \code{0} intensity +#' value pairs should be removed (defaults to \code{FALSE}). +#' +#' @return For \code{clean}: a \emph{cleaned} \code{Chromatogram} object. 
+#' +#' @rdname Chromatogram-class +#' +#' @examples +#' +#' ## Create a simple Chromatogram object +#' +#' chr <- Chromatogram(rtime = 1:12, +#' intensity = c(0, 0, 20, 0, 0, 0, 123, 124343, 3432, 0, 0, 0)) +#' +#' ## Remove 0-intensity values keeping those adjacent to peaks +#' chr <- clean(chr) +#' intensity(chr) +#' +#' ## Remove all 0-intensity values +#' chr <- clean(chr, all = TRUE) +#' intensity(chr) +setMethod("clean", signature = signature("Chromatogram"), + function(object, all = FALSE) { + if (all) + keep <- which(object@intensity > 0) + else + keep <- which(.grow_trues(object@intensity > 0)) + object@intensity <- object@intensity[keep] + object@rtime <- object@rtime[keep] + if (validObject(object)) + object + }) diff --git a/R/methods-MsFeatureData.R b/R/methods-MsFeatureData.R index 6ffe3ee2f..254eccc2f 100644 --- a/R/methods-MsFeatureData.R +++ b/R/methods-MsFeatureData.R @@ -1,5 +1,5 @@ ## Methods for the MsFeatureData class. -#' @include functions-MsFeatureData.R +#' @include functions-MsFeatureData.R do_adjustRtime-functions.R setMethod("initialize", "MsFeatureData", function(.Object, ...) { classVersion(.Object)["MsFeatureData"] <- "0.0.1" @@ -21,24 +21,27 @@ setMethod("show", "MsFeatureData", function(object) { } }) -## features: getter and setter for the features matrix. -## featureGroups: getter and setter for the featureGroups DataFrame. +## (features) chromPeaks: getter and setter for the chromatographic peaks matrix. +## (featureDefinitions) featureDefinitions: getter and setter for the features DataFrame. ## adjustedRtime: getter and setter for the adjustedRtime list. 
##' @rdname XCMSnExp-class setMethod("hasAdjustedRtime", "MsFeatureData", function(object) { - return(any(ls(object) == "adjustedRtime")) + return(!is.null(object$adjustedRtime)) + ## return(any(ls(object) == "adjustedRtime")) }) ##' @rdname XCMSnExp-class -setMethod("hasAlignedFeatures", "MsFeatureData", function(object) { - return(any(ls(object) == "featureGroups")) +setMethod("hasFeatures", "MsFeatureData", function(object) { + return(!is.null(object$featureDefinitions)) + ## return(any(ls(object) == "featureDefinitions")) }) ##' @rdname XCMSnExp-class -setMethod("hasDetectedFeatures", "MsFeatureData", function(object) { - return(any(ls(object) == "features")) +setMethod("hasChromPeaks", "MsFeatureData", function(object) { + return(!is.null(object$chromPeaks)) + ## return(any(ls(object) == "chromPeaks")) }) ##' @rdname XCMSnExp-class @@ -56,47 +59,48 @@ setReplaceMethod("adjustedRtime", "MsFeatureData", function(object, value) { }) ##' @rdname XCMSnExp-class setMethod("dropAdjustedRtime", "MsFeatureData", function(object) { - if (hasAdjustedRtime(object)) + if (hasAdjustedRtime(object)) { rm(list = "adjustedRtime", envir = object) + } return(object) }) ##' @rdname XCMSnExp-class -setMethod("featureGroups", "MsFeatureData", function(object) { - if (hasAlignedFeatures(object)) - return(object$featureGroups) +setMethod("featureDefinitions", "MsFeatureData", function(object) { + if (hasFeatures(object)) + return(object$featureDefinitions) warning("No aligned feature information available.") return(NULL) }) ##' @rdname XCMSnExp-class -setReplaceMethod("featureGroups", "MsFeatureData", function(object, value) { - object$featureGroups <- value +setReplaceMethod("featureDefinitions", "MsFeatureData", function(object, value) { + object$featureDefinitions <- value if (validObject(object)) return(object) }) ##' @rdname XCMSnExp-class -setMethod("dropFeatureGroups", "MsFeatureData", function(object) { - if (hasAlignedFeatures(object)) - rm(list = "featureGroups", envir = 
object) +setMethod("dropFeatureDefinitions", "MsFeatureData", function(object) { + if (hasFeatures(object)) + rm(list = "featureDefinitions", envir = object) return(object) }) ##' @rdname XCMSnExp-class -setMethod("features", "MsFeatureData", function(object) { - if (hasDetectedFeatures(object)) - return(object$features) - warning("No detected features available.") +setMethod("chromPeaks", "MsFeatureData", function(object) { + if (hasChromPeaks(object)) + return(object$chromPeaks) + warning("No chromatographic peaks available.") return(NULL) }) ##' @rdname XCMSnExp-class -setReplaceMethod("features", "MsFeatureData", function(object, value) { - object$features <- value +setReplaceMethod("chromPeaks", "MsFeatureData", function(object, value) { + object$chromPeaks <- value if (validObject(object)) return(object) }) ##' @rdname XCMSnExp-class -setMethod("dropFeatures", "MsFeatureData", function(object) { - if (hasDetectedFeatures(object)) - rm(list = "features", envir = object) +setMethod("dropChromPeaks", "MsFeatureData", function(object) { + if (hasChromPeaks(object)) + rm(list = "chromPeaks", envir = object) return(object) }) diff --git a/R/methods-OnDiskMSnExp.R b/R/methods-OnDiskMSnExp.R index afb5fdbd3..ff0a8cc87 100644 --- a/R/methods-OnDiskMSnExp.R +++ b/R/methods-OnDiskMSnExp.R @@ -1,16 +1,16 @@ ## Methods for MSnbase's OnDiskMSnExp and MSnExp objects. -#' @include functions-OnDiskMSnExp.R do_detectFeatures-functions.R +#' @include DataClasses.R functions-OnDiskMSnExp.R do_findChromPeaks-functions.R ## Main roxygen documentation for the centWace feature detection is in ## DataClasses, before the definition of the CentWaveParam class. 
-## The centWave feature detection method for OnDiskMSnExp:
-##' @title Feature detection using the centWave method
+## The centWave peak detection method for OnDiskMSnExp:
+##' @title Chromatographic peak detection using the centWave method
 ##'
-##' @description The \code{detectFeatures,OnDiskMSnExp,CentWaveParam} method
-##' performs feature detection using the \emph{centWave} algorithm on all
-##' samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object.
+##' @description The \code{findChromPeaks,OnDiskMSnExp,CentWaveParam} method
+##' performs chromatographic peak detection using the \emph{centWave} algorithm
+##' on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object.
 ##' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific
 ##' data and load the spectra data (mz and intensity values) on the fly from the
 ##' original files applying also all eventual data manipulations.
@@ -20,9 +20,9 @@
 ##' the parallel processing mode using the \code{\link[BiocParallel]{register}}
 ##' method from the \code{BiocParallel} package.
 ##'
-##' @param object For \code{detectFeatures}: Either an
-##' \code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}}
-##' object containing the MS- and all other experiment-relevant data.
+##' @param object For \code{findChromPeaks}: an
+##' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other
+##' experiment-relevant data.
 ##'
 ##' For all other methods: a parameter object.
 ##'
@@ -32,25 +32,25 @@
 ##' @param BPPARAM A parameter class specifying if and how parallel processing
 ##' should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}.
 ##' See documentation of the \code{BiocParallel} for more details. If parallel
-##' processing is enables, feature detection is performed in parallel on several
+##' processing is enabled, peak detection is performed in parallel on several
 ##' of the input samples.
##' ##' @param return.type Character specifying what type of object the method should -##' return. Can be either \code{"XCMSnExp"} (code), \code{"list"} or +##' return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or ##' \code{"xcmsSet"}. ##' -##' @return For \code{detectFeatures}: if \code{return.type = "XCMSnExp"} an -##' \code{\link{XCMSnExp}} object with the results of the feature detection. +##' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +##' \code{\link{XCMSnExp}} object with the results of the peak detection. ##' If \code{return.type = "list"} a list of length equal to the number of -##' samples with matrices specifying the identified features/peaks. +##' samples with matrices specifying the identified peaks. ##' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -##' with the results of the feature detection. +##' with the results of the peak detection. ##' ##' @seealso \code{\link{XCMSnExp}} for the object containing the results of -##' the feature detection. +##' the peak detection. ##' -##' @rdname featureDetection-centWave -setMethod("detectFeatures", +##' @rdname findChromPeaks-centWave +setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "CentWaveParam"), function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") { return.type <- match.arg(return.type, c("XCMSnExp", "list", @@ -63,11 +63,12 @@ setMethod("detectFeatures", warning("Your data appears to be not centroided! CentWave", " works best on data in centroid mode.") ## (1) split the object per file. - ## (2) use bplapply to do the feature detection. + ## (2) use bplapply to do the peak detection. resList <- bplapply(lapply(1:length(fileNames(object)), filterFile, object = object), - FUN = detectFeatures_OnDiskMSnExp, - method = "centWave", param = param) + FUN = findChromPeaks_OnDiskMSnExp, + method = "centWave", param = param, + BPPARAM = BPPARAM) ## (3) collect the results. 
res <- .processResultList(resList, getProcHist = return.type == "xcmsSet", @@ -77,13 +78,15 @@ setMethod("detectFeatures", ## that later, but for now seems reasonable to have it in one, ## since we're calling the method once on all. xph <- XProcessHistory(param = param, date. = startDate, - type. = .PROCSTEP.FEATURE.DETECTION, + type. = .PROCSTEP.PEAK.DETECTION, fileIndex = 1:length(fileNames(object))) object <- as(object, "XCMSnExp") object@.processHistory <- list(xph) - if (hasAdjustedRtime(object) | hasAlignedFeatures(object)) + if (hasAdjustedRtime(object) | hasFeatures(object)) object@msFeatureData <- new("MsFeatureData") - features(object) <- do.call(rbind, res$peaks) + pks <- do.call(rbind, res$peaks) + if (length(pks) > 0) + chromPeaks(object) <- cbind(pks, is_filled = 0) if (validObject(object)) return(object) } @@ -105,67 +108,68 @@ setMethod("detectFeatures", }) -## The centWave feature detection method for MSnExp: -##' @title Feature detection using the centWave method -##' -##' @description The \code{detectFeatures,MSnExp,CentWaveParam} method performs -##' feature detection using the \emph{centWave} algorithm on all samples from -##' an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and -##' intensity values of all spectra hence no additional data input from the -##' original files is required. -##' -##' @rdname featureDetection-centWave -setMethod("detectFeatures", - signature(object = "MSnExp", param = "CentWaveParam"), - function(object, param, BPPARAM = bpparam(), return.type = "list") { - return.type <- match.arg(return.type, c("list", "xcmsSet")) - ## Restrict to MS1 data. - ## Man, that's too slow! We're doing the MS1 restriction below. - ## object <- filterMsLevel(object, msLevel. = 1) - ## (1) split the spectra per file - this means we have a second - ## copy of the data, but there is no way around that as - ## filterFile is pretty slow on MSnExp. 
- ms1_idx <- which(unname(msLevel(object)) == 1) - if (length(ms1_idx) == 0) - stop("No MS1 spectra available for feature detection!") - ## Check if the data is centroided - if (!isCentroided(object[[ms1_idx[1]]])) - warning("Your data appears to be not centroided! CentWave", - " works best on data in centroid mode.") - spect_list <- split(spectra(object)[ms1_idx], - fromFile(object)[ms1_idx]) - ## (2) use bplapply to do the feature detection. - resList <- bplapply(spect_list, function(z) { - detectFeatures_Spectrum_list(z, - method = "centWave", - param = param) - }, BPPARAM = BPPARAM) - ## (3) collect the results. - res <- .processResultList(resList, - getProcHist = return.type != "list", - fnames = fileNames(object)) - if (return.type == "list") - return(res$peaks) - if (return.type == "xcmsSet") { - xs <- .pSet2xcmsSet(object) - peaks(xs) <- do.call(rbind, res$peaks) - xs@.processHistory <- res$procHist - OK <- .validProcessHistory(xs) - if (!is.logical(OK)) - stop(OK) - if (!any(colnames(pData(object)) == "class")) - message("Note: you might want to set/adjust the", - " 'sampclass' of the returned xcmSet object", - " before proceeding with the analysis.") - return(xs) - } - }) +## ## The centWave peak detection method for MSnExp: +## ##' @title Chromatographic peak detection using the centWave method +## ##' +## ##' @description The \code{findChromPeaks,MSnExp,CentWaveParam} method performs +## ##' peak detection using the \emph{centWave} algorithm on all samples from +## ##' an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and +## ##' intensity values of all spectra hence no additional data input from the +## ##' original files is required. +## ##' +## ##' @rdname findChromPeaks-centWave +## setMethod("findChromPeaks", +## signature(object = "MSnExp", param = "CentWaveParam"), +## function(object, param, BPPARAM = bpparam(), return.type = "list") { +## return.type <- match.arg(return.type, c("list", "xcmsSet")) +## ## Restrict to MS1 data. 
+## ## Man, that's too slow! We're doing the MS1 restriction below. +## ## object <- filterMsLevel(object, msLevel. = 1) +## ## (1) split the spectra per file - this means we have a second +## ## copy of the data, but there is no way around that as +## ## filterFile is pretty slow on MSnExp. +## ms1_idx <- which(unname(msLevel(object)) == 1) +## if (length(ms1_idx) == 0) +## stop("No MS1 spectra available for chromatographic peak", +## " detection!") +## ## Check if the data is centroided +## if (!isCentroided(object[[ms1_idx[1]]])) +## warning("Your data appears to be not centroided! CentWave", +## " works best on data in centroid mode.") +## spect_list <- split(spectra(object)[ms1_idx], +## fromFile(object)[ms1_idx]) +## ## (2) use bplapply to do the peak detection. +## resList <- bplapply(spect_list, function(z) { +## findChromPeaks_Spectrum_list(z, +## method = "centWave", +## param = param) +## }, BPPARAM = BPPARAM) +## ## (3) collect the results. +## res <- .processResultList(resList, +## getProcHist = return.type != "list", +## fnames = fileNames(object)) +## if (return.type == "list") +## return(res$peaks) +## if (return.type == "xcmsSet") { +## xs <- .pSet2xcmsSet(object) +## peaks(xs) <- do.call(rbind, res$peaks) +## xs@.processHistory <- res$procHist +## OK <- .validProcessHistory(xs) +## if (!is.logical(OK)) +## stop(OK) +## if (!any(colnames(pData(object)) == "class")) +## message("Note: you might want to set/adjust the", +## " 'sampclass' of the returned xcmSet object", +## " before proceeding with the analysis.") +## return(xs) +## } +## }) -## The matchedFilter feature detection method for OnDiskMSnExp: +## The matchedFilter peak detection method for OnDiskMSnExp: ##' @title Peak detection in the chromatographic time domain ##' -##' @description The \code{detectFeatures,OnDiskMSnExp,MatchedFilterParam} -##' method performs feature detection using the \emph{matchedFilter} algorithm +##' @description The 
\code{findChromPeaks,OnDiskMSnExp,MatchedFilterParam} +##' method performs peak detection using the \emph{matchedFilter} algorithm ##' on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. ##' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific ##' data and load the spectra data (mz and intensity values) on the fly from the @@ -175,30 +179,30 @@ setMethod("detectFeatures", ##' be configured either by the \code{BPPARAM} parameter or by globally defining ##' the parallel processing mode using the \code{\link[BiocParallel]{register}} ##' method from the \code{BiocParallel} package. - -##' @param object For \code{detectFeatures}: Either an -##' \code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -##' object containing the MS- and all other experiment-relevant data. +##' +##' @param object For \code{findChromPeaks}: an +##' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +##' experiment-relevant data. ##' ##' For all other methods: a parameter object. ##' ##' @param param An \code{MatchedFilterParam} object containing all settings for ##' the matchedFilter algorithm. ##' -##' @inheritParams featureDetection-centWave +##' @inheritParams findChromPeaks-centWave ##' -##' @return For \code{detectFeatures}: if \code{return.type = "XCMSnExp"} an -##' \code{\link{XCMSnExp}} object with the results of the feature detection. +##' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +##' \code{\link{XCMSnExp}} object with the results of the peak detection. ##' If \code{return.type = "list"} a list of length equal to the number of -##' samples with matrices specifying the identified features/peaks. +##' samples with matrices specifying the identified peaks. ##' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -##' with the results of the feature detection. +##' with the results of the peak detection. 
##' ##' @seealso \code{\link{XCMSnExp}} for the object containing the results of -##' the feature detection. +##' the chromatographic peak detection. ##' -##' @rdname featureDetection-matchedFilter -setMethod("detectFeatures", +##' @rdname findChromPeaks-matchedFilter +setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "MatchedFilterParam"), function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") { return.type <- match.arg(return.type, c("XCMSnExp", "list", @@ -207,11 +211,12 @@ setMethod("detectFeatures", ## Restrict to MS1 data. object <- filterMsLevel(object, msLevel. = 1) ## (1) split the object per file. - ## (2) use bplapply to do the feature detection. + ## (2) use bplapply to do the peak detection. resList <- bplapply(lapply(1:length(fileNames(object)), filterFile, object = object), - FUN = detectFeatures_OnDiskMSnExp, - method = "matchedFilter", param = param) + FUN = findChromPeaks_OnDiskMSnExp, + method = "matchedFilter", param = param, + BPPARAM = BPPARAM) ## (3) collect the results. res <- .processResultList(resList, getProcHist = return.type == "xcmsSet", @@ -221,13 +226,18 @@ setMethod("detectFeatures", ## that later, but for now seems reasonable to have it in one, ## since we're calling the method once on all. xph <- XProcessHistory(param = param, date. = startDate, - type. = .PROCSTEP.FEATURE.DETECTION, + type. 
= .PROCSTEP.PEAK.DETECTION, fileIndex = 1:length(fileNames(object))) object <- as(object, "XCMSnExp") object@.processHistory <- list(xph) - if (hasAdjustedRtime(object) | hasAlignedFeatures(object)) + if (hasAdjustedRtime(object) | hasFeatures(object)) object@msFeatureData <- new("MsFeatureData") - features(object) <- do.call(rbind, res$peaks) + pks <- do.call(rbind, res$peaks) + if (length(pks) > 0) + chromPeaks(object) <- cbind(pks, is_filled = 0) + ## ## chromPeaks(object) <- do.call(rbind, res$peaks) + ## chromPeaks(object) <- cbind(do.call(rbind, res$peaks), + ## is_filled = 0) if (validObject(object)) return(object) } @@ -248,56 +258,57 @@ setMethod("detectFeatures", } }) -##' @title Peak detection in the chromatographic time domain -##' -##' @description The \code{detectFeatures,MSnExp,MatchedFilterParam} method -##' performs feature detection using the \emph{matchedFilter} method on all -##' samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain -##' mz and intensity values of all spectra hence no additional -##' data input from the original files is required. 
-##' -##' @rdname featureDetection-matchedFilter -setMethod("detectFeatures", - signature(object = "MSnExp", param = "MatchedFilterParam"), - function(object, param, BPPARAM = bpparam(), return.type = "list") { - return.type <- match.arg(return.type, c("list", "xcmsSet")) - ms1_idx <- which(unname(msLevel(object)) == 1) - if (length(ms1_idx) == 0) - stop("No MS1 spectra available for feature detection!") - spect_list <- split(spectra(object)[ms1_idx], - fromFile(object)[ms1_idx]) - resList <- bplapply(spect_list, function(z) { - detectFeatures_Spectrum_list(z, - method = "matchedFilter", - param = param) - }, BPPARAM = BPPARAM) - res <- .processResultList(resList, - getProcHist = return.type != "list", - fnames = fileNames(object)) - if (return.type == "list") - return(res$peaks) - if (return.type == "xcmsSet") { - xs <- .pSet2xcmsSet(object) - peaks(xs) <- do.call(rbind, res$peaks) - xs@.processHistory <- res$procHist - OK <- .validProcessHistory(xs) - if (!is.logical(OK)) - stop(OK) - if (!any(colnames(pData(object)) == "class")) - message("Note: you might want to set/adjust the", - " 'sampclass' of the returned xcmSet object", - " before proceeding with the analysis.") - return(xs) - } - }) +## ##' @title Peak detection in the chromatographic time domain +## ##' +## ##' @description The \code{findChromPeaks,MSnExp,MatchedFilterParam} method +## ##' performs peak detection using the \emph{matchedFilter} method on all +## ##' samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain +## ##' mz and intensity values of all spectra hence no additional +## ##' data input from the original files is required. 
+## ##' +## ##' @rdname findChromPeaks-matchedFilter +## setMethod("findChromPeaks", +## signature(object = "MSnExp", param = "MatchedFilterParam"), +## function(object, param, BPPARAM = bpparam(), return.type = "list") { +## return.type <- match.arg(return.type, c("list", "xcmsSet")) +## ms1_idx <- which(unname(msLevel(object)) == 1) +## if (length(ms1_idx) == 0) +## stop("No MS1 spectra available for chromatographic peak", +## " detection!") +## spect_list <- split(spectra(object)[ms1_idx], +## fromFile(object)[ms1_idx]) +## resList <- bplapply(spect_list, function(z) { +## findChromPeaks_Spectrum_list(z, +## method = "matchedFilter", +## param = param) +## }, BPPARAM = BPPARAM) +## res <- .processResultList(resList, +## getProcHist = return.type != "list", +## fnames = fileNames(object)) +## if (return.type == "list") +## return(res$peaks) +## if (return.type == "xcmsSet") { +## xs <- .pSet2xcmsSet(object) +## peaks(xs) <- do.call(rbind, res$peaks) +## xs@.processHistory <- res$procHist +## OK <- .validProcessHistory(xs) +## if (!is.logical(OK)) +## stop(OK) +## if (!any(colnames(pData(object)) == "class")) +## message("Note: you might want to set/adjust the", +## " 'sampclass' of the returned xcmSet object", +## " before proceeding with the analysis.") +## return(xs) +## } +## }) ## massifquant -## The massifquant feature detection method for OnDiskMSnExp: -##' @title Feature detection using the massifquant method +## The massifquant peak detection method for OnDiskMSnExp: +##' @title Chromatographic peak detection using the massifquant method ##' -##' @description The \code{detectFeatures,OnDiskMSnExp,MassifquantParam} -##' method performs feature detection using the \emph{massifquant} algorithm -##' on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. 
+##' @description The \code{findChromPeaks,OnDiskMSnExp,MassifquantParam} +##' method performs chromatographic peak detection using the \emph{massifquant} +##' algorithm on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. ##' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific ##' data and load the spectra data (mz and intensity values) on the fly from the ##' original files applying also all eventual data manipulations. @@ -307,29 +318,29 @@ setMethod("detectFeatures", ##' the parallel processing mode using the \code{\link[BiocParallel]{register}} ##' method from the \code{BiocParallel} package. ##' -##' @param object For \code{detectFeatures}: Either an -##' \code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -##' object containing the MS- and all other experiment-relevant data. +##' @param object For \code{findChromPeaks}: an +##' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +##' experiment-relevant data. ##' ##' For all other methods: a parameter object. ##' ##' @param param An \code{MassifquantParam} object containing all settings for ##' the massifquant algorithm. ##' -##' @inheritParams featureDetection-centWave +##' @inheritParams findChromPeaks-centWave ##' -##' @return For \code{detectFeatures}: if \code{return.type = "XCMSnExp"} an -##' \code{\link{XCMSnExp}} object with the results of the feature detection. +##' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +##' \code{\link{XCMSnExp}} object with the results of the peak detection. ##' If \code{return.type = "list"} a list of length equal to the number of -##' samples with matrices specifying the identified features/peaks. +##' samples with matrices specifying the identified peaks. ##' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -##' with the results of the feature detection. +##' with the results of the peak detection. 
##' ##' @seealso \code{\link{XCMSnExp}} for the object containing the results of -##' the feature detection. +##' the peak detection. ##' -##' @rdname featureDetection-massifquant -setMethod("detectFeatures", +##' @rdname findChromPeaks-massifquant +setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "MassifquantParam"), function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") { return.type <- match.arg(return.type, c("XCMSnExp", "list", @@ -338,11 +349,12 @@ setMethod("detectFeatures", ## Restrict to MS1 data. object <- filterMsLevel(object, msLevel. = 1) ## (1) split the object per file. - ## (2) use bplapply to do the feature detection. + ## (2) use bplapply to do the peaks detection. resList <- bplapply(lapply(1:length(fileNames(object)), filterFile, object = object), - FUN = detectFeatures_OnDiskMSnExp, - method = "massifquant", param = param) + FUN = findChromPeaks_OnDiskMSnExp, + method = "massifquant", param = param, + BPPARAM = BPPARAM) ## (3) collect the results. res <- .processResultList(resList, getProcHist = return.type == "xcmsSet", @@ -352,13 +364,18 @@ setMethod("detectFeatures", ## that later, but for now seems reasonable to have it in one, ## since we're calling the method once on all. xph <- XProcessHistory(param = param, date. = startDate, - type. = .PROCSTEP.FEATURE.DETECTION, + type. 
= .PROCSTEP.PEAK.DETECTION, fileIndex = 1:length(fileNames(object))) object <- as(object, "XCMSnExp") object@.processHistory <- list(xph) - if (hasAdjustedRtime(object) | hasAlignedFeatures(object)) + if (hasAdjustedRtime(object) | hasFeatures(object)) object@msFeatureData <- new("MsFeatureData") - features(object) <- do.call(rbind, res$peaks) + pks <- do.call(rbind, res$peaks) + if (length(pks) > 0) + chromPeaks(object) <- cbind(pks, is_filled = 0) + ## ## chromPeaks(object) <- do.call(rbind, res$peaks) + ## chromPeaks(object) <- cbind(do.call(rbind, res$peaks), + ## is_filled = 0) if (validObject(object)) return(object) } @@ -380,56 +397,57 @@ setMethod("detectFeatures", }) -##' @title Feature detection using the massifquant method -##' -##' @description The \code{detectFeatures,MSnExp,MassifquantParam} method -##' performs feature detection using the \emph{massifquant} method on all -##' samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain -##' mz and intensity values of all spectra hence no additional -##' data input from the original files is required. 
-##' -##' @rdname featureDetection-massifquant -setMethod("detectFeatures", - signature(object = "MSnExp", param = "MassifquantParam"), - function(object, param, BPPARAM = bpparam(), return.type = "list") { - return.type <- match.arg(return.type, c("list", "xcmsSet")) - ms1_idx <- which(unname(msLevel(object)) == 1) - if (length(ms1_idx) == 0) - stop("No MS1 spectra available for feature detection!") - spect_list <- split(spectra(object)[ms1_idx], - fromFile(object)[ms1_idx]) - resList <- bplapply(spect_list, function(z) { - detectFeatures_Spectrum_list(z, - method = "massifquant", - param = param) - }, BPPARAM = BPPARAM) - res <- .processResultList(resList, - getProcHist = return.type != "list", - fnames = fileNames(object)) - if (return.type == "list") - return(res$peaks) - if (return.type == "xcmsSet") { - xs <- .pSet2xcmsSet(object) - peaks(xs) <- do.call(rbind, res$peaks) - xs@.processHistory <- res$procHist - OK <- .validProcessHistory(xs) - if (!is.logical(OK)) - stop(OK) - if (!any(colnames(pData(object)) == "class")) - message("Note: you might want to set/adjust the", - " 'sampclass' of the returned xcmSet object", - " before proceeding with the analysis.") - return(xs) - } - }) +## ##' @title Chromatographic peak detection using the massifquant method +## ##' +## ##' @description The \code{findChromPeaks,MSnExp,MassifquantParam} method +## ##' performs chromatographic peak detection using the \emph{massifquant} method +## ##' on all samples from an \code{\link[MSnbase]{MSnExp}} object. These objects +## ##' contain mz and intensity values of all spectra hence no additional +## ##' data input from the original files is required. 
+## ##' +## ##' @rdname findChromPeaks-massifquant +## setMethod("findChromPeaks", +## signature(object = "MSnExp", param = "MassifquantParam"), +## function(object, param, BPPARAM = bpparam(), return.type = "list") { +## return.type <- match.arg(return.type, c("list", "xcmsSet")) +## ms1_idx <- which(unname(msLevel(object)) == 1) +## if (length(ms1_idx) == 0) +## stop("No MS1 spectra available for chromatographic peak", +## " detection!") +## spect_list <- split(spectra(object)[ms1_idx], +## fromFile(object)[ms1_idx]) +## resList <- bplapply(spect_list, function(z) { +## findChromPeaks_Spectrum_list(z, +## method = "massifquant", +## param = param) +## }, BPPARAM = BPPARAM) +## res <- .processResultList(resList, +## getProcHist = return.type != "list", +## fnames = fileNames(object)) +## if (return.type == "list") +## return(res$peaks) +## if (return.type == "xcmsSet") { +## xs <- .pSet2xcmsSet(object) +## peaks(xs) <- do.call(rbind, res$peaks) +## xs@.processHistory <- res$procHist +## OK <- .validProcessHistory(xs) +## if (!is.logical(OK)) +## stop(OK) +## if (!any(colnames(pData(object)) == "class")) +## message("Note: you might want to set/adjust the", +## " 'sampclass' of the returned xcmSet object", +## " before proceeding with the analysis.") +## return(xs) +## } +## }) ## MSW -## The MSW feature detection method for OnDiskMSnExp: -##' @title Single-spectrum non-chromatography MS data feature detection +## The MSW peak detection method for OnDiskMSnExp: +##' @title Single-spectrum non-chromatography MS data peak detection ##' -##' @description The \code{detectFeatures,OnDiskMSnExp,MSWParam} -##' method performs feature detection in single-spectrum non-chromatography MS +##' @description The \code{findChromPeaks,OnDiskMSnExp,MSWParam} +##' method performs peak detection in single-spectrum non-chromatography MS ##' data using functionality from the \code{MassSpecWavelet} package on all ##' samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. 
##' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific @@ -441,43 +459,49 @@ setMethod("detectFeatures", ##' the parallel processing mode using the \code{\link[BiocParallel]{register}} ##' method from the \code{BiocParallel} package. ##' -##' @param object For \code{detectFeatures}: Either an -##' \code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -##' object containing the MS- and all other experiment-relevant data. +##' @param object For \code{findChromPeaks}: an +##' \code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +##' experiment-relevant data. ##' ##' For all other methods: a parameter object. ##' ##' @param param An \code{MSWParam} object containing all settings for ##' the algorithm. ##' -##' @inheritParams featureDetection-centWave +##' @inheritParams findChromPeaks-centWave ##' -##' @return For \code{detectFeatures}: if \code{return.type = "XCMSnExp"} an -##' \code{\link{XCMSnExp}} object with the results of the feature detection. +##' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +##' \code{\link{XCMSnExp}} object with the results of the peak detection. ##' If \code{return.type = "list"} a list of length equal to the number of -##' samples with matrices specifying the identified features/peaks. +##' samples with matrices specifying the identified peaks. ##' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -##' with the results of the feature detection. +##' with the results of the detection. ##' ##' @seealso \code{\link{XCMSnExp}} for the object containing the results of -##' the feature detection. +##' the peak detection. 
##' -##' @rdname featureDetection-MSW -setMethod("detectFeatures", +##' @rdname findPeaks-MSW +setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "MSWParam"), function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") { return.type <- match.arg(return.type, c("XCMSnExp", "list", "xcmsSet")) startDate <- date() - ## TODO @jo: ensure that we're having single spectra files! ## Restrict to MS1 data. object <- filterMsLevel(object, msLevel. = 1) + + rts <- split(rtime(object), f = fromFile(object)) + if (any(lengths(rts) > 1)) + stop("The MSW method can only be applied to single spectrum,", + " non-chromatographic, files (i.e. with a single ", + "retention time).") ## (1) split the object per file. - ## (2) use bplapply to do the feature detection. + ## (2) use bplapply to do the peak detection. resList <- bplapply(lapply(1:length(fileNames(object)), filterFile, object = object), - FUN = detectFeatures_MSW_OnDiskMSnExp, - method = "MSW", param = param) + FUN = findPeaks_MSW_OnDiskMSnExp, + method = "MSW", param = param, + BPPARAM = BPPARAM) ## (3) collect the results. res <- .processResultList(resList, getProcHist = return.type == "xcmsSet", fnames = fileNames(object)) @@ -487,13 +511,18 @@ setMethod("detectFeatures", ## that later, but for now seems reasonable to have it in one, ## since we're calling the method once on all. xph <- XProcessHistory(param = param, date. = startDate, - type. = .PROCSTEP.FEATURE.DETECTION, + type. 
= .PROCSTEP.PEAK.DETECTION, fileIndex = 1:length(fileNames(object))) object <- as(object, "XCMSnExp") object@.processHistory <- list(xph) - if (hasAdjustedRtime(object) | hasAlignedFeatures(object)) + if (hasAdjustedRtime(object) | hasFeatures(object)) object@msFeatureData <- new("MsFeatureData") - features(object) <- do.call(rbind, res$peaks) + pks <- do.call(rbind, res$peaks) + if (length(pks) > 0) + chromPeaks(object) <- cbind(pks, is_filled = 0) + ## ## chromPeaks(object) <- do.call(rbind, res$peaks) + ## chromPeaks(object) <- cbind(do.call(rbind, res$peaks), + ## is_filled = 0) if (validObject(object)) return(object) } @@ -514,79 +543,82 @@ setMethod("detectFeatures", } }) -##' @title Single-spectrum non-chromatography MS data feature detection -##' -##' @description The \code{detectFeatures,MSnExp,MSWParam} method -##' performs feature detection in single-spectrum non-chromatography MS -##' data using functionality from the \code{MassSpecWavelet} package on all -##' samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain -##' mz and intensity values of all spectra hence no additional -##' data input from the original files is required. 
-##' -##' @rdname featureDetection-MSW -setMethod("detectFeatures", - signature(object = "MSnExp", param = "MSWParam"), - function(object, param, BPPARAM = bpparam(), return.type = "list") { - return.type <- match.arg(return.type, c("list", "xcmsSet")) - ms1_idx <- which(unname(msLevel(object)) == 1) - if (length(ms1_idx) == 0) - stop("No MS1 spectra available for feature detection!") - spect_list <- split(spectra(object)[ms1_idx], - fromFile(object)[ms1_idx]) - resList <- bplapply(spect_list, function(z) { - detectFeatures_MSW_Spectrum_list(z, - method = "MSW", - param = param) - }, BPPARAM = BPPARAM) - res <- .processResultList(resList, - getProcHist = return.type != "list", - fnames = fileNames(object)) - if (return.type == "list") - return(res$peaks) - if (return.type == "xcmsSet") { - xs <- .pSet2xcmsSet(object) - peaks(xs) <- do.call(rbind, res$peaks) - xs@.processHistory <- res$procHist - OK <- .validProcessHistory(xs) - if (!is.logical(OK)) - stop(OK) - if (!any(colnames(pData(object)) == "class")) - message("Note: you might want to set/adjust the", - " 'sampclass' of the returned xcmSet object", - " before proceeding with the analysis.") - return(xs) - } - }) +## ##' @title Single-spectrum non-chromatography MS data peak detection +## ##' +## ##' @description The \code{findChromPeaks,MSnExp,MSWParam} method +## ##' performs peak detection in single-spectrum non-chromatography MS +## ##' data using functionality from the \code{MassSpecWavelet} package on all +## ##' samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain +## ##' mz and intensity values of all spectra hence no additional +## ##' data input from the original files is required. 
+## ##' +## ##' @rdname findPeaks-MSW +## setMethod("findChromPeaks", +## signature(object = "MSnExp", param = "MSWParam"), +## function(object, param, BPPARAM = bpparam(), return.type = "list") { +## return.type <- match.arg(return.type, c("list", "xcmsSet")) +## ms1_idx <- which(unname(msLevel(object)) == 1) +## if (length(ms1_idx) == 0) +## stop("No MS1 spectra available for chromatographic peak", +## " detection!") +## spect_list <- split(spectra(object)[ms1_idx], +## fromFile(object)[ms1_idx]) +## resList <- bplapply(spect_list, function(z) { +## findPeaks_MSW_Spectrum_list(z, +## method = "MSW", +## param = param) +## }, BPPARAM = BPPARAM) +## res <- .processResultList(resList, +## getProcHist = return.type != "list", +## fnames = fileNames(object)) +## if (return.type == "list") +## return(res$peaks) +## if (return.type == "xcmsSet") { +## xs <- .pSet2xcmsSet(object) +## peaks(xs) <- do.call(rbind, res$peaks) +## xs@.processHistory <- res$procHist +## OK <- .validProcessHistory(xs) +## if (!is.logical(OK)) +## stop(OK) +## if (!any(colnames(pData(object)) == "class")) +## message("Note: you might want to set/adjust the", +## " 'sampclass' of the returned xcmSet object", +## " before proceeding with the analysis.") +## return(xs) +## } +## }) -## The centWave with predicted isotope feature detection method for OnDiskMSnExp: -##' @title Two-step centWave feature detection considering also feature isotopes +## The centWave with predicted isotope peak detection method for OnDiskMSnExp: +##' @title Two-step centWave peak detection considering also isotopes ##' -##' @description The \code{detectFeatures,OnDiskMSnExp,CentWavePredIsoParam} method -##' performs a two-step centWave-based feature detection on all samples from an -##' \code{\link[MSnbase]{OnDiskMSnExp}} object. 
\code{\link[MSnbase]{OnDiskMSnExp}} -##' objects encapsule all experiment specific data and load the spectra data -##' (mz and intensity values) on the fly from the original files applying also -##' all eventual data manipulations. +##' @description The \code{findChromPeaks,OnDiskMSnExp,CentWavePredIsoParam} method +##' performs a two-step centWave-based chromatographic peak detection on all +##' samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. +##' \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific +##' data and load the spectra data (mz and intensity values) on the fly from +##' the original files applying also all eventual data manipulations. ##' ##' @details Parallel processing (one process per sample) is supported and can ##' be configured either by the \code{BPPARAM} parameter or by globally defining ##' the parallel processing mode using the \code{\link[BiocParallel]{register}} ##' method from the \code{BiocParallel} package. ##' -##' @inheritParams featureDetection-centWave -##' -##' @return For \code{detectFeatures}: if \code{return.type = "XCMSnExp"} an -##' \code{\link{XCMSnExp}} object with the results of the feature detection. +##' @param param An \code{CentWavePredIsoParam} object with the settings for the +##' chromatographic peak detection algorithm. +##' @inheritParams findChromPeaks-centWave +##' +##' @return For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +##' \code{\link{XCMSnExp}} object with the results of the peak detection. ##' If \code{return.type = "list"} a list of length equal to the number of -##' samples with matrices specifying the identified features/peaks. +##' samples with matrices specifying the identified peaks. ##' If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -##' with the results of the feature detection. +##' with the results of the peak detection. 
##' ##' @seealso \code{\link{XCMSnExp}} for the object containing the results of -##' the feature detection. +##' the peak detection. ##' -##' @rdname featureDetection-centWaveWithPredIsoROIs -setMethod("detectFeatures", +##' @rdname findChromPeaks-centWaveWithPredIsoROIs +setMethod("findChromPeaks", signature(object = "OnDiskMSnExp", param = "CentWavePredIsoParam"), function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") { return.type <- match.arg(return.type, c("XCMSnExp", "list", @@ -599,11 +631,12 @@ setMethod("detectFeatures", warning("Your data appears to be not centroided! CentWave", " works best on data in centroid mode.") ## (1) split the object per file. - ## (2) use bplapply to do the feature detection. + ## (2) use bplapply to do the peak detection. resList <- bplapply(lapply(1:length(fileNames(object)), filterFile, object = object), - FUN = detectFeatures_OnDiskMSnExp, - method = "centWaveWithPredIsoROIs", param = param) + FUN = findChromPeaks_OnDiskMSnExp, + method = "centWaveWithPredIsoROIs", param = param, + BPPARAM = BPPARAM) ## (3) collect the results. res <- .processResultList(resList, getProcHist = return.type == "xcmsSet", @@ -613,13 +646,18 @@ setMethod("detectFeatures", ## that later, but for now seems reasonable to have it in one, ## since we're calling the method once on all. xph <- XProcessHistory(param = param, date. = startDate, - type. = .PROCSTEP.FEATURE.DETECTION, + type. 
= .PROCSTEP.PEAK.DETECTION, fileIndex = 1:length(fileNames(object))) object <- as(object, "XCMSnExp") object@.processHistory <- list(xph) - if (hasAdjustedRtime(object) | hasAlignedFeatures(object)) + if (hasAdjustedRtime(object) | hasFeatures(object)) object@msFeatureData <- new("MsFeatureData") - features(object) <- do.call(rbind, res$peaks) + pks <- do.call(rbind, res$peaks) + if (length(pks) > 0) + chromPeaks(object) <- cbind(pks, is_filled = 0) + ## ## chromPeaks(object) <- do.call(rbind, res$peaks) + ## chromPeaks(object) <- cbind(do.call(rbind, res$peaks), + ## is_filled = 0) if (validObject(object)) return(object) } @@ -641,58 +679,166 @@ setMethod("detectFeatures", }) -## The centWave with predicted isotope feature detection method for MSnExp: -##' @title Two-step centWave feature detection considering also feature isotopes -##' -##' @description The \code{detectFeatures,MSnExp,CentWavePredIsoParam} method -##' performs a two-step centWave-based feature detection on all samples from -##' an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and -##' intensity values of all spectra hence no additional data input from the -##' original files is required. -##' -##' @rdname featureDetection-centWaveWithPredIsoROIs -setMethod("detectFeatures", - signature(object = "MSnExp", param = "CentWavePredIsoParam"), - function(object, param, BPPARAM = bpparam(), return.type = "list") { - return.type <- match.arg(return.type, c("list", "xcmsSet")) - ## Restrict to MS1 data. - ## Man, that's too slow! We're doing the MS1 restriction below. - ## object <- filterMsLevel(object, msLevel. = 1) - ## (1) split the spectra per file - this means we have a second - ## copy of the data, but there is no way around that as - ## filterFile is pretty slow on MSnExp. 
- ms1_idx <- which(unname(msLevel(object)) == 1) - if (length(ms1_idx) == 0) - stop("No MS1 spectra available for feature detection!") - ## Check if the data is centroided - if (!isCentroided(object[[ms1_idx[1]]])) - warning("Your data appears to be not centroided! CentWave", - " works best on data in centroid mode.") - spect_list <- split(spectra(object)[ms1_idx], - fromFile(object)[ms1_idx]) - ## (2) use bplapply to do the feature detection. - resList <- bplapply(spect_list, function(z) { - detectFeatures_Spectrum_list(z, - method = "centWaveWithPredIsoROIs", - param = param) - }, BPPARAM = BPPARAM) - ## (3) collect the results. - res <- .processResultList(resList, - getProcHist = return.type != "list", - fnames = fileNames(object)) - if (return.type == "list") - return(res$peaks) - if (return.type == "xcmsSet") { - xs <- .pSet2xcmsSet(object) - peaks(xs) <- do.call(rbind, res$peaks) - xs@.processHistory <- res$procHist - OK <- .validProcessHistory(xs) - if (!is.logical(OK)) - stop(OK) - if (!any(colnames(pData(object)) == "class")) - message("Note: you might want to set/adjust the", - " 'sampclass' of the returned xcmSet object", - " before proceeding with the analysis.") - return(xs) +## ## The centWave with predicted isotope peak detection method for MSnExp: +## ##' @title Two-step centWave peak detection considering also isotopes +## ##' +## ##' @description The \code{findChromPeaks,MSnExp,CentWavePredIsoParam} method +## ##' performs a two-step centWave-based peak detection on all samples from +## ##' an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and +## ##' intensity values of all spectra hence no additional data input from the +## ##' original files is required. 
+## ##' +## ##' @rdname findChromPeaks-centWaveWithPredIsoROIs +## setMethod("findChromPeaks", +## signature(object = "MSnExp", param = "CentWavePredIsoParam"), +## function(object, param, BPPARAM = bpparam(), return.type = "list") { +## return.type <- match.arg(return.type, c("list", "xcmsSet")) +## ## Restrict to MS1 data. +## ## Man, that's too slow! We're doing the MS1 restriction below. +## ## object <- filterMsLevel(object, msLevel. = 1) +## ## (1) split the spectra per file - this means we have a second +## ## copy of the data, but there is no way around that as +## ## filterFile is pretty slow on MSnExp. +## ms1_idx <- which(unname(msLevel(object)) == 1) +## if (length(ms1_idx) == 0) +## stop("No MS1 spectra available for chromatographic peak", +## " detection!") +## ## Check if the data is centroided +## if (!isCentroided(object[[ms1_idx[1]]])) +## warning("Your data appears to be not centroided! CentWave", +## " works best on data in centroid mode.") +## spect_list <- split(spectra(object)[ms1_idx], +## fromFile(object)[ms1_idx]) +## ## (2) use bplapply to do the peak detection. +## resList <- bplapply(spect_list, function(z) { +## findChromPeaks_Spectrum_list(z, +## method = "centWaveWithPredIsoROIs", +## param = param) +## }, BPPARAM = BPPARAM) +## ## (3) collect the results. +## res <- .processResultList(resList, +## getProcHist = return.type != "list", +## fnames = fileNames(object)) +## if (return.type == "list") +## return(res$peaks) +## if (return.type == "xcmsSet") { +## xs <- .pSet2xcmsSet(object) +## peaks(xs) <- do.call(rbind, res$peaks) +## xs@.processHistory <- res$procHist +## OK <- .validProcessHistory(xs) +## if (!is.logical(OK)) +## stop(OK) +## if (!any(colnames(pData(object)) == "class")) +## message("Note: you might want to set/adjust the", +## " 'sampclass' of the returned xcmSet object", +## " before proceeding with the analysis.") +## return(xs) +## } +## }) + +## profMat method for XCMSnExp/OnDiskMSnExp. 
+##' @description \code{profMat}: creates a \emph{profile matrix}, which +##' is a n x m matrix, n (rows) representing equally spaced m/z values (bins) and +##' m (columns) the retention time of the corresponding scans. Each cell contains +##' the maximum intensity measured for the specific scan and m/z values. See +##' \code{\link{profMat}} for more details and description of the various binning +##' methods. +##' +##' @param ... Additional parameters. +##' +##' @return For \code{profMat}: a \code{list} with a the profile matrix +##' \code{matrix} (or matrices if \code{fileIndex} was not specified or if +##' \code{length(fileIndex) > 1}). See \code{\link{profile-matrix}} for general +##' help and information about the profile matrix. +##' +##' @inheritParams profMat-xcmsSet +##' +##' @rdname XCMSnExp-class +setMethod("profMat", signature(object = "OnDiskMSnExp"), function(object, + method = "bin", + step = 0.1, + baselevel = NULL, + basespace = NULL, + mzrange. = NULL, + fileIndex, + ...) { + ## Subset the object by fileIndex. + if (!missing(fileIndex)) { + if (!is.numeric(fileIndex)) + stop("'fileIndex' has to be an integer.") + if (!all(fileIndex %in% seq_along(fileNames(object)))) + stop("'fileIndex' has to be an integer between 1 and ", + length(fileNames(object)), "!") + object <- filterFile(object, fileIndex) + } + ## Split it by file and bplapply over it to generate the profile matrix. + theF <- factor(seq_along(fileNames(object))) + theDots <- list(...) + if (any(names(theDots) == "returnBreaks")) + returnBreaks <- theDots$returnBreaks + else + returnBreaks <- FALSE + res <- bplapply(splitByFile(object, f = theF), function(z, bmethod, bstep, + bbaselevel, + bbasespace, + bmzrange., + breturnBreaks) { + require(xcms, quietly = TRUE) + ## Note: this is way faster than spectrapply with + ## as.data.frame! 
+ sps <- spectra(z, BPPARAM = SerialParam()) + mzs <- lapply(sps, mz) + vps <- lengths(mzs, use.names = FALSE) + return(.createProfileMatrix(mz = unlist(mzs, use.names = FALSE), + int = unlist(lapply(sps, intensity), + use.names = FALSE), + valsPerSpect = vps, + method = bmethod, + step = bstep, + baselevel = bbaselevel, + basespace = bbasespace, + mzrange. = bmzrange., + returnBreaks = breturnBreaks) + ) + }, bmethod = method, bstep = step, bbaselevel = baselevel, + bbasespace = basespace, bmzrange. = mzrange., breturnBreaks = returnBreaks) + return(res) +}) + +##' @rdname adjustRtime-obiwarp +setMethod("adjustRtime", + signature(object = "OnDiskMSnExp", param = "ObiwarpParam"), + function(object, param) { + res <- .obiwarp(object, param = param) + res <- unlist(res, use.names = FALSE) + sNames <- unlist(split(featureNames(object), fromFile(object)), + use.names = FALSE) + names(res) <- sNames + res <- res[featureNames(object)] + return(res) + }) + +#' @rdname extractChromatograms-method +setMethod("extractChromatograms", + signature(object = "OnDiskMSnExp"), + function(object, rt, mz, aggregationFun = "sum", missing = NA_real_) { + if (!missing(rt)) { + if (is.null(ncol(rt))) + rt <- matrix(range(rt), ncol = 2, nrow = 1) } + if (!missing(mz)) { + if (is.null(ncol(mz))) + mz <- matrix(range(mz), ncol = 2, nrow = 1) + } + ## return(.extractChromatogram(x = object, rt = rt, mz = mz, + ## aggregationFun = aggregationFun)) + .extractMultipleChromatograms(object, rt = rt, mz = mz, + aggregationFun = aggregationFun, + missingValue = missing) + }) + +#' @rdname extractMsData-method +setMethod("extractMsData", signature(object = "OnDiskMSnExp"), + function(object, rt, mz) { + .extractMsData(object, rt = rt, mz = mz) }) diff --git a/R/methods-Params.R b/R/methods-Params.R index 909fc2dde..29823944b 100644 --- a/R/methods-Params.R +++ b/R/methods-Params.R @@ -16,6 +16,28 @@ setMethod("initialize", "Param", function(.Object, ...) { callNextMethod(.Object, ...) 
}) +############################################################ +## GenericParam +### +setMethod("initialize", "GenericParam", function(.Object, ...) { + classVersion(.Object)["GenericParam"] <- "0.0.1" + callNextMethod(.Object, ...) +}) +#' @param object \code{GenericParam} object. +#' +#' @rdname GenericParam +setMethod("show", "GenericParam", function(object) { + cat("Object of class: ", class(object), "\n") + cat(" fun:", object@fun, "\n") + cat(" arguments:\n") + if (length(object@args) > 0) { + for (i in 1:length(object@args)) { + if (!is.null(names(object@args))) + cat(" ", names(object@args)[i], "= ") + cat(object@args[[i]], "\n") + } + } +}) ############################################################ ## CentWaveParam @@ -25,9 +47,7 @@ setMethod("initialize", "CentWaveParam", function(.Object, ...) { callNextMethod(.Object, ...) }) -## ##' @rdname featureDetection-centWave -## setMethod("print", "CentWaveParam", function(x, ...) show(x)) -##' @rdname featureDetection-centWave +#' @rdname findChromPeaks-centWave setMethod("show", "CentWaveParam", function(object) { cat("Object of class: ", class(object), "\n") cat("Parameters:\n") @@ -46,183 +66,222 @@ setMethod("show", "CentWaveParam", function(object) { cat(" roiScales length:", length(roiScales(object)), "\n") }) -##' @aliases ppm -##' @description \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} -##' slot of the object. -##' @rdname featureDetection-centWave +#' @aliases ppm +#' +#' @description \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} +#' slot of the object. +#' +#' @rdname findChromPeaks-centWave setMethod("ppm", "CentWaveParam", function(object){ return(object@ppm)}) -##' @aliases ppm<- -##' @param value The value for the slot. -##' @rdname featureDetection-centWave +#' @aliases ppm<- +#' +#' @param value The value for the slot. 
+#' +#' @rdname findChromPeaks-centWave setReplaceMethod("ppm", "CentWaveParam", function(object, value) { object@ppm <- value if (validObject(object)) return(object) }) -##' @aliases peakwidth -##' @description \code{peakwidth},\code{peakwidth<-}: getter and setter for the -##' \code{peakwidth} slot of the object. -##' @rdname featureDetection-centWave +#' @aliases peakwidth +#' +#' @description \code{peakwidth},\code{peakwidth<-}: getter and setter for the +#' \code{peakwidth} slot of the object. +#' +#' @rdname findChromPeaks-centWave setMethod("peakwidth", "CentWaveParam", function(object) return(object@peakwidth)) -##' @aliases peakwidth<- -##' @rdname featureDetection-centWave +#' @aliases peakwidth<- +#' +#' @rdname findChromPeaks-centWave setReplaceMethod("peakwidth", "CentWaveParam", function(object, value) { object@peakwidth <- value if (validObject(object)) return(object) }) -##' @aliases snthresh -##' @description \code{snthresh},\code{snthresh<-}: getter and setter for the -##' \code{snthresh} slot of the object. -##' @rdname featureDetection-centWave +#' @aliases snthresh +#' +#' @description \code{snthresh},\code{snthresh<-}: getter and setter for the +#' \code{snthresh} slot of the object. +#' +#' @rdname findChromPeaks-centWave setMethod("snthresh", "CentWaveParam", function(object) return(object@snthresh)) -##' @aliases snthresh<- -##' @rdname featureDetection-centWave +#' @aliases snthresh<- +#' +#' @rdname findChromPeaks-centWave setReplaceMethod("snthresh", "CentWaveParam", function(object, value) { object@snthresh <- value if (validObject(object)) return(object) }) -##' @aliases prefilter -##' @description \code{prefilter},\code{prefilter<-}: getter and setter for the -##' \code{prefilter} slot of the object. -##' @rdname featureDetection-centWave +#' @aliases prefilter +#' +#' @description \code{prefilter},\code{prefilter<-}: getter and setter for the +#' \code{prefilter} slot of the object. 
+#' +#' @rdname findChromPeaks-centWave setMethod("prefilter", "CentWaveParam", function(object) return(object@prefilter)) -##' @aliases prefilter<- -##' @rdname featureDetection-centWave +#' @aliases prefilter<- +#' +#' @rdname findChromPeaks-centWave setReplaceMethod("prefilter", "CentWaveParam", function(object, value) { object@prefilter <- value if (validObject(object)) return(object) }) -##' @aliases mzCenterFun -##' @description \code{mzCenterFun},\code{mzCenterFun<-}: getter and setter for the -##' \code{mzCenterFun} slot of the object. -##' @rdname featureDetection-centWave +#' @aliases mzCenterFun +#' +#' @description \code{mzCenterFun},\code{mzCenterFun<-}: getter and setter for the +#' \code{mzCenterFun} slot of the object. +#' +#' @rdname findChromPeaks-centWave setMethod("mzCenterFun", "CentWaveParam", function(object) return(object@mzCenterFun)) -##' @aliases mzCenterFun<- -##' @rdname featureDetection-centWave +#' @aliases mzCenterFun<- +#' +#' @rdname findChromPeaks-centWave setReplaceMethod("mzCenterFun", "CentWaveParam", function(object, value) { object@mzCenterFun <- value if (validObject(object)) return(object) }) -##' @description \code{integrate},\code{integrate<-}: getter and setter for the -##' \code{integrate} slot of the object. -##' @param f For \code{integrate}: a \code{CentWaveParam} object. -##' -##' @rdname featureDetection-centWave +#' @description \code{integrate},\code{integrate<-}: getter and setter for the +#' \code{integrate} slot of the object. +#' +#' @param f For \code{integrate}: a \code{CentWaveParam} object. 
+#' +#' @rdname findChromPeaks-centWave setMethod("integrate", signature(f = "CentWaveParam"), function(f) return(f@integrate)) -##' @aliases integrate<- -##' @rdname featureDetection-centWave +#' @aliases integrate<- +#' +#' @rdname findChromPeaks-centWave setReplaceMethod("integrate", "CentWaveParam", function(object, value) { object@integrate <- as.integer(value) if (validObject(object)) return(object) }) -##' @aliases mzdiff -##' @description \code{mzdiff},\code{mzdiff<-}: getter and setter for the -##' \code{mzdiff} slot of the object. -##' @rdname featureDetection-centWave +#' @aliases mzdiff +#' +#' @description \code{mzdiff},\code{mzdiff<-}: getter and setter for the +#' \code{mzdiff} slot of the object. +#' +#' @rdname findChromPeaks-centWave setMethod("mzdiff", "CentWaveParam", function(object) return(object@mzdiff)) -##' @aliases mzdiff<- -##' @rdname featureDetection-centWave +#' @aliases mzdiff<- +#' +#' @rdname findChromPeaks-centWave setReplaceMethod("mzdiff", "CentWaveParam", function(object, value) { object@mzdiff <- value if (validObject(object)) return(object) }) -##' @aliases fitgauss -##' @description \code{fitgauss},\code{fitgauss<-}: getter and setter for the -##' \code{fitgauss} slot of the object. -##' @rdname featureDetection-centWave +#' @aliases fitgauss +#' +#' @description \code{fitgauss},\code{fitgauss<-}: getter and setter for the +#' \code{fitgauss} slot of the object. +#' +#' @rdname findChromPeaks-centWave setMethod("fitgauss", "CentWaveParam", function(object) return(object@fitgauss)) -##' @aliases fitgauss<- -##' @rdname featureDetection-centWave +#' @aliases fitgauss<- +#' +#' @rdname findChromPeaks-centWave setReplaceMethod("fitgauss", "CentWaveParam", function(object, value) { object@fitgauss <- value if (validObject(object)) return(object) }) -##' @aliases noise -##' @description \code{noise},\code{noise<-}: getter and setter for the -##' \code{noise} slot of the object. 
-##' @rdname featureDetection-centWave +#' @aliases noise +#' +#' @description \code{noise},\code{noise<-}: getter and setter for the +#' \code{noise} slot of the object. +#' +#' @rdname findChromPeaks-centWave setMethod("noise", "CentWaveParam", function(object) return(object@noise)) -##' @aliases noise<- -##' @rdname featureDetection-centWave +#' @aliases noise<- +#' +#' @rdname findChromPeaks-centWave setReplaceMethod("noise", "CentWaveParam", function(object, value) { object@noise <- value if (validObject(object)) return(object) }) -##' @aliases verboseColumns -##' @description \code{verboseColumns},\code{verboseColumns<-}: getter and -##' setter for the \code{verboseColumns} slot of the object. -##' @rdname featureDetection-centWave +#' @aliases verboseColumns +#' +#' @description \code{verboseColumns},\code{verboseColumns<-}: getter and +#' setter for the \code{verboseColumns} slot of the object. +#' +#' @rdname findChromPeaks-centWave setMethod("verboseColumns", "CentWaveParam", function(object) return(object@verboseColumns)) -##' @aliases verboseColumns<- -##' @rdname featureDetection-centWave +#' @aliases verboseColumns<- +#' +#' @rdname findChromPeaks-centWave setReplaceMethod("verboseColumns", "CentWaveParam", function(object, value) { object@verboseColumns <- value if (validObject(object)) return(object) }) -##' @aliases roiList -##' @description \code{roiList},\code{roiList<-}: getter and setter for the -##' \code{roiList} slot of the object. -##' @rdname featureDetection-centWave +#' @aliases roiList +#' +#' @description \code{roiList},\code{roiList<-}: getter and setter for the +#' \code{roiList} slot of the object. 
+#'
+#' @rdname findChromPeaks-centWave
 setMethod("roiList", "CentWaveParam", function(object)
     return(object@roiList))

-##' @aliases roiList<-
-##' @rdname featureDetection-centWave
+#' @aliases roiList<-
+#'
+#' @rdname findChromPeaks-centWave
 setReplaceMethod("roiList", "CentWaveParam", function(object, value) {
     object@roiList <- value
     if (validObject(object))
         return(object)
 })

-##' @aliases firstBaselineCheck
-##' @description \code{fistBaselineCheck},\code{firstBaselineCheck<-}: getter
-##' and setter for the \code{firstBaselineCheck} slot of the object.
-##' @rdname featureDetection-centWave
+#' @aliases firstBaselineCheck
+#'
+#' @description \code{firstBaselineCheck},\code{firstBaselineCheck<-}: getter
+#' and setter for the \code{firstBaselineCheck} slot of the object.
+#'
+#' @rdname findChromPeaks-centWave
 setMethod("firstBaselineCheck", "CentWaveParam", function(object)
     return(object@firstBaselineCheck))

-##' @aliases firstBaselineCheck<-
-##' @rdname featureDetection-centWave
+#' @aliases firstBaselineCheck<-
+#'
+#' @rdname findChromPeaks-centWave
 setReplaceMethod("firstBaselineCheck", "CentWaveParam",
                  function(object, value) {
     object@firstBaselineCheck <- value
     if (validObject(object))
         return(object)
 })

-##' @aliases roiScales
-##' @description \code{roiScales},\code{roiScales<-}: getter and setter for the
-##' \code{roiScales} slot of the object.
-##' @rdname featureDetection-centWave
+#' @aliases roiScales
+#'
+#' @description \code{roiScales},\code{roiScales<-}: getter and setter for the
+#' \code{roiScales} slot of the object. 
+#' +#' @rdname findChromPeaks-centWave setMethod("roiScales", "CentWaveParam", function(object) return(object@roiScales)) -##' @aliases roiScales<- -##' @rdname featureDetection-centWave +#' @aliases roiScales<- +#' +#' @rdname findChromPeaks-centWave setReplaceMethod("roiScales", "CentWaveParam", function(object, value) { object@roiScales <- value if (validObject(object)) @@ -236,7 +295,7 @@ setMethod("initialize", "MatchedFilterParam", function(.Object, ...) { classVersion(.Object)["MatchedFilterParam"] <- "0.0.1" callNextMethod(.Object, ...) }) -##' @rdname featureDetection-matchedFilter +#' @rdname findChromPeaks-matchedFilter setMethod("show", "MatchedFilterParam", function(object) { cat("Object of class: ", class(object), "\n") cat("Parameters:\n") @@ -253,150 +312,179 @@ setMethod("show", "MatchedFilterParam", function(object) { cat(" index:", index(object), "\n") }) -##' @aliases binSize -##' @description \code{binSize},\code{binSize<-}: getter and setter for the -##' \code{binSize} slot of the object. -##' @rdname featureDetection-matchedFilter +#' @aliases binSize +#' +#' @description \code{binSize},\code{binSize<-}: getter and setter for the +#' \code{binSize} slot of the object. +#' +#' @rdname findChromPeaks-matchedFilter setMethod("binSize", "MatchedFilterParam", function(object) return(object@binSize)) -##' @aliases binSize<- -##' @param value The value for the slot. -##' @rdname featureDetection-matchedFilter +#' @aliases binSize<- +#' +#' @param value The value for the slot. +#' +#' @rdname findChromPeaks-matchedFilter setReplaceMethod("binSize", "MatchedFilterParam", function(object, value) { object@binSize <- value if (validObject(object)) return(object) }) -##' @description \code{impute},\code{impute<-}: getter and setter for the -##' \code{impute} slot of the object. -##' @rdname featureDetection-matchedFilter +#' @description \code{impute},\code{impute<-}: getter and setter for the +#' \code{impute} slot of the object. 
+#' +#' @rdname findChromPeaks-matchedFilter setMethod("impute", "MatchedFilterParam", function(object) return(object@impute)) -##' @aliases impute<- -##' @rdname featureDetection-matchedFilter +#' @aliases impute<- +#' +#' @rdname findChromPeaks-matchedFilter setReplaceMethod("impute", "MatchedFilterParam", function(object, value) { object@impute <- value if (validObject(object)) return(object) }) -##' @aliases baseValue -##' @description \code{baseValue},\code{baseValue<-}: getter and setter for the -##' \code{baseValue} slot of the object. -##' @rdname featureDetection-matchedFilter +#' @aliases baseValue +#' +#' @description \code{baseValue},\code{baseValue<-}: getter and setter for the +#' \code{baseValue} slot of the object. +#' +#' @rdname findChromPeaks-matchedFilter setMethod("baseValue", "MatchedFilterParam", function(object) return(object@baseValue)) -##' @aliases baseValue<- -##' @rdname featureDetection-matchedFilter +#' @aliases baseValue<- +#' +#' @rdname findChromPeaks-matchedFilter setReplaceMethod("baseValue", "MatchedFilterParam", function(object, value) { object@baseValue <- value if (validObject(object)) return(object) }) -##' @aliases distance -##' @description \code{distance},\code{distance<-}: getter and setter for the -##' \code{distance} slot of the object. -##' @rdname featureDetection-matchedFilter +#' @aliases distance +#' +#' @description \code{distance},\code{distance<-}: getter and setter for the +#' \code{distance} slot of the object. 
+#' +#' @rdname findChromPeaks-matchedFilter setMethod("distance", "MatchedFilterParam", function(object) return(object@distance)) -##' @aliases distance<- -##' @rdname featureDetection-matchedFilter +#' @aliases distance<- +#' +#' @rdname findChromPeaks-matchedFilter setReplaceMethod("distance", "MatchedFilterParam", function(object, value) { object@distance <- value if (validObject(object)) return(object) }) -##' @aliases fwhm -##' @description \code{fwhm},\code{fwhm<-}: getter and setter for the -##' \code{fwhm} slot of the object. -##' @rdname featureDetection-matchedFilter +#' @aliases fwhm +#' +#' @description \code{fwhm},\code{fwhm<-}: getter and setter for the +#' \code{fwhm} slot of the object. +#' +#' @rdname findChromPeaks-matchedFilter setMethod("fwhm", "MatchedFilterParam", function(object) return(object@fwhm)) -##' @aliases fwhm<- -##' @rdname featureDetection-matchedFilter +#' @aliases fwhm<- +#' +#' @rdname findChromPeaks-matchedFilter setReplaceMethod("fwhm", "MatchedFilterParam", function(object, value) { object@fwhm <- value if (validObject(object)) return(object) }) -##' @aliases sigma -##' @description \code{sigma},\code{sigma<-}: getter and setter for the -##' \code{sigma} slot of the object. -##' @rdname featureDetection-matchedFilter +#' @aliases sigma +#' +#' @description \code{sigma},\code{sigma<-}: getter and setter for the +#' \code{sigma} slot of the object. +#' +#' @rdname findChromPeaks-matchedFilter setMethod("sigma", "MatchedFilterParam", function(object) return(object@sigma)) -##' @aliases sigma<- -##' @rdname featureDetection-matchedFilter +#' @aliases sigma<- +#' +#' @rdname findChromPeaks-matchedFilter setReplaceMethod("sigma", "MatchedFilterParam", function(object, value) { object@sigma <- value if (validObject(object)) return(object) }) -##' @description \code{max},\code{max<-}: getter and setter for the -##' \code{max} slot of the object. -##' @param x For \code{max}: a \code{MatchedFilterParam} object. 
-##' @rdname featureDetection-matchedFilter +#' @description \code{max},\code{max<-}: getter and setter for the +#' \code{max} slot of the object. +#' +#' @param x For \code{max}: a \code{MatchedFilterParam} object. +#' +#' @rdname findChromPeaks-matchedFilter setMethod("max", signature(x="MatchedFilterParam"), function(x) return(x@max)) -##' @aliases max<- -##' @rdname featureDetection-matchedFilter +#' @aliases max<- +#' +#' @rdname findChromPeaks-matchedFilter setReplaceMethod("max", "MatchedFilterParam", function(object, value) { object@max <- value if (validObject(object)) return(object) }) -##' @description \code{snthresh},\code{snthresh<-}: getter and setter for the -##' \code{snthresh} slot of the object. -##' @rdname featureDetection-matchedFilter +#' @description \code{snthresh},\code{snthresh<-}: getter and setter for the +#' \code{snthresh} slot of the object. +#' +#' @rdname findChromPeaks-matchedFilter setMethod("snthresh", "MatchedFilterParam", function(object) return(object@snthresh)) -##' @rdname featureDetection-matchedFilter +#' @rdname findChromPeaks-matchedFilter setReplaceMethod("snthresh", "MatchedFilterParam", function(object, value) { object@snthresh <- value if (validObject(object)) return(object) }) -##' @aliases steps -##' @description \code{steps},\code{steps<-}: getter and setter for the -##' \code{steps} slot of the object. -##' @rdname featureDetection-matchedFilter +#' @aliases steps +#' +#' @description \code{steps},\code{steps<-}: getter and setter for the +#' \code{steps} slot of the object. 
+#' +#' @rdname findChromPeaks-matchedFilter setMethod("steps", "MatchedFilterParam", function(object) return(object@steps)) -##' @aliases steps<- -##' @rdname featureDetection-matchedFilter +#' @aliases steps<- +#' +#' @rdname findChromPeaks-matchedFilter setReplaceMethod("steps", "MatchedFilterParam", function(object, value) { object@steps <- value if (validObject(object)) return(object) }) -##' @description \code{mzdiff},\code{mzdiff<-}: getter and setter for the -##' \code{mzdiff} slot of the object. -##' @rdname featureDetection-matchedFilter +#' @description \code{mzdiff},\code{mzdiff<-}: getter and setter for the +#' \code{mzdiff} slot of the object. +#' +#' @rdname findChromPeaks-matchedFilter setMethod("mzdiff", "MatchedFilterParam", function(object) return(object@mzdiff)) -##' @rdname featureDetection-matchedFilter +#' @rdname findChromPeaks-matchedFilter setReplaceMethod("mzdiff", "MatchedFilterParam", function(object, value) { object@mzdiff <- value if (validObject(object)) return(object) }) -##' @aliases index -##' @description \code{index},\code{index<-}: getter and setter for the -##' \code{index} slot of the object. -##' @rdname featureDetection-matchedFilter +#' @aliases index +#' +#' @description \code{index},\code{index<-}: getter and setter for the +#' \code{index} slot of the object. +#' +#' @rdname findChromPeaks-matchedFilter setMethod("index", "MatchedFilterParam", function(object) return(object@index)) -##' @aliases index<- -##' @rdname featureDetection-matchedFilter +#' @aliases index<- +#' +#' @rdname findChromPeaks-matchedFilter setReplaceMethod("index", "MatchedFilterParam", function(object, value) { object@index <- value if (validObject(object)) @@ -411,7 +499,7 @@ setMethod("initialize", "MassifquantParam", function(.Object, ...) { callNextMethod(.Object, ...) 
}) -##' @rdname featureDetection-massifquant +#' @rdname findChromPeaks-massifquant setMethod("show", "MassifquantParam", function(object) { cat("Object of class: ", class(object), "\n") cat("Parameters:\n") @@ -432,150 +520,167 @@ setMethod("show", "MassifquantParam", function(object) { cat(" withWave:", withWave(object), "\n") }) -##' @description \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} -##' slot of the object. -##' @rdname featureDetection-massifquant +#' @description \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} +#' slot of the object. +#' +#' @rdname findChromPeaks-massifquant setMethod("ppm", "MassifquantParam", function(object){ return(object@ppm)}) -##' @param value The value for the slot. -##' @rdname featureDetection-massifquant +#' @param value The value for the slot. +#' +#' @rdname findChromPeaks-massifquant setReplaceMethod("ppm", "MassifquantParam", function(object, value) { object@ppm <- value if (validObject(object)) return(object) }) -##' @description \code{peakwidth},\code{peakwidth<-}: getter and setter for the -##' \code{peakwidth} slot of the object. -##' @rdname featureDetection-massifquant +#' @description \code{peakwidth},\code{peakwidth<-}: getter and setter for the +#' \code{peakwidth} slot of the object. +#' +#' @rdname findChromPeaks-massifquant setMethod("peakwidth", "MassifquantParam", function(object) return(object@peakwidth)) -##' @rdname featureDetection-massifquant +#' @rdname findChromPeaks-massifquant setReplaceMethod("peakwidth", "MassifquantParam", function(object, value) { object@peakwidth <- value if (validObject(object)) return(object) }) -##' @description \code{snthresh},\code{snthresh<-}: getter and setter for the -##' \code{snthresh} slot of the object. -##' @rdname featureDetection-massifquant +#' @description \code{snthresh},\code{snthresh<-}: getter and setter for the +#' \code{snthresh} slot of the object. 
+#' +#' @rdname findChromPeaks-massifquant setMethod("snthresh", "MassifquantParam", function(object) return(object@snthresh)) -##' @rdname featureDetection-massifquant +#' @rdname findChromPeaks-massifquant setReplaceMethod("snthresh", "MassifquantParam", function(object, value) { object@snthresh <- value if (validObject(object)) return(object) }) -##' @description \code{prefilter},\code{prefilter<-}: getter and setter for the -##' \code{prefilter} slot of the object. -##' @rdname featureDetection-massifquant +#' @description \code{prefilter},\code{prefilter<-}: getter and setter for the +#' \code{prefilter} slot of the object. +#' +#' @rdname findChromPeaks-massifquant setMethod("prefilter", "MassifquantParam", function(object) return(object@prefilter)) -##' @rdname featureDetection-massifquant +#' @rdname findChromPeaks-massifquant setReplaceMethod("prefilter", "MassifquantParam", function(object, value) { object@prefilter <- value if (validObject(object)) return(object) }) -##' @description \code{mzCenterFun},\code{mzCenterFun<-}: getter and setter for the -##' \code{mzCenterFun} slot of the object. -##' @rdname featureDetection-massifquant +#' @description \code{mzCenterFun},\code{mzCenterFun<-}: getter and setter for the +#' \code{mzCenterFun} slot of the object. +#' +#' @rdname findChromPeaks-massifquant setMethod("mzCenterFun", "MassifquantParam", function(object) return(object@mzCenterFun)) -##' @rdname featureDetection-massifquant +#' @rdname findChromPeaks-massifquant setReplaceMethod("mzCenterFun", "MassifquantParam", function(object, value) { object@mzCenterFun <- value if (validObject(object)) return(object) }) -##' @description \code{integrate},\code{integrate<-}: getter and setter for the -##' \code{integrate} slot of the object. -##' @param f For \code{integrate}: a \code{MassifquantParam} object. 
-##' -##' @rdname featureDetection-massifquant +#' @description \code{integrate},\code{integrate<-}: getter and setter for the +#' \code{integrate} slot of the object. +#' +#' @param f For \code{integrate}: a \code{MassifquantParam} object. +#' +#' @rdname findChromPeaks-massifquant setMethod("integrate", signature(f = "MassifquantParam"), function(f) return(f@integrate)) -##' @rdname featureDetection-massifquant +#' @rdname findChromPeaks-massifquant setReplaceMethod("integrate", "MassifquantParam", function(object, value) { object@integrate <- as.integer(value) if (validObject(object)) return(object) }) -##' @description \code{mzdiff},\code{mzdiff<-}: getter and setter for the -##' \code{mzdiff} slot of the object. -##' @rdname featureDetection-massifquant +#' @description \code{mzdiff},\code{mzdiff<-}: getter and setter for the +#' \code{mzdiff} slot of the object. +#' +#' @rdname findChromPeaks-massifquant setMethod("mzdiff", "MassifquantParam", function(object) return(object@mzdiff)) -##' @rdname featureDetection-massifquant +#' @rdname findChromPeaks-massifquant setReplaceMethod("mzdiff", "MassifquantParam", function(object, value) { object@mzdiff <- value if (validObject(object)) return(object) }) -##' @description \code{fitgauss},\code{fitgauss<-}: getter and setter for the -##' \code{fitgauss} slot of the object. -##' @rdname featureDetection-massifquant +#' @description \code{fitgauss},\code{fitgauss<-}: getter and setter for the +#' \code{fitgauss} slot of the object. +#' +#' @rdname findChromPeaks-massifquant setMethod("fitgauss", "MassifquantParam", function(object) return(object@fitgauss)) -##' @rdname featureDetection-massifquant +#' @rdname findChromPeaks-massifquant setReplaceMethod("fitgauss", "MassifquantParam", function(object, value) { object@fitgauss <- value if (validObject(object)) return(object) }) -##' @description \code{noise},\code{noise<-}: getter and setter for the -##' \code{noise} slot of the object. 
-##' @rdname featureDetection-massifquant +#' @description \code{noise},\code{noise<-}: getter and setter for the +#' \code{noise} slot of the object. +#' +#' @rdname findChromPeaks-massifquant setMethod("noise", "MassifquantParam", function(object) return(object@noise)) -##' @rdname featureDetection-massifquant +#' @rdname findChromPeaks-massifquant setReplaceMethod("noise", "MassifquantParam", function(object, value) { object@noise <- value if (validObject(object)) return(object) }) -##' @description \code{verboseColumns},\code{verboseColumns<-}: getter and -##' setter for the \code{verboseColumns} slot of the object. -##' @rdname featureDetection-massifquant +#' @description \code{verboseColumns},\code{verboseColumns<-}: getter and +#' setter for the \code{verboseColumns} slot of the object. +#' +#' @rdname findChromPeaks-massifquant setMethod("verboseColumns", "MassifquantParam", function(object) return(object@verboseColumns)) -##' @rdname featureDetection-massifquant +#' @rdname findChromPeaks-massifquant setReplaceMethod("verboseColumns", "MassifquantParam", function(object, value) { object@verboseColumns <- value if (validObject(object)) return(object) }) -##' @aliases criticalValue -##' @description \code{criticalValue},\code{criticalValue<-}: getter and -##' setter for the \code{criticalValue} slot of the object. -##' @rdname featureDetection-massifquant +#' @aliases criticalValue +#' +#' @description \code{criticalValue},\code{criticalValue<-}: getter and +#' setter for the \code{criticalValue} slot of the object. 
+#' +#' @rdname findChromPeaks-massifquant setMethod("criticalValue", "MassifquantParam", function(object) return(object@criticalValue)) -##' @aliases criticalValue<- -##' @rdname featureDetection-massifquant +#' @aliases criticalValue<- +#' +#' @rdname findChromPeaks-massifquant setReplaceMethod("criticalValue", "MassifquantParam", function(object, value) { object@criticalValue <- value if (validObject(object)) return(object) }) -##' @aliases consecMissedLimit -##' @description \code{consecMissedLimit},\code{consecMissedLimit<-}: getter and -##' setter for the \code{consecMissedLimit} slot of the object. -##' @rdname featureDetection-massifquant +#' @aliases consecMissedLimit +#' +#' @description \code{consecMissedLimit},\code{consecMissedLimit<-}: getter and +#' setter for the \code{consecMissedLimit} slot of the object. +#' +#' @rdname findChromPeaks-massifquant setMethod("consecMissedLimit", "MassifquantParam", function(object) return(object@consecMissedLimit)) -##' @aliases consecMissedLimit<- -##' @rdname featureDetection-massifquant +#' @aliases consecMissedLimit<- +#' +#' @rdname findChromPeaks-massifquant setReplaceMethod("consecMissedLimit", "MassifquantParam", function(object, value) { object@consecMissedLimit <- as.integer(value) @@ -583,42 +688,51 @@ setReplaceMethod("consecMissedLimit", "MassifquantParam", return(object) }) -##' @aliases unions -##' @description \code{unions},\code{unions<-}: getter and -##' setter for the \code{unions} slot of the object. -##' @rdname featureDetection-massifquant +#' @aliases unions +#' +#' @description \code{unions},\code{unions<-}: getter and +#' setter for the \code{unions} slot of the object. 
+#' +#' @rdname findChromPeaks-massifquant setMethod("unions", "MassifquantParam", function(object) return(object@unions)) -##' @aliases unions<- -##' @rdname featureDetection-massifquant +#' @aliases unions<- +#' +#' @rdname findChromPeaks-massifquant setReplaceMethod("unions", "MassifquantParam", function(object, value) { object@unions <- as.integer(value) if (validObject(object)) return(object) }) -##' @aliases checkBack -##' @description \code{checkBack},\code{checkBack<-}: getter and -##' setter for the \code{checkBack} slot of the object. -##' @rdname featureDetection-massifquant +#' @aliases checkBack +#' +#' @description \code{checkBack},\code{checkBack<-}: getter and +#' setter for the \code{checkBack} slot of the object. +#' +#' @rdname findChromPeaks-massifquant setMethod("checkBack", "MassifquantParam", function(object) return(object@checkBack)) -##' @aliases checkBack<- -##' @rdname featureDetection-massifquant +#' @aliases checkBack<- +#' +#' @rdname findChromPeaks-massifquant setReplaceMethod("checkBack", "MassifquantParam", function(object, value) { object@checkBack <- as.integer(value) if (validObject(object)) return(object) }) -##' @aliases withWave -##' @description \code{withWave},\code{withWave<-}: getter and -##' setter for the \code{withWave} slot of the object. -##' @rdname featureDetection-massifquant +#' @aliases withWave +#' +#' @description \code{withWave},\code{withWave<-}: getter and +#' setter for the \code{withWave} slot of the object. +#' +#' @rdname findChromPeaks-massifquant setMethod("withWave", "MassifquantParam", function(object) return(object@withWave)) -##' @aliases withWave<- -##' @rdname featureDetection-massifquant +#' @aliases withWave<- +#' +#' @rdname findChromPeaks-massifquant setReplaceMethod("withWave", "MassifquantParam", function(object, value) { object@withWave <- value if (validObject(object)) @@ -634,7 +748,7 @@ setMethod("initialize", "MSWParam", function(.Object, ...) { callNextMethod(.Object, ...) 
}) -##' @rdname featureDetection-MSW +#' @rdname findPeaks-MSW setMethod("show", "MSWParam", function(object) { cat("Object of class: ", class(object), "\n") cat("Parameters:\n") @@ -657,149 +771,178 @@ setMethod("show", "MSWParam", function(object) { } }) -##' @description \code{snthresh},\code{snthresh<-}: getter and setter for the -##' \code{snthresh} slot of the object. -##' @rdname featureDetection-MSW +#' @description \code{snthresh},\code{snthresh<-}: getter and setter for the +#' \code{snthresh} slot of the object. +#' +#' @rdname findPeaks-MSW setMethod("snthresh", "MSWParam", function(object){ return(object@snthresh)}) -##' @param value The value for the slot. -##' @rdname featureDetection-MSW +#' @param value The value for the slot. +#' +#' @rdname findPeaks-MSW setReplaceMethod("snthresh", "MSWParam", function(object, value) { object@snthresh <- value if (validObject(object)) return(object) }) -##' @description \code{verboseColumns},\code{verboseColumns<-}: getter and setter -##' for the \code{verboseColumns} slot of the object. -##' @rdname featureDetection-MSW +#' @description \code{verboseColumns},\code{verboseColumns<-}: getter and setter +#' for the \code{verboseColumns} slot of the object. +#' +#' @rdname findPeaks-MSW setMethod("verboseColumns", "MSWParam", function(object){ return(object@verboseColumns)}) -##' @rdname featureDetection-MSW +#' @rdname findPeaks-MSW setReplaceMethod("verboseColumns", "MSWParam", function(object, value) { object@verboseColumns <- value if (validObject(object)) return(object) }) -##' @aliases scales -##' @description \code{scales},\code{scales<-}: getter and setter for the -##' \code{scales} slot of the object. -##' @rdname featureDetection-MSW +#' @aliases scales +#' +#' @description \code{scales},\code{scales<-}: getter and setter for the +#' \code{scales} slot of the object. 
+#' +#' @rdname findPeaks-MSW setMethod("scales", "MSWParam", function(object){ return(object@scales)}) -##' @aliases scales<- -##' @rdname featureDetection-MSW +#' @aliases scales<- +#' +#' @rdname findPeaks-MSW setReplaceMethod("scales", "MSWParam", function(object, value) { object@scales <- value if (validObject(object)) return(object) }) -##' @aliases nearbyPeak -##' @description \code{nearbyPeak},\code{nearbyPeak<-}: getter and setter for the -##' \code{nearbyPeak} slot of the object. -##' @rdname featureDetection-MSW +#' @aliases nearbyPeak +#' +#' @description \code{nearbyPeak},\code{nearbyPeak<-}: getter and setter for the +#' \code{nearbyPeak} slot of the object. +#' +#' @rdname findPeaks-MSW setMethod("nearbyPeak", "MSWParam", function(object){ return(object@nearbyPeak)}) -##' @aliases nearbyPeak<- -##' @rdname featureDetection-MSW +#' @aliases nearbyPeak<- +#' +#' @rdname findPeaks-MSW setReplaceMethod("nearbyPeak", "MSWParam", function(object, value) { object@nearbyPeak <- value if (validObject(object)) return(object) }) -##' @aliases peakScaleRange -##' @description \code{peakScaleRange},\code{peakScaleRange<-}: getter and setter -##' for the \code{peakScaleRange} slot of the object. -##' @rdname featureDetection-MSW +#' @aliases peakScaleRange +#' +#' @description \code{peakScaleRange},\code{peakScaleRange<-}: getter and setter +#' for the \code{peakScaleRange} slot of the object. +#' +#' @rdname findPeaks-MSW setMethod("peakScaleRange", "MSWParam", function(object){ return(object@peakScaleRange)}) -##' @aliases peakScaleRange<- -##' @rdname featureDetection-MSW +#' @aliases peakScaleRange<- +#' +#' @rdname findPeaks-MSW setReplaceMethod("peakScaleRange", "MSWParam", function(object, value) { object@peakScaleRange <- value if (validObject(object)) return(object) }) -##' @aliases ampTh -##' @description \code{ampTh},\code{ampTh<-}: getter and setter for the -##' \code{ampTh} slot of the object. 
-##' @rdname featureDetection-MSW +#' @aliases ampTh +#' +#' @description \code{ampTh},\code{ampTh<-}: getter and setter for the +#' \code{ampTh} slot of the object. +#' +#' @rdname findPeaks-MSW setMethod("ampTh", "MSWParam", function(object){ return(object@ampTh)}) -##' @aliases ampTh<- -##' @rdname featureDetection-MSW +#' @aliases ampTh<- +#' +#' @rdname findPeaks-MSW setReplaceMethod("ampTh", "MSWParam", function(object, value) { object@ampTh <- value if (validObject(object)) return(object) }) -##' @aliases minNoiseLevel -##' @description \code{minNoiseLevel},\code{minNoiseLevel<-}: getter and setter -##' for the \code{minNoiseLevel} slot of the object. -##' @rdname featureDetection-MSW +#' @aliases minNoiseLevel +#' +#' @description \code{minNoiseLevel},\code{minNoiseLevel<-}: getter and setter +#' for the \code{minNoiseLevel} slot of the object. +#' +#' @rdname findPeaks-MSW setMethod("minNoiseLevel", "MSWParam", function(object){ return(object@minNoiseLevel)}) -##' @aliases minNoiseLevel<- -##' @rdname featureDetection-MSW +#' @aliases minNoiseLevel<- +#' +#' @rdname findPeaks-MSW setReplaceMethod("minNoiseLevel", "MSWParam", function(object, value) { object@minNoiseLevel <- value if (validObject(object)) return(object) }) -##' @aliases ridgeLength -##' @description \code{ridgeLength},\code{ridgeLength<-}: getter and setter for -##' the \code{ridgeLength} slot of the object. -##' @rdname featureDetection-MSW +#' @aliases ridgeLength +#' +#' @description \code{ridgeLength},\code{ridgeLength<-}: getter and setter for +#' the \code{ridgeLength} slot of the object. 
+#' +#' @rdname findPeaks-MSW setMethod("ridgeLength", "MSWParam", function(object){ return(object@ridgeLength)}) -##' @aliases ridgeLength<- -##' @rdname featureDetection-MSW +#' @aliases ridgeLength<- +#' +#' @rdname findPeaks-MSW setReplaceMethod("ridgeLength", "MSWParam", function(object, value) { object@ridgeLength <- value if (validObject(object)) return(object) }) -##' @aliases peakThr -##' @description \code{peakThr},\code{peakThr<-}: getter and setter for the -##' \code{peakThr} slot of the object. -##' @rdname featureDetection-MSW +#' @aliases peakThr +#' +#' @description \code{peakThr},\code{peakThr<-}: getter and setter for the +#' \code{peakThr} slot of the object. +#' +#' @rdname findPeaks-MSW setMethod("peakThr", "MSWParam", function(object){ return(object@peakThr)}) -##' @aliases peakThr<- -##' @rdname featureDetection-MSW +#' @aliases peakThr<- +#' +#' @rdname findPeaks-MSW setReplaceMethod("peakThr", "MSWParam", function(object, value) { object@peakThr <- value if (validObject(object)) return(object) }) -##' @aliases tuneIn -##' @description \code{tuneIn},\code{tuneIn<-}: getter and setter for the -##' \code{tuneIn} slot of the object. -##' @rdname featureDetection-MSW +#' @aliases tuneIn +#' +#' @description \code{tuneIn},\code{tuneIn<-}: getter and setter for the +#' \code{tuneIn} slot of the object. +#' +#' @rdname findPeaks-MSW setMethod("tuneIn", "MSWParam", function(object){ return(object@tuneIn)}) -##' @aliases tuneIn<- -##' @rdname featureDetection-MSW +#' @aliases tuneIn<- +#' +#' @rdname findPeaks-MSW setReplaceMethod("tuneIn", "MSWParam", function(object, value) { object@tuneIn <- value if (validObject(object)) return(object) }) -##' @aliases addParams -##' @description \code{addParams},\code{addParams<-}: getter and setter for the -##' \code{addParams} slot of the object. 
This slot stores optional additional -##' parameters to be passed to the -##' \code{\link[MassSpecWavelet]{identifyMajorPeaks}} and -##' \code{\link[MassSpecWavelet]{sav.gol}} functions from the -##' \code{MassSpecWavelet} package. -##' -##' @rdname featureDetection-MSW +#' @aliases addParams +#' +#' @description \code{addParams},\code{addParams<-}: getter and setter for the +#' \code{addParams} slot of the object. This slot stores optional additional +#' parameters to be passed to the +#' \code{\link[MassSpecWavelet]{identifyMajorPeaks}} and +#' \code{\link[MassSpecWavelet]{sav.gol}} functions from the +#' \code{MassSpecWavelet} package. +#' +#' @rdname findPeaks-MSW setMethod("addParams", "MSWParam", function(object){ return(object@addParams)}) -##' @aliases addParams<- -##' @rdname featureDetection-MSW +#' @aliases addParams<- +#' +#' @rdname findPeaks-MSW setReplaceMethod("addParams", "MSWParam", function(object, value) { object@addParams <- value if (validObject(object)) @@ -829,7 +972,7 @@ setMethod("initialize", "CentWavePredIsoParam", function(.Object, ...) { callNextMethod(.Object, ...) }) -##' @rdname featureDetection-centWaveWithPredIsoROIs +#' @rdname findChromPeaks-centWaveWithPredIsoROIs setMethod("show", "CentWavePredIsoParam", function(object) { cat("Object of class: ", class(object), "\n") cat("Parameters:\n") @@ -853,56 +996,68 @@ setMethod("show", "CentWavePredIsoParam", function(object) { cat(" polarity:", polarity(object), "\n") }) -##' @aliases snthreshIsoROIs -##' @description \code{snthreshIsoROIs},\code{snthreshIsoROIs<-}: getter and -##' setter for the \code{snthreshIsoROIs} slot of the object. -##' @rdname featureDetection-centWaveWithPredIsoROIs +#' @aliases snthreshIsoROIs +#' +#' @description \code{snthreshIsoROIs},\code{snthreshIsoROIs<-}: getter and +#' setter for the \code{snthreshIsoROIs} slot of the object. 
+#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs setMethod("snthreshIsoROIs", "CentWavePredIsoParam", function(object){ return(object@snthreshIsoROIs)}) -##' @aliases snthreshIsoROIs<- -##' @rdname featureDetection-centWaveWithPredIsoROIs +#' @aliases snthreshIsoROIs<- +#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs setReplaceMethod("snthreshIsoROIs", "CentWavePredIsoParam", function(object, value) { object@snthreshIsoROIs <- value if (validObject(object)) return(object) }) -##' @aliases maxCharge -##' @description \code{maxCharge},\code{maxCharge<-}: getter and -##' setter for the \code{maxCharge} slot of the object. -##' @rdname featureDetection-centWaveWithPredIsoROIs +#' @aliases maxCharge +#' +#' @description \code{maxCharge},\code{maxCharge<-}: getter and +#' setter for the \code{maxCharge} slot of the object. +#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs setMethod("maxCharge", "CentWavePredIsoParam", function(object){ return(object@maxCharge)}) -##' @aliases maxCharge<- -##' @rdname featureDetection-centWaveWithPredIsoROIs +#' @aliases maxCharge<- +#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs setReplaceMethod("maxCharge", "CentWavePredIsoParam", function(object, value) { object@maxCharge <- as.integer(value) if (validObject(object)) return(object) }) -##' @aliases maxIso -##' @description \code{maxIso},\code{maxIso<-}: getter and -##' setter for the \code{maxIso} slot of the object. -##' @rdname featureDetection-centWaveWithPredIsoROIs +#' @aliases maxIso +#' +#' @description \code{maxIso},\code{maxIso<-}: getter and +#' setter for the \code{maxIso} slot of the object. 
+#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs setMethod("maxIso", "CentWavePredIsoParam", function(object){ return(object@maxIso)}) -##' @aliases maxIso<- -##' @rdname featureDetection-centWaveWithPredIsoROIs +#' @aliases maxIso<- +#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs setReplaceMethod("maxIso", "CentWavePredIsoParam", function(object, value) { object@maxIso <- as.integer(value) if (validObject(object)) return(object) }) -##' @aliases mzIntervalExtension -##' @description \code{mzIntervalExtension},\code{mzIntervalExtension<-}: getter -##' and setter for the \code{mzIntervalExtension} slot of the object. -##' @rdname featureDetection-centWaveWithPredIsoROIs +#' @aliases mzIntervalExtension +#' +#' @description \code{mzIntervalExtension},\code{mzIntervalExtension<-}: getter +#' and setter for the \code{mzIntervalExtension} slot of the object. +#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs setMethod("mzIntervalExtension", "CentWavePredIsoParam", function(object){ return(object@mzIntervalExtension)}) -##' @aliases mzIntervalExtension<- -##' @rdname featureDetection-centWaveWithPredIsoROIs +#' @aliases mzIntervalExtension<- +#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs setReplaceMethod("mzIntervalExtension", "CentWavePredIsoParam", function(object, value) { object@mzIntervalExtension <- value @@ -910,15 +1065,727 @@ setReplaceMethod("mzIntervalExtension", "CentWavePredIsoParam", return(object) }) -##' @description \code{polarity},\code{polarity<-}: getter and -##' setter for the \code{polarity} slot of the object. -##' @rdname featureDetection-centWaveWithPredIsoROIs +#' @description \code{polarity},\code{polarity<-}: getter and +#' setter for the \code{polarity} slot of the object. 
+#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs setMethod("polarity", "CentWavePredIsoParam", function(object){ return(object@polarity)}) -##' @aliases polarity<- -##' @rdname featureDetection-centWaveWithPredIsoROIs +#' @aliases polarity<- +#' +#' @rdname findChromPeaks-centWaveWithPredIsoROIs setReplaceMethod("polarity", "CentWavePredIsoParam", function(object, value) { object@polarity <- value if (validObject(object)) return(object) }) + + +############################################################ +## PeakDensityParam +setMethod("initialize", "PeakDensityParam", function(.Object, ...) { + classVersion(.Object)["PeakDensityParam"] <- "0.0.1" + callNextMethod(.Object, ...) +}) + +#' @rdname groupChromPeaks-density +setMethod("show", "PeakDensityParam", function(object) { + cat("Object of class: ", class(object), "\n") + cat("Parameters:\n") + cat(" sampleGroups:", class(object@sampleGroups), "of length", + length(object@sampleGroups), "\n") + cat(" bw:", object@bw, "\n") + cat(" minFraction:", minFraction(object), "\n") + cat(" minSamples:", minSamples(object), "\n") + cat(" binSize:", binSize(object), "\n") + cat(" maxFeatures:", maxFeatures(object), "\n") +}) + +#' @aliases sampleGroups +#' +#' @description \code{sampleGroups},\code{sampleGroups<-}: getter and setter +#' for the \code{sampleGroups} slot of the object. +#' +#' @rdname groupChromPeaks-density +setMethod("sampleGroups", "PeakDensityParam", function(object){ + return(object@sampleGroups)}) +#' @aliases sampleGroups<- +#' +#' @param value The value for the slot. +#' +#' @rdname groupChromPeaks-density +setReplaceMethod("sampleGroups", "PeakDensityParam", function(object, value) { + object@sampleGroups <- value + if (validObject(object)) + return(object) +}) + +#' @aliases bw +#' +#' @description \code{bw},\code{bw<-}: getter and setter for the \code{bw} slot +#' of the object. 
+#' +#' @rdname groupChromPeaks-density +setMethod("bw", "PeakDensityParam", function(object){ + return(object@bw)}) +#' @aliases bw<- +#' +#' @rdname groupChromPeaks-density +setReplaceMethod("bw", "PeakDensityParam", function(object, value) { + object@bw <- value + if (validObject(object)) + return(object) +}) + +#' @aliases minFraction +#' +#' @description \code{minFraction},\code{minFraction<-}: getter and setter for +#' the \code{minFraction} slot of the object. +#' +#' @rdname groupChromPeaks-density +setMethod("minFraction", "PeakDensityParam", function(object){ + return(object@minFraction)}) +#' @aliases minFraction<- +#' +#' @rdname groupChromPeaks-density +setReplaceMethod("minFraction", "PeakDensityParam", function(object, value) { + object@minFraction <- value + if (validObject(object)) + return(object) +}) + +#' @aliases minSamples +#' +#' @description \code{minSamples},\code{minSamples<-}: getter and setter for the +#' \code{minSamples} slot of the object. +#' +#' @rdname groupChromPeaks-density +setMethod("minSamples", "PeakDensityParam", function(object){ + return(object@minSamples)}) +#' @aliases minSamples<- +#' +#' @rdname groupChromPeaks-density +setReplaceMethod("minSamples", "PeakDensityParam", function(object, value) { + object@minSamples <- value + if (validObject(object)) + return(object) +}) + +#' @description \code{binSize},\code{binSize<-}: getter and setter for the +#' \code{binSize} slot of the object. +#' +#' @rdname groupChromPeaks-density +setMethod("binSize", "PeakDensityParam", function(object){ + return(object@binSize)}) +#' @rdname groupChromPeaks-density +setReplaceMethod("binSize", "PeakDensityParam", function(object, value) { + object@binSize <- value + if (validObject(object)) + return(object) +}) + +#' @aliases maxFeatures +#' +#' @description \code{maxFeatures},\code{maxFeatures<-}: getter and setter for +#' the \code{maxFeatures} slot of the object. 
+#' +#' @rdname groupChromPeaks-density +setMethod("maxFeatures", "PeakDensityParam", function(object){ + return(object@maxFeatures)}) +#' @aliases maxFeatures<- +#' +#' @rdname groupChromPeaks-density +setReplaceMethod("maxFeatures", "PeakDensityParam", function(object, value) { + object@maxFeatures <- value + if (validObject(object)) + return(object) +}) + + +############################################################ +## MzClustParam +setMethod("initialize", "MzClustParam", function(.Object, ...) { + classVersion(.Object)["MzClustParam"] <- "0.0.1" + callNextMethod(.Object, ...) +}) + +#' @rdname groupChromPeaks-mzClust +setMethod("show", "MzClustParam", function(object) { + cat("Object of class: ", class(object), "\n") + cat("Parameters:\n") + cat(" sampleGroups:", class(object@sampleGroups), "of length", + length(object@sampleGroups), "\n") + cat(" ppm:", object@ppm, "\n") + cat(" absMz:", object@absMz, "\n") + cat(" minFraction:", minFraction(object), "\n") + cat(" minSamples:", minSamples(object), "\n") +}) + +#' @description \code{sampleGroups},\code{sampleGroups<-}: getter and setter +#' for the \code{sampleGroups} slot of the object. +#' +#' @rdname groupChromPeaks-mzClust +setMethod("sampleGroups", "MzClustParam", function(object){ + return(object@sampleGroups)}) +#' @param value The value for the slot. +#' +#' @rdname groupChromPeaks-mzClust +setReplaceMethod("sampleGroups", "MzClustParam", function(object, value) { + object@sampleGroups <- value + if (validObject(object)) + return(object) +}) + +#' @description \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} +#' slot of the object. 
+#' +#' @rdname groupChromPeaks-mzClust +setMethod("ppm", "MzClustParam", function(object){ + return(object@ppm)}) +#' @rdname groupChromPeaks-mzClust +setReplaceMethod("ppm", "MzClustParam", function(object, value) { + object@ppm <- value + if (validObject(object)) + return(object) +}) + +#' @aliases absMz +#' +#' @description \code{absMz},\code{absMz<-}: getter and setter for the +#' \code{absMz} slot of the object. +#' +#' @rdname groupChromPeaks-mzClust +setMethod("absMz", "MzClustParam", function(object){ + return(object@absMz)}) +#' @aliases absMz<- +#' +#' @rdname groupChromPeaks-mzClust +setReplaceMethod("absMz", "MzClustParam", function(object, value) { + object@absMz <- value + if (validObject(object)) + return(object) +}) + +#' @description \code{minFraction},\code{minFraction<-}: getter and setter for +#' the \code{minFraction} slot of the object. +#' +#' @rdname groupChromPeaks-mzClust +setMethod("minFraction", "MzClustParam", function(object){ + return(object@minFraction)}) +#' @rdname groupChromPeaks-mzClust +setReplaceMethod("minFraction", "MzClustParam", function(object, value) { + object@minFraction <- value + if (validObject(object)) + return(object) +}) + +#' @description \code{minSamples},\code{minSamples<-}: getter and setter for the +#' \code{minSamples} slot of the object. +#' +#' @rdname groupChromPeaks-mzClust +setMethod("minSamples", "MzClustParam", function(object){ + return(object@minSamples)}) +#' @rdname groupChromPeaks-mzClust +setReplaceMethod("minSamples", "MzClustParam", function(object, value) { + object@minSamples <- value + if (validObject(object)) + return(object) +}) + + +############################################################ +## NearestPeaksParam +setMethod("initialize", "NearestPeaksParam", function(.Object, ...) { + classVersion(.Object)["NearestPeaksParam"] <- "0.0.1" + callNextMethod(.Object, ...) 
+}) + +#' @rdname groupChromPeaks-nearest +setMethod("show", "NearestPeaksParam", function(object) { + cat("Object of class: ", class(object), "\n") + cat("Parameters:\n") + cat(" sampleGroups:", class(object@sampleGroups), "of length", + length(object@sampleGroups), "\n") + cat(" mzVsRtBalance:", object@mzVsRtBalance, "\n") + cat(" absMz:", object@absMz, "\n") + cat(" absRt:", object@absRt, "\n") + cat(" kNN:", object@kNN, "\n") +}) + +#' @description \code{sampleGroups},\code{sampleGroups<-}: getter and setter +#' for the \code{sampleGroups} slot of the object. +#' +#' @rdname groupChromPeaks-nearest +setMethod("sampleGroups", "NearestPeaksParam", function(object){ + return(object@sampleGroups)}) +#' @param value The value for the slot. +#' +#' @rdname groupChromPeaks-nearest +setReplaceMethod("sampleGroups", "NearestPeaksParam", function(object, value) { + object@sampleGroups <- value + if (validObject(object)) + return(object) +}) + +#' @aliases mzVsRtBalance +#' +#' @description \code{mzVsRtBalance},\code{mzVsRtBalance<-}: getter and setter +#' for the \code{mzVsRtBalance} slot of the object. +#' +#' @rdname groupChromPeaks-nearest +setMethod("mzVsRtBalance", "NearestPeaksParam", function(object){ + return(object@mzVsRtBalance)}) +#' @aliases mzVsRtBalance<- +#' +#' @rdname groupChromPeaks-nearest +setReplaceMethod("mzVsRtBalance", "NearestPeaksParam", function(object, value) { + object@mzVsRtBalance <- value + if (validObject(object)) + return(object) +}) + +#' @description \code{absMz},\code{absMz<-}: getter and setter for the +#' \code{absMz} slot of the object. 
+#' +#' @rdname groupChromPeaks-nearest +setMethod("absMz", "NearestPeaksParam", function(object){ + return(object@absMz)}) +#' @rdname groupChromPeaks-nearest +setReplaceMethod("absMz", "NearestPeaksParam", function(object, value) { + object@absMz <- value + if (validObject(object)) + return(object) +}) + +#' @aliases absRt +#' +#' @description \code{absRt},\code{absRt<-}: getter and setter for the +#' \code{absRt} slot of the object. +#' +#' @rdname groupChromPeaks-nearest +setMethod("absRt", "NearestPeaksParam", function(object){ + return(object@absRt)}) +#' @aliases absRt<- +#' +#' @rdname groupChromPeaks-nearest +setReplaceMethod("absRt", "NearestPeaksParam", function(object, value) { + object@absRt <- value + if (validObject(object)) + return(object) +}) + +#' @aliases kNN +#' +#' @description \code{kNN},\code{kNN<-}: getter and setter for the +#' \code{kNN} slot of the object. +#' +#' @rdname groupChromPeaks-nearest +setMethod("kNN", "NearestPeaksParam", function(object){ + return(object@kNN)}) +#' @aliases kNN<- +#' +#' @rdname groupChromPeaks-nearest +setReplaceMethod("kNN", "NearestPeaksParam", function(object, value) { + object@kNN <- value + if (validObject(object)) + return(object) +}) + + +############################################################ +## PeakGroupsParam +setMethod("initialize", "PeakGroupsParam", function(.Object, ...) { + classVersion(.Object)["PeakGroupsParam"] <- "0.0.2" + callNextMethod(.Object, ...) 
+}) + +#' @rdname adjustRtime-peakGroups +setMethod("show", "PeakGroupsParam", function(object) { + cat("Object of class: ", class(object), "\n") + cat("Parameters:\n") + cat(" minFraction:", object@minFraction, "\n") + cat(" extraPeaks:", object@extraPeaks, "\n") + cat(" smooth:", object@smooth, "\n") + cat(" span:", object@span, "\n") + cat(" family:", object@family, "\n") + pgm <- peakGroupsMatrix(object) + if (nrow(pgm)) + cat(" number of peak groups:", nrow(pgm), "\n") +}) + +#' @description \code{minFraction},\code{minFraction<-}: getter and setter +#' for the \code{minFraction} slot of the object. +#' +#' @rdname adjustRtime-peakGroups +setMethod("minFraction", "PeakGroupsParam", function(object){ + return(object@minFraction)}) +#' @param value The value for the slot. +#' +#' @rdname adjustRtime-peakGroups +setReplaceMethod("minFraction", "PeakGroupsParam", function(object, value) { + object@minFraction <- value + if (validObject(object)) + return(object) +}) + +#' @aliases extraPeaks +#' +#' @description \code{extraPeaks},\code{extraPeaks<-}: getter and setter +#' for the \code{extraPeaks} slot of the object. +#' +#' @rdname adjustRtime-peakGroups +setMethod("extraPeaks", "PeakGroupsParam", function(object){ + return(object@extraPeaks)}) +#' @aliases extraPeaks<- +#' +#' @rdname adjustRtime-peakGroups +setReplaceMethod("extraPeaks", "PeakGroupsParam", function(object, value) { + object@extraPeaks <- value + if (validObject(object)) + return(object) +}) + +#' @aliases smooth +#' +#' @description \code{smooth},\code{smooth<-}: getter and setter +#' for the \code{smooth} slot of the object. +#' +#' @param x a \code{PeakGroupsParam} object. 
+#' +#' @rdname adjustRtime-peakGroups +setMethod("smooth", "PeakGroupsParam", function(x){ + return(x@smooth)}) +#' @aliases smooth<- +#' +#' @rdname adjustRtime-peakGroups +setReplaceMethod("smooth", "PeakGroupsParam", function(object, value) { + object@smooth <- value + if (validObject(object)) + return(object) +}) + +#' @aliases span +#' +#' @description \code{span},\code{span<-}: getter and setter +#' for the \code{span} slot of the object. +#' +#' @rdname adjustRtime-peakGroups +setMethod("span", "PeakGroupsParam", function(object){ + return(object@span)}) +#' @aliases span<- +#' +#' @rdname adjustRtime-peakGroups +setReplaceMethod("span", "PeakGroupsParam", function(object, value) { + object@span <- value + if (validObject(object)) + return(object) +}) + +#' @aliases family +#' +#' @description \code{family},\code{family<-}: getter and setter +#' for the \code{family} slot of the object. +#' +#' @rdname adjustRtime-peakGroups +setMethod("family", "PeakGroupsParam", function(object){ + return(object@family)}) +#' @aliases family<- +#' +#' @rdname adjustRtime-peakGroups +setReplaceMethod("family", "PeakGroupsParam", function(object, value) { + object@family <- value + if (validObject(object)) + return(object) +}) + +#' @aliases peakGroupsMatrix +#' +#' @description \code{peakGroupsMatrix},\code{peakGroupsMatrix<-}: getter and +#' setter for the \code{peakGroupsMatrix} slot of the object. +#' +#' @rdname adjustRtime-peakGroups +setMethod("peakGroupsMatrix", "PeakGroupsParam", function(object){ + return(object@peakGroupsMatrix)}) +#' @aliases peakGroupsMatrix<- +#' +#' @rdname adjustRtime-peakGroups +setReplaceMethod("peakGroupsMatrix", "PeakGroupsParam", function(object, value) { + object@peakGroupsMatrix <- value + if (validObject(object)) + return(object) +}) + + +############################################################ +## ObiwarpParam +setMethod("initialize", "ObiwarpParam", function(.Object, ...) 
{ + classVersion(.Object)["ObiwarpParam"] <- "0.0.1" + callNextMethod(.Object, ...) +}) + +#' @rdname adjustRtime-obiwarp +setMethod("show", "ObiwarpParam", function(object) { + cat("Object of class: ", class(object), "\n") + cat("Parameters:\n") + cat(" binSize:", binSize(object), "\n") + cat(" centerSample:", centerSample(object), "\n") + cat(" response:", response(object), "\n") + cat(" distFun:", distFun(object), "\n") + cat(" gapInit:", gapInit(object), "\n") + cat(" gapExtend:", gapExtend(object), "\n") + cat(" factorDiag:", factorDiag(object), "\n") + cat(" factorGap:", factorGap(object), "\n") + cat(" localAlignment:", localAlignment(object), "\n") + cat(" initPenalty:", initPenalty(object), "\n") +}) + +#' @description \code{binSize},\code{binSize<-}: getter and setter +#' for the \code{binSize} slot of the object. +#' +#' @rdname adjustRtime-obiwarp +setMethod("binSize", "ObiwarpParam", function(object){ + return(object@binSize)}) +#' @param value The value for the slot. +#' +#' @rdname adjustRtime-obiwarp +setReplaceMethod("binSize", "ObiwarpParam", function(object, value) { + object@binSize <- value + if (validObject(object)) + return(object) +}) + +#' @aliases centerSample +#' +#' @description \code{centerSample},\code{centerSample<-}: getter and setter +#' for the \code{centerSample} slot of the object. +#' +#' @rdname adjustRtime-obiwarp +setMethod("centerSample", "ObiwarpParam", function(object){ + return(object@centerSample)}) +#' @aliases centerSample<- +#' +#' @rdname adjustRtime-obiwarp +setReplaceMethod("centerSample", "ObiwarpParam", function(object, value) { + object@centerSample <- as.integer(value) + if (validObject(object)) + return(object) +}) + +#' @aliases response +#' +#' @description \code{response},\code{response<-}: getter and setter +#' for the \code{response} slot of the object. 
+#' +#' @rdname adjustRtime-obiwarp +setMethod("response", "ObiwarpParam", function(object){ + return(object@response)}) +#' @aliases response<- +#' +#' @rdname adjustRtime-obiwarp +setReplaceMethod("response", "ObiwarpParam", function(object, value) { + object@response <- as.integer(value) + if (validObject(object)) + return(object) +}) + +#' @aliases distFun +#' +#' @description \code{distFun},\code{distFun<-}: getter and setter +#' for the \code{distFun} slot of the object. +#' +#' @rdname adjustRtime-obiwarp +setMethod("distFun", "ObiwarpParam", function(object){ + return(object@distFun)}) +#' @aliases distFun<- +#' +#' @rdname adjustRtime-obiwarp +setReplaceMethod("distFun", "ObiwarpParam", function(object, value) { + object@distFun <- value + if (validObject(object)) + return(object) +}) + +#' @aliases gapInit +#' +#' @description \code{gapInit},\code{gapInit<-}: getter and setter +#' for the \code{gapInit} slot of the object. +#' +#' @rdname adjustRtime-obiwarp +setMethod("gapInit", "ObiwarpParam", function(object){ + if (length(object@gapInit) == 0) { + if (object@distFun == "cor" | object@distFun == "cor_opt") + return(0.3) + if (object@distFun == "cov" | object@distFun == "prd") + return(0) + if (object@distFun == "euc") + return(0.9) + } + return(object@gapInit)}) +#' @aliases gapInit<- +#' +#' @rdname adjustRtime-obiwarp +setReplaceMethod("gapInit", "ObiwarpParam", function(object, value) { + object@gapInit <- value + if (validObject(object)) + return(object) +}) + +#' @aliases gapExtend +#' +#' @description \code{gapExtend},\code{gapExtend<-}: getter and setter +#' for the \code{gapExtend} slot of the object. 
+#' +#' @rdname adjustRtime-obiwarp +setMethod("gapExtend", "ObiwarpParam", function(object){ + if (length(object@gapExtend) == 0) { + if (object@distFun == "cor" | object@distFun == "cor_opt") + return(2.4) + if (object@distFun == "cov") + return(11.7) + if (object@distFun == "euc") + return(1.8) + if (object@distFun == "prd") + return(7.8) + } + return(object@gapExtend)}) +#' @aliases gapExtend<- +#' +#' @rdname adjustRtime-obiwarp +setReplaceMethod("gapExtend", "ObiwarpParam", function(object, value) { + object@gapExtend <- value + if (validObject(object)) + return(object) +}) + +#' @aliases factorDiag +#' +#' @description \code{factorDiag},\code{factorDiag<-}: getter and setter +#' for the \code{factorDiag} slot of the object. +#' +#' @rdname adjustRtime-obiwarp +setMethod("factorDiag", "ObiwarpParam", function(object){ + return(object@factorDiag)}) +#' @aliases factorDiag<- +#' +#' @rdname adjustRtime-obiwarp +setReplaceMethod("factorDiag", "ObiwarpParam", function(object, value) { + object@factorDiag <- value + if (validObject(object)) + return(object) +}) + +#' @aliases factorGap +#' +#' @description \code{factorGap},\code{factorGap<-}: getter and setter +#' for the \code{factorGap} slot of the object. +#' +#' @rdname adjustRtime-obiwarp +setMethod("factorGap", "ObiwarpParam", function(object){ + return(object@factorGap)}) +#' @aliases factorGap<- +#' +#' @rdname adjustRtime-obiwarp +setReplaceMethod("factorGap", "ObiwarpParam", function(object, value) { + object@factorGap <- value + if (validObject(object)) + return(object) +}) + +#' @aliases localAlignment +#' +#' @description \code{localAlignment},\code{localAlignment<-}: getter and setter +#' for the \code{localAlignment} slot of the object. 
+#' +#' @rdname adjustRtime-obiwarp +setMethod("localAlignment", "ObiwarpParam", function(object){ + return(object@localAlignment)}) +#' @aliases localAlignment<- +#' +#' @rdname adjustRtime-obiwarp +setReplaceMethod("localAlignment", "ObiwarpParam", function(object, value) { + object@localAlignment <- value + if (validObject(object)) + return(object) +}) + +#' @aliases initPenalty +#' +#' @description \code{initPenalty},\code{initPenalty<-}: getter and setter +#' for the \code{initPenalty} slot of the object. +#' +#' @rdname adjustRtime-obiwarp +setMethod("initPenalty", "ObiwarpParam", function(object){ + return(object@initPenalty)}) +#' @aliases initPenalty<- +#' +#' @rdname adjustRtime-obiwarp +setReplaceMethod("initPenalty", "ObiwarpParam", function(object, value) { + object@initPenalty <- value + if (validObject(object)) + return(object) +}) + +############################################################ +## FillChromPeaksParam +### +setMethod("initialize", "FillChromPeaksParam", function(.Object, ...) { + classVersion(.Object)["FillChromPeaksParam"] <- "0.0.1" + callNextMethod(.Object, ...) +}) +#' @rdname fillChromPeaks +setMethod("show", "FillChromPeaksParam", function(object) { + cat("Object of class: ", class(object), "\n") + cat("Parameters:\n") + cat(" expandMz:", object@expandMz, "\n") + cat(" expandRt:", object@expandRt, "\n") + cat(" ppm:", object@ppm, "\n") +}) + +#' @aliases expandMz +#' +#' @description \code{expandMz},\code{expandMz<-}: getter and setter +#' for the \code{expandMz} slot of the object. +#' +#' @param value The value for the slot. 
+#' +#' @rdname fillChromPeaks +setMethod("expandMz", "FillChromPeaksParam", function(object){ + return(object@expandMz)}) +#' @aliases expandMz<- +#' +#' @rdname fillChromPeaks +setReplaceMethod("expandMz", "FillChromPeaksParam", function(object, value) { + object@expandMz <- value + if (validObject(object)) + return(object) +}) + +#' @aliases expandRt +#' +#' @description \code{expandRt},\code{expandRt<-}: getter and setter +#' for the \code{expandRt} slot of the object. +#' +#' @rdname fillChromPeaks +setMethod("expandRt", "FillChromPeaksParam", function(object){ + return(object@expandRt)}) +#' @aliases expandRt<- +#' +#' @rdname fillChromPeaks +setReplaceMethod("expandRt", "FillChromPeaksParam", function(object, value) { + object@expandRt <- value + if (validObject(object)) + return(object) +}) + +#' @description \code{ppm},\code{ppm<-}: getter and setter +#' for the \code{ppm} slot of the object. +#' +#' @rdname fillChromPeaks +setMethod("ppm", "FillChromPeaksParam", function(object){ + return(object@ppm)}) +#' @rdname fillChromPeaks +setReplaceMethod("ppm", "FillChromPeaksParam", function(object, value) { + object@ppm <- value + if (validObject(object)) + return(object) +}) diff --git a/R/methods-XCMSnExp.R b/R/methods-XCMSnExp.R index 2b0ca14db..16149a3be 100644 --- a/R/methods-XCMSnExp.R +++ b/R/methods-XCMSnExp.R @@ -1,6 +1,7 @@ ## Methods for the XCMSnExp object representing untargeted metabolomics ## results -#' @include functions-XCMSnExp.R +#' @include functions-XCMSnExp.R do_groupChromPeaks-functions.R functions-utils.R +#' do_adjustRtime-functions.R methods-xcmsRaw.R functions-OnDiskMSnExp.R setMethod("initialize", "XCMSnExp", function(.Object, ...) { classVersion(.Object)["XCMSnExp"] <- "0.0.1" @@ -9,72 +10,101 @@ setMethod("initialize", "XCMSnExp", function(.Object, ...) { return(.Object) }) -##' @rdname XCMSnExp-class +#' @rdname XCMSnExp-class setMethod("show", "XCMSnExp", function(object) { callNextMethod() ## And not XCMSnExp related stuff. 
cat("- - - xcms preprocessing - - -\n") - if (hasDetectedFeatures(object)) { - cat("Feature detection:\n") - cat(" ", nrow(features(object)), " features identified in ", + if (hasChromPeaks(object)) { + cat("Chromatographic peak detection:\n") + ph <- processHistory(object, type = .PROCSTEP.PEAK.DETECTION) + cat(" method:", .param2string(ph[[1]]@param), "\n") + cat(" ", nrow(chromPeaks(object)), " peaks identified in ", length(fileNames(object)), " samples.\n", sep = "") cat(" On average ", - format(mean(table(features(object)[, "sample"])), digits = 3), - " features per sample.\n", sep = "") - } - if (hasAlignedFeatures(object)) { + format(mean(table(chromPeaks(object)[, "sample"])), digits = 3), + " chromatographic peaks per sample.\n", sep = "") } if (hasAdjustedRtime(object)) { + cat("Alignment/retention time adjustment:\n") + ph <- processHistory(object, type = .PROCSTEP.RTIME.CORRECTION) + cat(" method:", .param2string(ph[[1]]@param), "\n") + } + if (hasFeatures(object)) { + cat("Correspondence:\n") + ph <- processHistory(object, type = .PROCSTEP.PEAK.GROUPING) + cat(" method:", .param2string(ph[[1]]@param), "\n") + cat(" ", nrow(featureDefinitions(object)), " features identified.\n", + sep = "") + cat(" Median mz range of features: ", + format(median(featureDefinitions(object)[, "mzmax"] - + featureDefinitions(object)[, "mzmin"]), digits = 5), + "\n", sep = "") + cat(" Median rt range of features: ", + format(median(featureDefinitions(object)[, "rtmax"] - + featureDefinitions(object)[, "rtmin"]), digits = 5), + "\n", sep = "") + if (.hasFilledPeaks(object)) { + totF <- chromPeaks(object)[, "is_filled"] == 1 + fp <- chromPeaks(object)[totF, , drop = FALSE] + cat("", sum(totF), "filled peaks (on average", + mean(table(fp[, "sample"])), "per sample).\n") + } } }) -##' @aliases hasAdjustedRtime -##' -##' @description \code{hasAdjustedRtime}: whether the object provides adjusted -##' retention times. 
-##' -##' @rdname XCMSnExp-class +#' @aliases hasAdjustedRtime +#' +#' @description \code{hasAdjustedRtime}: whether the object provides adjusted +#' retention times. +#' +#' @rdname XCMSnExp-class setMethod("hasAdjustedRtime", "XCMSnExp", function(object) { return(hasAdjustedRtime(object@msFeatureData)) }) -##' @aliases hasAlignedFeatures -##' -##' @description \code{hasAlignedFeatures}: whether the object contains feature -##' alignment results. -##' -##' @rdname XCMSnExp-class -setMethod("hasAlignedFeatures", "XCMSnExp", function(object) { - return(hasAlignedFeatures(object@msFeatureData)) -}) - -##' @aliases hasDetectedFeatures -##' -##' @description \code{hasDetectedFeatures}: whether the object contains feature -##' detection results. -##' -##' @rdname XCMSnExp-class -setMethod("hasDetectedFeatures", "XCMSnExp", function(object) { - return(hasDetectedFeatures(object@msFeatureData)) -}) - -##' @aliases adjustedRtime -##' -##' @description The \code{adjustedRtime},\code{adjustedRtime<-} method -##' extract/set adjusted retention times. Retention times are adjusted by -##' retention time correction/adjustment methods. The \code{bySample} parameter -##' allows to specify whether the adjusted retention time should be grouped by -##' sample (file). -##' -##' @return For \code{adjustedRtime}: if \code{bySample = FALSE} a \code{numeric} -##' vector with the adjusted retention for each spectrum of all files/samples -##' within the object. If \code{bySample = TRUE } a \code{list} (length equal to -##' the number of samples) with adjusted retention times grouped by sample. -##' Returns \code{NULL} if no adjusted retention times are present. -##' -##' @rdname XCMSnExp-class +#' @aliases hasFeatures +#' +#' @description \code{hasFeatures}: whether the object contains correspondence +#' results (i.e. features). 
+#' +#' @rdname XCMSnExp-class +setMethod("hasFeatures", "XCMSnExp", function(object) { + return(hasFeatures(object@msFeatureData)) +}) + +#' @aliases hasChromPeaks +#' +#' @description \code{hasChromPeaks}: whether the object contains peak +#' detection results. +#' +#' @rdname XCMSnExp-class +setMethod("hasChromPeaks", "XCMSnExp", function(object) { + return(hasChromPeaks(object@msFeatureData)) +}) + +#' @aliases adjustedRtime +#' +#' @description \code{adjustedRtime},\code{adjustedRtime<-}: +#' extract/set adjusted retention times. \code{adjustedRtime<-} should not +#' be called manually, it is called internally by the +#' \code{\link{adjustRtime}} methods. For \code{XCMSnExp} objects, +#' \code{adjustedRtime<-} does also apply the retention time adjustment to +#' the chromatographic peaks in the object. The \code{bySample} parameter +#' allows to specify whether the adjusted retention time should be grouped +#' by sample (file). +#' +#' @return For \code{adjustedRtime}: if \code{bySample = FALSE} a \code{numeric} +#' vector with the adjusted retention for each spectrum of all files/samples +#' within the object. If \code{bySample = TRUE } a \code{list} (length equal +#' to the number of samples) with adjusted retention times grouped by +#' sample. Returns \code{NULL} if no adjusted retention times are present. +#' +#' @rdname XCMSnExp-class setMethod("adjustedRtime", "XCMSnExp", function(object, bySample = FALSE) { res <- adjustedRtime(object@msFeatureData) + if (length(res) == 0) + return(res) ## Adjusted retention time is a list of retention times. 
if (!bySample) { ## Have to re-order the adjusted retention times by spectrum name, such @@ -87,49 +117,72 @@ setMethod("adjustedRtime", "XCMSnExp", function(object, bySample = FALSE) { } return(res) }) -##' @aliases adjustedRtime<- -##' -##' @rdname XCMSnExp-class +#' @aliases adjustedRtime<- +#' +#' @rdname XCMSnExp-class setReplaceMethod("adjustedRtime", "XCMSnExp", function(object, value) { + if (!is.list(value)) + stop("'value' is supposed to be a list of retention time values!") + if (hasAdjustedRtime(object)) + object <- dropAdjustedRtime(object) + ## Check if we have some unsorted retention times (issue #146) + unsorted <- unlist(lapply(value, is.unsorted), use.names = FALSE) + if (any(unsorted)) + warning("Adjusted retention times for file(s) ", + paste(basename(fileNames(object)[unsorted]), collapse = ", "), + " not sorted increasingly.") newFd <- new("MsFeatureData") newFd@.xData <- .copy_env(object@msFeatureData) adjustedRtime(newFd) <- value + if (hasChromPeaks(newFd)) { + ## Change also the retention times reported in the peak matrix. + if (length(value) != length(rtime(object, bySample = TRUE))) + stop("The length of 'value' has to match the number of samples!") + message("Applying retention time adjustment to the identified", + " chromatographic peaks ... ", appendLF = FALSE) + fts <- .applyRtAdjToChromPeaks(chromPeaks(newFd), + rtraw = rtime(object, bySample = TRUE), + rtadj = value) + ## Calling this on the MsFeatureData to avoid all results being removed + ## again by the chromPeaks<- method. + chromPeaks(newFd) <- fts + message("OK") + } lockEnvironment(newFd, bindings = TRUE) object@msFeatureData <- newFd - if (validObject(object)) { - ## Lock the environment so that only accessor methods can change values. 
- ## lockEnvironment(newFd, bindings = TRUE) - ## object@msFeatureData <- newFd + if (validObject(object)) return(object) - } }) -##' @aliases featureGroups -##' -##' @description The \code{featureGroups}, \code{featureGroups<-} methods extract -##' or set the feature alignment results. -##' -##' @return For \code{featureGroups}: a \code{DataFrame} with feature alignment -##' information, each row corresponding to one group of aligned features (across -##' samples) and columns \code{"mzmed"} (median mz value), \code{"mzmin"} -##' (minimal mz value), \code{"mzmax"} (maximum mz value), \code{"rtmed"} (median -##' retention time), \code{"rtmin"} (minimal retention time), \code{"rtmax"} -##' (maximal retention time) and \code{"featureidx"}. Column \code{"featureidx"} -##' contains a \code{list} with indices of features (rows) in the matrix returned -##' by the \code{features} method that belong to that feature group. The method -##' returns \code{NULL} if no aligned feature information is present. -##' -##' @rdname XCMSnExp-class -setMethod("featureGroups", "XCMSnExp", function(object) { - return(featureGroups(object@msFeatureData)) -}) -##' @aliases featureGroups<- -##' -##' @rdname XCMSnExp-class -setReplaceMethod("featureGroups", "XCMSnExp", function(object, value) { +#' @aliases featureDefinitions +#' +#' @description \code{featureDefinitions}, \code{featureDefinitions<-}: extract +#' or set the correspondence results, i.e. the mz-rt features (peak groups). +#' +#' @return For \code{featureDefinitions}: a \code{DataFrame} with peak grouping +#' information, each row corresponding to one mz-rt feature (grouped peaks +#' within and across samples) and columns \code{"mzmed"} (median mz value), +#' \code{"mzmin"} (minimal mz value), \code{"mzmax"} (maximum mz value), +#' \code{"rtmed"} (median retention time), \code{"rtmin"} (minimal retention +#' time), \code{"rtmax"} (maximal retention time) and \code{"peakidx"}. 
+#' Column \code{"peakidx"} contains a \code{list} with indices of +#' chromatographic peaks (rows) in the matrix returned by the +#' \code{chromPeaks} method that belong to that feature group. The method +#' returns \code{NULL} if no feature definitions are present. +#' +#' @rdname XCMSnExp-class +setMethod("featureDefinitions", "XCMSnExp", function(object) { + return(featureDefinitions(object@msFeatureData)) +}) +#' @aliases featureDefinitions<- +#' +#' @rdname XCMSnExp-class +setReplaceMethod("featureDefinitions", "XCMSnExp", function(object, value) { + if (hasFeatures(object)) + object <- dropFeatureDefinitions(object) newFd <- new("MsFeatureData") newFd@.xData <- .copy_env(object@msFeatureData) - featureGroups(newFd) <- value + featureDefinitions(newFd) <- value lockEnvironment(newFd, bindings = TRUE) object@msFeatureData <- newFd if (validObject(object)) { @@ -140,80 +193,157 @@ setReplaceMethod("featureGroups", "XCMSnExp", function(object, value) { } }) -##' @aliases features -##' -##' @description The \code{features}, \code{features<-} methods extract or set -##' the matrix containing the information on identified features. Parameter -##' \code{bySample} allows to specify whether features should be returned -##' ungrouped (default \code{bySample = FALSE}) or grouped by sample ( -##' \code{bySample = TRUE}). -##' See description on the return value for details on the matrix columns. Users -##' usually don't have to use the \code{features<-} method directly as detected -##' features are added to the object by the \code{\link{detectFeatures}} method. 
-##' -##' @return For \code{features}: if \code{bySample = FALSE} a \code{matrix} with -##' at least the following columns: \code{"mz"} (mz value for the largest -##' intensity), \code{"mzmin"} (minimal mz value), \code{"mzmax"} (maximal mz -##' value), \code{"rt"} (retention time for the peak apex), \code{"rtmin"} -##' (minimal retention time), \code{"rtmax"} (maximal retention time), -##' \code{"into"} (integrated, original, intensity of the feature) and -##' \code{"sample"} (sample index in which the feature was identified). -##' Depending on the employed feature detection algorithm and the -##' \code{verboseColumns} parameter of it additional columns might be returned. -##' For \code{bySample = TRUE} the features are returned as a \code{list} of -##' matrices, each containing the features of a specific sample. For sample in -##' which no feastures were detected a matrix with 0 rows is returned. -##' -##' @rdname XCMSnExp-class -setMethod("features", "XCMSnExp", function(object, bySample = FALSE) { +#' @aliases chromPeaks +#' +#' @description \code{chromPeaks}, \code{chromPeaks<-}: extract or set +#' the matrix containing the information on identified chromatographic +#' peaks. Parameter \code{bySample} allows to specify whether peaks should +#' be returned ungrouped (default \code{bySample = FALSE}) or grouped by +#' sample (\code{bySample = TRUE}). The \code{chromPeaks<-} method for +#' \code{XCMSnExp} objects removes also all correspondence (peak grouping) +#' and retention time correction (alignment) results. The optional +#' arguments \code{rt}, \code{mz} and \code{ppm} allow to extract only +#' chromatographic peaks overlapping (if \code{type = "any"}) or completely +#' within (if \code{type = "within"}) the defined retention time and mz +#' ranges. +#' See description of the return value for details on the returned matrix. 
+#' Users usually don't have to use the \code{chromPeaks<-} method directly +#' as detected chromatographic peaks are added to the object by the +#' \code{\link{findChromPeaks}} method. +#' +#' @param rt optional \code{numeric(2)} defining the retention time range for +#' which chromatographic peaks should be returned. +#' +#' @param mz optional \code{numeric(2)} defining the mz range for which +#' chromatographic peaks should be returned. +#' +#' @param ppm optional \code{numeric(1)} specifying the ppm by which the +#' \code{mz} range should be extended. For a value of \code{ppm = 10}, all +#' peaks within \code{mz[1] - mz[1] * ppm / 1e6} and +#' \code{mz[2] + mz[2] * ppm / 1e6} are +#' returned. +#' +#' @return For \code{chromPeaks}: if \code{bySample = FALSE} a \code{matrix} +#' with at least the following columns: +#' \code{"mz"} (intensity-weighted mean of mz values of the peak across +#' scans/retention times), +#' \code{"mzmin"} (minimal mz value), +#' \code{"mzmax"} (maximal mz value), +#' \code{"rt"} (retention time for the peak apex), +#' \code{"rtmin"} (minimal retention time), +#' \code{"rtmax"} (maximal retention time), +#' \code{"into"} (integrated, original, intensity of the peak), +#' \code{"maxo"} (maximum intensity of the peak), +#' \code{"sample"} (sample index in which the peak was identified) and +#' \code{"is_filled"} defining whether the chromatographic peak was +#' identified by the peak picking algorithm (\code{0}) or was added by the +#' \code{fillChromPeaks} method (\code{1}). +#' Depending on the employed peak detection algorithm and the +#' \code{verboseColumns} parameter of it additional columns might be +#' returned. For \code{bySample = TRUE} the chromatographic peaks are +#' returned as a \code{list} of matrices, each containing the +#' chromatographic peaks of a specific sample. For samples in which no +#' peaks were detected a matrix with 0 rows is returned. 
+#' +#' @rdname XCMSnExp-class +setMethod("chromPeaks", "XCMSnExp", function(object, bySample = FALSE, + rt = numeric(), mz = numeric(), + ppm = 0, type = "any") { + pks <- chromPeaks(object@msFeatureData) + type <- match.arg(type, c("any", "within")) + ## Select peaks within rt range. + if (length(rt)) { + rt <- range(rt) + if (type == "within") + keep <- which(pks[, "rtmin"] >= rt[1] & pks[, "rtmax"] <= rt[2]) + else + keep <- which(pks[, "rtmax"] >= rt[1] & pks[, "rtmin"] <= rt[2]) + pks <- pks[keep, , drop = FALSE] + } + ## Select peaks within mz range, considering also ppm + if (length(mz) && length(pks)) { + mz <- range(mz) + ## Increase mz by ppm. + if (is.finite(mz[1])) + mz[1] <- mz[1] - mz[1] * ppm / 1e6 + if (is.finite(mz[2])) + mz[2] <- mz[2] + mz[2] * ppm / 1e6 + if (type == "within") + keep <- which(pks[, "mzmin"] >= mz[1] & pks[, "mzmax"] <= mz[2]) + else + keep <- which(pks[, "mzmax"] >= mz[1] & pks[, "mzmin"] <= mz[2]) + pks <- pks[keep, , drop = FALSE] + } if (bySample) { - tmp <- split(features(object), f = features(object)[, "sample"]) ## Ensure we return something for each sample in case there is a sample - ## without detected features. + ## without detected peaks. 
res <- vector("list", length(fileNames(object))) names(res) <- as.character(1:length(res)) - tmp <- split.data.frame(features(object), f = features(object)[, "sample"]) - res[as.numeric(names(tmp))] <- tmp - if (any(lengths(res) == 0)) { - emat <- matrix(nrow = 0, ncol = ncol(tmp[[1]])) - colnamers(emat) <- colnames(tmp[[1]]) - res[lengths(res) == 0] <- emat + if (length(pks)) { + tmp <- split.data.frame(pks, + f = pks[, "sample"]) + res[as.numeric(names(tmp))] <- tmp + if (any(lengths(res) == 0)) { + emat <- matrix(nrow = 0, ncol = ncol(tmp[[1]])) + colnames(emat) <- colnames(tmp[[1]]) + for (i in which(lengths(res) == 0)) + res[[i]] <- emat + } + } else { + for(i in 1:length(res)) + res[[i]] <- pks } - return(res) - } else { - return(features(object@msFeatureData)) - } + res + } else + pks }) -##' @aliases features<- -##' -##' @rdname XCMSnExp-class -setReplaceMethod("features", "XCMSnExp", function(object, value) { +#' @aliases chromPeaks<- +#' +#' @rdname XCMSnExp-class +setReplaceMethod("chromPeaks", "XCMSnExp", function(object, value) { newFd <- new("MsFeatureData") + ## Dropping all alignment results and all retention time corrections. + suppressMessages( + object <- dropChromPeaks(object) + ) + ## Ensure that we remove ALL related process history steps newFd@.xData <- .copy_env(object@msFeatureData) - features(newFd) <- value + chromPeaks(newFd) <- value lockEnvironment(newFd, bindings = TRUE) object@msFeatureData <- newFd if (validObject(object)) { - ## ## Lock the environment so that only accessor methods can change values. - ## lockEnvironment(newFd, bindings = TRUE) - ## object@msFeatureData <- newFd return(object) } }) -##' @description The \code{rtime} method extracts the retention time for each -##' scan. The \code{bySample} parameter allows to return the values grouped -##' by sample/file. -##' -##' @param bySample logical(1) specifying whether results should be grouped by -##' sample. 
-##' -##' @return For \code{rtime}: if \code{bySample = FALSE} a numeric vector with the -##' retention times of each scan, if \code{bySample = TRUE} a \code{list} of -##' numeric vectors with the retention times per sample. -##' -##' @rdname XCMSnExp-class -setMethod("rtime", "XCMSnExp", function(object, bySample = FALSE) { +#' @description \code{rtime}: extracts the retention time for each +#' scan. The \code{bySample} parameter allows to return the values grouped +#' by sample/file and \code{adjusted} whether adjusted or raw retention +#' times should be returned. By default the method returns adjusted +#' retention times, if they are available (i.e. if retention times were +#' adjusted using the \code{\link{adjustRtime}} method). +#' +#' @param bySample logical(1) specifying whether results should be grouped by +#' sample. +#' +#' @param adjusted logical(1) whether adjusted or raw (i.e. the original +#' retention times reported in the files) should be returned. +#' +#' @return For \code{rtime}: if \code{bySample = FALSE} a numeric vector with +#' the retention times of each scan, if \code{bySample = TRUE} a +#' \code{list} of numeric vectors with the retention times per sample. +#' +#' @rdname XCMSnExp-class +setMethod("rtime", "XCMSnExp", function(object, bySample = FALSE, + adjusted = hasAdjustedRtime(object)) { + if (adjusted) { + ## ensure that we DO have adjusted retention times. + if (hasAdjustedRtime(object)) { + return(adjustedRtime(object = object, bySample = bySample)) + } else { + warning("Adjusted retention times requested but none present. ", + "returning raw retention times instead.") + } + } ## Alternative: ## theM <- getMethod("rtime", "OnDiskMSnExp") ## res <- theM(object) @@ -227,20 +357,21 @@ setMethod("rtime", "XCMSnExp", function(object, bySample = FALSE) { return(res) }) -##' @description The \code{mz} method extracts the mz values from each scan of -##' all files within an \code{XCMSnExp} object. 
These values are extracted from -##' the original data files and eventual processing steps are applied -##' \emph{on the fly}. Using the \code{bySample} parameter it is possible to -##' switch from the default grouping of mz values by spectrum/scan to a grouping -##' by sample/file. -##' -##' @return For \code{mz}: if \code{bySample = FALSE} a \code{list} with the mz -##' values (numeric vectors) of each scan. If \code{bySample = TRUE} a -##' \code{list} with the mz values per sample. -##' -##' @rdname XCMSnExp-class -setMethod("mz", "XCMSnExp", function(object, bySample = FALSE) { - res <- callNextMethod(object = object) +#' @description \code{mz}: extracts the mz values from each scan of +#' all files within an \code{XCMSnExp} object. These values are extracted +#' from the original data files and eventual processing steps are applied +#' \emph{on the fly}. Using the \code{bySample} parameter it is possible to +#' switch from the default grouping of mz values by spectrum/scan to a +#' grouping by sample/file. +#' +#' @return For \code{mz}: if \code{bySample = FALSE} a \code{list} with the mz +#' values (numeric vectors) of each scan. If \code{bySample = TRUE} a +#' \code{list} with the mz values per sample. +#' +#' @rdname XCMSnExp-class +setMethod("mz", "XCMSnExp", function(object, bySample = FALSE, + BPPARAM = bpparam()) { + res <- callNextMethod(object = object, BPPARAM = BPPARAM) if (bySample) { tmp <- lapply(split(res, fromFile(object)), unlist, use.names = FALSE) res <- vector("list", length(fileNames(object))) @@ -250,20 +381,22 @@ setMethod("mz", "XCMSnExp", function(object, bySample = FALSE) { return(res) }) -##' @description The \code{intensity} method extracts the intensity values from -##' each scan of all files within an \code{XCMSnExp} object. These values are -##' extracted from the original data files and eventual processing steps are -##' applied \emph{on the fly}. 
Using the \code{bySample} parameter it is possible -##' to switch from the default grouping of intensity values by spectrum/scan to -##' a grouping by sample/file. -##' -##' @return For \code{intensity}: if \code{bySample = FALSE} a \code{list} with -##' the intensity values (numeric vectors) of each scan. If -##' \code{bySample = TRUE} a \code{list} with the intensity values per sample. -##' -##' @rdname XCMSnExp-class -setMethod("intensity", "XCMSnExp", function(object, bySample = FALSE) { - res <- callNextMethod(object = object) +#' @description \code{intensity}: extracts the intensity values from +#' each scan of all files within an \code{XCMSnExp} object. These values are +#' extracted from the original data files and eventual processing steps are +#' applied \emph{on the fly}. Using the \code{bySample} parameter it is +#' possible to switch from the default grouping of intensity values by +#' spectrum/scan to a grouping by sample/file. +#' +#' @return For \code{intensity}: if \code{bySample = FALSE} a \code{list} with +#' the intensity values (numeric vectors) of each scan. If +#' \code{bySample = TRUE} a \code{list} with the intensity values per +#' sample. +#' +#' @rdname XCMSnExp-class +setMethod("intensity", "XCMSnExp", function(object, bySample = FALSE, + BPPARAM = bpparam()) { + res <- callNextMethod(object = object, BPPARAM = BPPARAM) if (bySample) { tmp <- lapply(split(res, fromFile(object)), unlist, use.names = FALSE) res <- vector("list", length(fileNames(object))) @@ -273,21 +406,37 @@ setMethod("intensity", "XCMSnExp", function(object, bySample = FALSE) { return(res) }) -##' @description The \code{spectra} method extracts the -##' \code{\link[MSnbase]{Spectrum}} objects containing all data from -##' \code{object}. These values are extracted from the original data files and -##' eventual processing steps are applied \emph{on the fly}. Setting -##' \code{bySample = TRUE} the spectra are returned grouped by sample/file. 
-##' -##' @return For \code{spectra}: if \code{bySample = FALSE} a \code{list} with -##' \code{\link[MSnbase]{Spectrum}} objects. If \code{bySample = TRUE} the result -##' is grouped by sample, i.e. as a \code{list} of \code{lists}, each element in -##' the \emph{outer} \code{list} being the \code{list} of spectra of the specific -##' file. -##' -##' @rdname XCMSnExp-class -setMethod("spectra", "XCMSnExp", function(object, bySample = FALSE) { - res <- callNextMethod(object = object) +#' @description \code{spectra}: extracts the +#' \code{\link[MSnbase]{Spectrum}} objects containing all data from +#' \code{object}. The values are extracted from the original data files and +#' eventual processing steps are applied \emph{on the fly}. By setting +#' \code{bySample = TRUE}, the spectra are returned grouped by sample/file. +#' If the \code{XCMSnExp} object contains adjusted retention times, these +#' are returned by default in the \code{Spectrum} objects (can be +#' overwritten by setting \code{adjusted = FALSE}). +#' +#' @param BPPARAM Parameter class for parallel processing. See +#' \code{\link[BiocParallel]{bpparam}}. +#' +#' @return For \code{spectra}: if \code{bySample = FALSE} a \code{list} with +#' \code{\link[MSnbase]{Spectrum}} objects. If \code{bySample = TRUE} the +#' result is grouped by sample, i.e. as a \code{list} of \code{lists}, each +#' element in the \emph{outer} \code{list} being the \code{list} of spectra +#' of the specific file. +#' +#' @rdname XCMSnExp-class +setMethod("spectra", "XCMSnExp", function(object, bySample = FALSE, + adjusted = hasAdjustedRtime(object), + BPPARAM = bpparam()) { + res <- callNextMethod(object = object, BPPARAM = BPPARAM) + ## replace the rtime of these with the adjusted ones - if present. 
+ if (adjusted & hasAdjustedRtime(object)) { + rts <- adjustedRtime(object) + res <- mapply(FUN = function(a, b) { + a@rt <- b + return(a) + }, a = res, b = rts) + } if (bySample) { tmp <- split(res, fromFile(object)) ## That's to ensure that we're always returning something for all files. @@ -298,29 +447,34 @@ setMethod("spectra", "XCMSnExp", function(object, bySample = FALSE) { return(res) }) -## processHistory -##' @aliases processHistory -##' @description The \code{processHistory} method returns a \code{list} with -##' \code{\link{ProcessHistory}} objects (or objects inheriting from this base -##' class) representing the individual processing steps that have been performed, -##' eventually along with their settings (\code{Param} parameter class). Optional -##' arguments \code{fileIndex} and \code{type} allow to restrict to process steps -##' of a certain type or performed on a certain file. -##' -##' @param fileIndex For \code{processHistory}: optional \code{numeric} -##' specifying the index of the files/samples for which the -##' \code{\link{ProcessHistory}} objects should be retrieved. -##' -##' @param type For \code{processHistory}: restrict returned -##' \code{\link{ProcessHistory}} objects to analysis steps of a certain type. -##' Supported values are \code{"Unknown"}, \code{"Feature detection"}, -##' \code{"Feature alignment"} and \code{"Retention time correction"}. -##' -##' @return For \code{processHistory}: a \code{list} of -##' \code{\link{ProcessHistory}} objects providing the details of the individual -##' data processing steps that have been performed. -##' -##' @rdname XCMSnExp-class +#' @aliases processHistory +#' +#' @description \code{processHistory}: returns a \code{list} with +#' \code{\link{ProcessHistory}} objects (or objects inheriting from this +#' base class) representing the individual processing steps that have been +#' performed, eventually along with their settings (\code{Param} parameter +#' class). 
Optional arguments \code{fileIndex} and \code{type} allow to +#' restrict to process steps of a certain type or performed on a certain +#' file. +#' +#' @param fileIndex For \code{processHistory}: optional \code{numeric} +#' specifying the index of the files/samples for which the +#' \code{\link{ProcessHistory}} objects should be retrieved. +#' +#' @param type For \code{processHistory}: restrict returned +#' \code{\link{ProcessHistory}} objects to analysis steps of a certain +#' type. Use the \code{processHistoryTypes} to list all supported values. +#' For \code{chromPeaks}: \code{character} specifying which peaks to return +#' if \code{rt} or \code{mz} are defined. For \code{type = "any"} all +#' chromatographic peaks that \emph{overlap} the range defined by the +#' \code{mz} or by the \code{rt}. For \code{type = "within"} only peaks +#' completely within the range(s) are returned. +#' +#' @return For \code{processHistory}: a \code{list} of +#' \code{\link{ProcessHistory}} objects providing the details of the +#' individual data processing steps that have been performed. +#' +#' @rdname XCMSnExp-class setMethod("processHistory", "XCMSnExp", function(object, fileIndex, type) { ph <- object@.processHistory if (length(ph)) { @@ -339,7 +493,7 @@ setMethod("processHistory", "XCMSnExp", function(object, fileIndex, type) { return(any(type == processType(z))) })) if (!any(gotIt)) - return(list) + return(list()) ph <- ph[gotIt] } return(ph) @@ -348,12 +502,13 @@ setMethod("processHistory", "XCMSnExp", function(object, fileIndex, type) { } }) -##' @description The \code{addProcessHistory} method adds (appends) a single -##' \code{\link{ProcessHistory}} object to the \code{.processHistory} slot. -##' -##' @return The \code{addProcessHistory} method returns the input object with the -##' provided \code{\link{ProcessHistory}} appended to the process history. 
-##' @noRd +#' @description \code{addProcessHistory}: adds (appends) a single +#' \code{\link{ProcessHistory}} object to the \code{.processHistory} slot. +#' +#' @return The \code{addProcessHistory} method returns the input object with the +#' provided \code{\link{ProcessHistory}} appended to the process history. +#' +#' @noRd setMethod("addProcessHistory", "XCMSnExp", function(object, ph) { if (!inherits(ph, "ProcessHistory")) stop("Argument 'ph' has to be of type 'ProcessHistory' or a class ", @@ -363,91 +518,185 @@ setMethod("addProcessHistory", "XCMSnExp", function(object, ph) { return(object) }) -##' @aliases dropFeatures -##' -##' @description The \code{dropFeatures} method drops any identified features -##' and returns the object without that information. Note that for -##' \code{XCMSnExp} objects the method drops all results from a feature alignment -##' or retention time adjustment too. For \code{XCMSnExp} objects the method -##' drops also any related process history steps. -##' -##' @rdname XCMSnExp-class -setMethod("dropFeatures", "XCMSnExp", function(object) { - if (hasDetectedFeatures(object)) { - object <- dropFeatureGroups(object) - object <- dropAdjustedRtime(object) - object <- dropProcessHistories(object, type = .PROCSTEP.FEATURE.DETECTION) +#' @aliases dropChromPeaks +#' +#' @description \code{dropChromPeaks}: drops any identified chromatographic +#' peaks and returns the object without that information. Note that for +#' \code{XCMSnExp} objects the method drops all results from a +#' correspondence (peak grouping) or alignment (retention time adjustment) +#' too. For \code{XCMSnExp} objects the method drops also any related +#' process history steps. 
+#' +#' @rdname XCMSnExp-class +setMethod("dropChromPeaks", "XCMSnExp", function(object) { + if (hasChromPeaks(object)) { + object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.DETECTION) + ## Make sure we delete all related process history steps + object <- dropProcessHistories(object, type = .PROCSTEP.RTIME.CORRECTION) + object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.GROUPING) + object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.FILLING) ## idx_fd <- which(unlist(lapply(processHistory(object), processType)) == - ## .PROCSTEP.FEATURE.DETECTION) + ## .PROCSTEP.PEAK.DETECTION) ## if (length(idx_fd) > 0) ## object@.processHistory <- object@.processHistory[-idx_fd] newFd <- new("MsFeatureData") newFd@.xData <- .copy_env(object@msFeatureData) - newFd <- dropFeatures(newFd) + newFd <- dropChromPeaks(newFd) + ## Dropping other results from the environment (not the object). + if (hasAdjustedRtime(newFd)) + newFd <- dropAdjustedRtime(newFd) + if (hasFeatures(newFd)) + newFd <- dropFeatureDefinitions(newFd) lockEnvironment(newFd, bindings = TRUE) object@msFeatureData <- newFd } if (validObject(object)) return(object) }) -##' @aliases dropFeatureGroups -##' -##' @description The \code{dropFeatureGroups} method drops aligned feature -##' information (i.e. feature groups) and returns the object -##' without that information. Note that for \code{XCMSnExp} objects the method -##' drops also retention time adjustments. -##' For \code{XCMSnExp} objects the method drops also any related process history -##' steps. 
-##'
-##' @rdname XCMSnExp-class
-setMethod("dropFeatureGroups", "XCMSnExp", function(object) {
-    if (hasAlignedFeatures(object)) {
-        ## phTypes <- unlist(lapply(processHistory(object), processType))
-        ## idx_fal <- which(phTypes == .PROCSTEP.FEATURE.ALIGNMENT)
-        ## idx_art <- which(phTypes == .PROCSTEP.RTIME.CORRECTION)
-        ## if (length(idx_fal) > 0)
-        ##     object@.processHistory <- object@.processHistory[-idx_fal]
-        object <- dropProcessHistories(object, type = .PROCSTEP.FEATURE.ALIGNMENT)
+
+#' @aliases dropFeatureDefinitions
+#'
+#' @description \code{dropFeatureDefinitions}: drops the results from a
+#' correspondence (peak grouping) analysis, i.e. the definition of the mz-rt
+#' features and returns the object without that information. Note that for
+#' \code{XCMSnExp} objects the method will also drop retention time
+#' adjustment results, if these were performed after the last peak grouping
+#' (i.e. which are based on the results from the peak grouping that are going
+#' to be removed).
+#' For \code{XCMSnExp} objects also all related process history steps are
+#' removed. Also eventually filled in peaks (by \code{\link{fillChromPeaks}})
+#' will be removed too.
+#'
+#' @param keepAdjRtime For \code{dropFeatureDefinitions,XCMSnExp}:
+#' \code{logical(1)} defining whether eventually present retention time
+#' adjustment should not be dropped. By default dropping feature definitions
+#' drops retention time adjustment results too.
+#'
+#' @param dropLastN For \code{dropFeatureDefinitions,XCMSnExp}:
+#' \code{numeric(1)} defining the number of peak grouping related process
+#' history steps to remove. By default \code{dropLastN = -1}, dropping the
+#' feature definitions removes all process history steps related to peak
+#' grouping. Setting e.g. \code{dropLastN = 1} will only remove the most
+#' recent peak grouping related process history step.
+#' +#' @rdname XCMSnExp-class +setMethod("dropFeatureDefinitions", "XCMSnExp", function(object, + keepAdjRtime = FALSE, + dropLastN = -1) { + if (hasFeatures(object)) { + phTypes <- unlist(lapply(processHistory(object), function(z) + processType(z))) + idx_art <- which(phTypes == .PROCSTEP.RTIME.CORRECTION) + idx_fal <- which(phTypes == .PROCSTEP.PEAK.GROUPING) + if (length(idx_art) == 0) + idx_art <- -1L + if (length(idx_fal) == 0) + idx_fal <- -1L + ## 1) drop last related process history step and results + object <- dropProcessHistories(object, + type = .PROCSTEP.PEAK.GROUPING, + num = 1) newFd <- new("MsFeatureData") newFd@.xData <- .copy_env(object@msFeatureData) - newFd <- dropFeatureGroups(newFd) + newFd <- dropFeatureDefinitions(newFd) + if (.hasFilledPeaks(object)) { + ## Remove filled in peaks + chromPeaks(newFd) <- + chromPeaks(newFd)[chromPeaks(newFd)[, "is_filled"] == 0, , + drop = FALSE] + object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.FILLING) + } lockEnvironment(newFd, bindings = TRUE) object@msFeatureData <- newFd + ## 2) If retention time correction was performed after the latest peak + ## alignment, drop also the retention time correction and all related + ## process history steps. + ## Otherwise (grouping performed after retention time adjustment) do + ## nothing - this keeps eventual alignment related process history + ## steps performed before retention time correction. if (hasAdjustedRtime(object)) { - ## ALWAYS drop retention time adjustments, since these are performed - ## after alignment. - object <- dropAdjustedRtime(object) + if (max(idx_art) > max(idx_fal)) { + object <- dropProcessHistories(object, + type = .PROCSTEP.PEAK.GROUPING) + ## This will ensure that the retention times of the peaks + ## are restored. 
+ object <- dropAdjustedRtime(object) + warning("Removed also correspondence (peak grouping) results as", + " these based on the retention time correction results", + " that were dropped.") + } } } if (validObject(object)) return(object) }) -##' @aliases dropAdjustedRtime -##' -##' @description The \code{dropAdjustedRtime} method drops any retention time -##' adjustment information and returns the object without adjusted retention -##' time. Note that for \code{XCMSnExp} objects the method drops also all feature -##' alignment results if these were performed after the retention time adjustment. -##' For \code{XCMSnExp} objects the method drops also any related process history -##' steps. -##' -##' @rdname XCMSnExp-class + +#' @aliases dropAdjustedRtime +#' +#' @description \code{dropAdjustedRtime}: drops any retention time +#' adjustment information and returns the object without adjusted retention +#' time. For \code{XCMSnExp} object this also reverts the retention times +#' reported for the chromatographic peaks in the peak matrix to the +#' original, raw, ones (after chromatographic peak detection). Note that +#' for \code{XCMSnExp} objects the method drops also all peak grouping +#' results if these were performed \emph{after} the retention time +#' adjustment. For \code{XCMSnExp} objects the method drops also any +#' related process history steps. +#' +#' @rdname XCMSnExp-class setMethod("dropAdjustedRtime", "XCMSnExp", function(object) { if (hasAdjustedRtime(object)) { + ## Get the process history types to determine the order of the analysis + ## steps. 
phTypes <- unlist(lapply(processHistory(object), function(z) processType(z))) idx_art <- which(phTypes == .PROCSTEP.RTIME.CORRECTION) - idx_fal <- which(phTypes == .PROCSTEP.FEATURE.ALIGNMENT) - ## Drop retention time - object@.processHistory <- object@.processHistory[-idx_art] + idx_fal <- which(phTypes == .PROCSTEP.PEAK.GROUPING) + if (length(idx_art) == 0) + idx_art <- -1L + if (length(idx_fal) == 0) + idx_fal <- -1L + ## Copy the content of the object newFd <- new("MsFeatureData") - newFd@.xData <- .copy_env(object@msFeatureData) + newFd@.xData <- .copy_env(object@msFeatureData) + ## Revert applied adjustments in peaks: + if (hasChromPeaks(newFd)) { + message("Reverting retention times of identified peaks to ", + "original values ... ", appendLF = FALSE) + fts <- .applyRtAdjToChromPeaks(chromPeaks(newFd), + rtraw = adjustedRtime(object, + bySample = TRUE), + rtadj = rtime(object, + bySample = TRUE, + adjusted = FALSE)) + ## Replacing peaks in MsFeatureData, not in XCMSnExp to avoid + ## all results being removed. + chromPeaks(newFd) <- fts + message("OK") + } + ## 1) Drop the retention time adjustment and (the latest) related process + ## history + object <- dropProcessHistories(object, + type = .PROCSTEP.RTIME.CORRECTION, + num = 1) newFd <- dropAdjustedRtime(newFd) object@msFeatureData <- newFd lockEnvironment(newFd, bindings = TRUE) - if (hasAlignedFeatures(object)) { - if (max(idx_fal) > max(idx_art)) - object <- dropFeatureGroups(object) + ## 2) If grouping has been performed AFTER retention time correction it + ## has to be dropped too, including ALL related process histories. + if (hasFeatures(object)) { + if (max(idx_fal) > max(idx_art)) { + object <- dropFeatureDefinitions(object) + object <- dropProcessHistories(object, + type = .PROCSTEP.PEAK.GROUPING, + num = -1) + } + } else { + ## If there is any peak alignment related process history, but no + ## peak alignment results, drop them. 
+ object <- dropProcessHistories(object, + type = .PROCSTEP.PEAK.GROUPING, + num = -1) } } if (validObject(object)) @@ -455,230 +704,263 @@ setMethod("dropAdjustedRtime", "XCMSnExp", function(object) { }) -############################################################ -## Methods inherited from OnDiskMSnExp. -## For some of these methods (altering the raw data or subsetting) we have to -## remove the results. -## Methods to consider: -## o [ subset spectra, return an OnDiskMSnExp. -## o bin remove the results -## o clean remove the results. -## o featureNames returns the names of the spectra. -## o filterAcquisitionNum remove the results. -## o filterFile remove the results. -## o filterMsLevel remove the results. -## o filterMz -## o filterRt -## o fromFile<- -## o normalize remove the results. -## o pickPeaks remove the results. -## o removePeaks remove the results. -## o smooth remove the results. - -##' @title XCMSnExp data manipulation methods inherited from MSnbase -##' -##' @description The methods listed on this page are \code{\link{XCMSnExp}} -##' methods inherited from its parent, the \code{\link[MSnbase]{OnDiskMSnExp}} -##' class from the \code{MSnbase} package, that alter the raw data or are related -##' to data subsetting. Thus calling any of these methods causes all \code{xcms} -##' pre-processing results to be removed from the \code{\link{XCMSnExp}} object -##' to ensure its data integrity. -##' -##' The \code{[} method allows to subset a \code{\link{XCMSnExp}} object by -##' spectra. For more details and examples see the documentation for -##' \code{\link[MSnbase]{OnDiskMSnExp}}. -##' -##' @param x For \code{[}: an \code{\link{XCMSnExp}} object. -##' -##' @param i For \code{[}: \code{numeric} or \code{logical} vector specifying to -##' which spectra the data set should be reduced. -##' -##' @param j For \code{[}: not supported. -##' -##' @param drop For \code{[}: not supported. -##' -##' @return For all methods: a \code{XCMSnExp} object. 
-##' -##' @rdname XCMSnExp-inherited-methods -##' -##' @seealso \code{\link{XCMSnExp-filter}} for methods to filter and subset -##' \code{XCMSnExp} objects. -##' @seealso \code{\link{XCMSnExp}} for base class documentation. -##' @seealso \code{\link[MSnbase]{OnDiskMSnExp}} for the documentation of the -##' parent class. -##' -##' @author Johannes Rainer +#' @title XCMSnExp data manipulation methods inherited from MSnbase +#' +#' @description The methods listed on this page are \code{\link{XCMSnExp}} +#' methods inherited from its parent, the +#' \code{\link[MSnbase]{OnDiskMSnExp}} class from the \code{MSnbase} +#' package, that alter the raw data or are related to data subsetting. Thus +#' calling any of these methods causes all \code{xcms} pre-processing +#' results to be removed from the \code{\link{XCMSnExp}} object to ensure +#' its data integrity. +#' +#' The \code{[} method allows to subset a \code{\link{XCMSnExp}} object by +#' spectra. For more details and examples see the documentation for +#' \code{\link[MSnbase]{OnDiskMSnExp}}. +#' +#' @param x For \code{[}: an \code{\link{XCMSnExp}} object. +#' +#' @param i For \code{[}: \code{numeric} or \code{logical} vector specifying to +#' which spectra the data set should be reduced. +#' +#' @param j For \code{[}: not supported. +#' +#' @param drop For \code{[}: not supported. +#' +#' @return For all methods: a \code{XCMSnExp} object. +#' +#' @rdname XCMSnExp-inherited-methods +#' +#' @seealso \code{\link{XCMSnExp-filter}} for methods to filter and subset +#' \code{XCMSnExp} objects. +#' \code{\link{XCMSnExp}} for base class documentation. +#' \code{\link[MSnbase]{OnDiskMSnExp}} for the documentation of the +#' parent class. +#' +#' @author Johannes Rainer setMethod("[", signature(x = "XCMSnExp", i = "logicalOrNumeric", j = "missing", drop = "missing"), function(x, i, j, drop) { - if (hasAdjustedRtime(x) | hasAlignedFeatures(x) | - hasDetectedFeatures(x)) { + ## Want to support subsetting of the peaks! 
+ ## This means that we will also have to adjust the process + ## history accordingly. + if (hasAdjustedRtime(x) | hasFeatures(x) | + hasChromPeaks(x)) { ## x@.processHistory <- list() ## x@msFeatureData <- new("MsFeatureData") - x <- dropAdjustedRtime(x) - x <- dropFeatureGroups(x) - x <- dropFeatures(x) + suppressMessages( + x <- dropAdjustedRtime(x) + ) + suppressMessages( + x <- dropFeatureDefinitions(x) + ) + suppressMessages( + x <- dropChromPeaks(x) + ) warning("Removed preprocessing results") } callNextMethod() }) -##' @description The \code{bin} method allows to \emph{bin} spectra. See -##' \code{\link[MSnbase]{bin}} documentation for more details and examples. -##' -##' @param object An \code{\link{XCMSnExp}} object. -##' -##' @param binSize \code{numeric(1)} defining the size of a bin (in Dalton). -##' -##' @param msLevel. For \code{bin}, \code{clean}, \code{filterMsLevel}, -##' \code{removePeaks}: \code{numeric(1)} defining the MS level(s) -##' to which operations should be applied or to which the object should be -##' subsetted. -##' -##' @rdname XCMSnExp-inherited-methods +## setMethod("splitByFile", c("XCMSnExp", "factor"), function(x, f) { +## if (length(f) != length(fileNames(x))) +## stop("length of 'f' has to match the length of samples/files in 'object'.") +## idxs <- lapply(levels(f), function(z) which(f == z)) +## ## Now I can run a filterFile on these. +## res <- lapply(idxs, function(z) { +## return(filterFile(x, file = z)) +## }) +## names(res) <- levels(f) +## return(res) +## }) + +#' @description \code{bin}: allows to \emph{bin} spectra. See +#' \code{\link[MSnbase]{bin}} documentation for more details and examples. +#' +#' @param object An \code{\link{XCMSnExp}} or \code{OnDiskMSnExp} object. +#' +#' @param binSize \code{numeric(1)} defining the size of a bin (in Dalton). +#' +#' @param msLevel. 
For \code{bin}, \code{clean}, \code{filterMsLevel}, +#' \code{removePeaks}: \code{numeric(1)} defining the MS level(s) +#' to which operations should be applied or to which the object should be +#' subsetted. +#' +#' @rdname XCMSnExp-inherited-methods setMethod("bin", "XCMSnExp", function(object, binSize = 1L, msLevel.) { - if (hasAdjustedRtime(object) | hasAlignedFeatures(object) | - hasDetectedFeatures(object)) { + if (hasAdjustedRtime(object) | hasFeatures(object) | + hasChromPeaks(object)) { ## object@.processHistory <- list() ## object@msFeatureData <- new("MsFeatureData") object <- dropAdjustedRtime(object) - object <- dropFeatureGroups(object) - object <- dropFeatures(object) + object <- dropFeatureDefinitions(object) + object <- dropChromPeaks(object) warning("Removed preprocessing results") } callNextMethod() }) -##' @description The \code{clean} method removes unused \code{0} intensity data -##' points. See \code{\link[MSnbase]{clean}} documentation for details and -##' examples. -##' -##' @param all For \code{clean}: \code{logical(1)}, if \code{TRUE} all zeros are -##' removed. -##' -##' @param verbose \code{logical(1)} whether progress information should be -##' displayed. -##' -##' @rdname XCMSnExp-inherited-methods +#' @description \code{clean}: removes unused \code{0} intensity data +#' points. See \code{\link[MSnbase]{clean}} documentation for details and +#' examples. +#' +#' @param all For \code{clean}: \code{logical(1)}, if \code{TRUE} all zeros are +#' removed. +#' +#' @param verbose \code{logical(1)} whether progress information should be +#' displayed. +#' +#' @rdname XCMSnExp-inherited-methods setMethod("clean", "XCMSnExp", function(object, all = FALSE, verbose = FALSE, msLevel.) 
{ - if (hasAdjustedRtime(object) | hasAlignedFeatures(object) | - hasDetectedFeatures(object)) { + if (hasAdjustedRtime(object) | hasFeatures(object) | + hasChromPeaks(object)) { ## object@.processHistory <- list() ## object@msFeatureData <- new("MsFeatureData") object <- dropAdjustedRtime(object) - object <- dropFeatureGroups(object) - object <- dropFeatures(object) + object <- dropFeatureDefinitions(object) + object <- dropChromPeaks(object) warning("Removed preprocessing results") } callNextMethod() }) -##' @description The \code{filterMsLevel} reduces the \code{\link{XCMSnExp}} -##' object to spectra of the specified MS level(s). See -##' \code{\link[MSnbase]{filterMsLevel}} documentation for details and examples. -##' -##' @rdname XCMSnExp-inherited-methods +#' @description \code{filterMsLevel}: reduces the \code{\link{XCMSnExp}} +#' object to spectra of the specified MS level(s). See +#' \code{\link[MSnbase]{filterMsLevel}} documentation for details and +#' examples. +#' +#' @rdname XCMSnExp-inherited-methods setMethod("filterMsLevel", "XCMSnExp", function(object, msLevel.) { - if (hasAdjustedRtime(object) | hasAlignedFeatures(object) | - hasDetectedFeatures(object)) { + if (hasAdjustedRtime(object) | hasFeatures(object) | + hasChromPeaks(object)) { ## object@.processHistory <- list() ## object@msFeatureData <- new("MsFeatureData") object <- dropAdjustedRtime(object) - object <- dropFeatureGroups(object) - object <- dropFeatures(object) + object <- dropFeatureDefinitions(object) + object <- dropChromPeaks(object) warning("Removed preprocessing results") } callNextMethod() }) -##' @description The \code{filterAcquisitionNum} method filters the -##' \code{\link{XCMSnExp}} object keeping only spectra with the provided -##' acquisition numbers. See \code{\link[MSnbase]{filterAcquisitionNum}} for -##' details and examples. 
-##' -##' @param n For \code{filterAcquisitionNum}: \code{integer} defining the -##' acquisition numbers of the spectra to which the data set should be -##' sub-setted. -##' -##' @param file For \code{filterAcquisitionNum}: -##' \code{integer} defining the file index within the object to subset the -##' object by file. -##' -##' @rdname XCMSnExp-inherited-methods +#' @description \code{filterAcquisitionNum}: filters the +#' \code{\link{XCMSnExp}} object keeping only spectra with the provided +#' acquisition numbers. See \code{\link[MSnbase]{filterAcquisitionNum}} for +#' details and examples. +#' +#' @param n For \code{filterAcquisitionNum}: \code{integer} defining the +#' acquisition numbers of the spectra to which the data set should be +#' sub-setted. +#' +#' @param file For \code{filterAcquisitionNum}: +#' \code{integer} defining the file index within the object to subset the +#' object by file. +#' +#' @rdname XCMSnExp-inherited-methods setMethod("filterAcquisitionNum", "XCMSnExp", function(object, n, file) { - if (hasAdjustedRtime(object) | hasAlignedFeatures(object) | - hasDetectedFeatures(object)) { + if (hasAdjustedRtime(object) | hasFeatures(object) | + hasChromPeaks(object)) { ## object@.processHistory <- list() ## object@msFeatureData <- new("MsFeatureData") object <- dropAdjustedRtime(object) - object <- dropFeatureGroups(object) - object <- dropFeatures(object) + object <- dropFeatureDefinitions(object) + object <- dropChromPeaks(object) warning("Removed preprocessing results") } callNextMethod() }) -##' @aliases XCMSnExp-filter -##' @title XCMSnExp filtering and subsetting -##' -##' The methods listed on this page allow to filter and subset -##' \code{\link{XCMSnExp}} objects. Most of them are inherited from the -##' \code{\link[MSnbase]{OnDiskMSnExp}} object and have been adapted for -##' \code{\link{XCMSnExp}} to enable subsetting also on the preprocessing -##' results. 
-##' -##' @description The \code{filterFile} method allows to reduce the -##' \code{\link{XCMSnExp}} to data from only certain files. Identified features -##' for these files are retained while eventually all present feature -##' alignment/grouping information and adjusted retention times are dropped.. -##' -##' @note The \code{filterFile} method removes also process history steps not -##' related to the files to which the object should be sub-setted and updates -##' the \code{fileIndex} attribute accordingly. Also, the method does not allow -##' arbitrary ordering of the files or re-ordering of the files within the -##' object. -##' -##' @param object A \code{\link{XCMSnExp}} object. -##' -##' @param file For \code{filterFile}: \code{integer} defining the file index -##' within the object to subset the object by file or \code{character} specifying -##' the file names to sub set. The indices are expected to be increasingly -##' ordered, if not they are ordered internally. -##' -##' @return All methods return an \code{\link{XCMSnExp}} object. -##' -##' @author Johannes Rainer -##' -##' @seealso \code{\link{XCMSnExp}} for base class documentation. -##' -##' @rdname XCMSnExp-filter-methods -##' @examples -##' -##' ## Load some of the files from the faahKO package. -##' library(faahKO) -##' fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), -##' system.file('cdf/KO/ko16.CDF', package = "faahKO"), -##' system.file('cdf/KO/ko18.CDF', package = "faahKO")) -##' ## Read the files -##' od <- readMSData2(fs) -##' -##' ## Perform feature detection on them using default matched filter settings. -##' mfp <- MatchedFilterParam() -##' xod <- detectFeatures(od, param = mfp) -##' -##' ## Subset the dataset to the first and third file. 
-##' xod_sub <- filterFile(xod, file = c(1, 3)) -##' -##' ## The number of features per file for the full object -##' table(features(xod)[, "sample"]) -##' -##' ## The number of features per file for the subset -##' table(features(xod_sub)[, "sample"]) -##' -##' basename(fileNames(xod)) -##' basename(fileNames(xod_sub)) -setMethod("filterFile", "XCMSnExp", function(object, file) { +#' @aliases XCMSnExp-filter +#' +#' @title XCMSnExp filtering and subsetting +#' +#' @description The methods listed on this page allow to filter and subset +#' \code{\link{XCMSnExp}} objects. Most of them are inherited from the +#' \code{\link[MSnbase]{OnDiskMSnExp}} object and have been adapted for +#' \code{\link{XCMSnExp}} to enable subsetting also on the preprocessing +#' results. +#' +#' \code{filterFile}: allows to reduce the +#' \code{\link{XCMSnExp}} to data from only certain files. Identified +#' chromatographic peaks for these files are retained while all eventually +#' present features (peak grouping information) are dropped. By default also +#' adjusted retention times are removed. This can be overwritten by setting +#' \code{keepAdjustedRtime = TRUE}, but users should use this option with +#' caution. +#' +#' @note The \code{filterFile} method removes also process history steps not +#' related to the files to which the object should be sub-setted and updates +#' the \code{fileIndex} attribute accordingly. Also, the method does not +#' allow arbitrary ordering of the files or re-ordering of the files within +#' the object. +#' +#' @param object A \code{\link{XCMSnExp}} object. +#' +#' @param file For \code{filterFile}: \code{integer} defining the file index +#' within the object to subset the object by file or \code{character} +#' specifying the file names to sub set. The indices are expected to be +#' increasingly ordered, if not they are ordered internally. 
+#' +#' @param keepAdjustedRtime For \code{filterFile}: \code{logical(1)} defining +#' whether the adjusted retention times should be kept, even if features are +#' being removed (and the retention time correction being potentially +#' performed on these features). +#' +#' @return All methods return an \code{\link{XCMSnExp}} object. +#' +#' @author Johannes Rainer +#' +#' @seealso \code{\link{XCMSnExp}} for base class documentation. +#' +#' @rdname XCMSnExp-filter-methods +#' +#' @examples +#' +#' ## Load some of the files from the faahKO package. +#' library(faahKO) +#' fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), +#' system.file('cdf/KO/ko16.CDF', package = "faahKO"), +#' system.file('cdf/KO/ko18.CDF', package = "faahKO")) +#' ## Read the files +#' od <- readMSData2(fs) +#' +#' ## Perform peak detection on them using default matched filter settings. +#' mfp <- MatchedFilterParam() +#' xod <- findChromPeaks(od, param = mfp) +#' +#' ## Subset the dataset to the first and third file. +#' xod_sub <- filterFile(xod, file = c(1, 3)) +#' +#' ## The number of chromatographic peaks per file for the full object +#' table(chromPeaks(xod)[, "sample"]) +#' +#' ## The number of chromatographic peaks per file for the subset +#' table(chromPeaks(xod_sub)[, "sample"]) +#' +#' basename(fileNames(xod)) +#' basename(fileNames(xod_sub)) +#' +#' ## Filter on mz values; chromatographic peaks and features within the +#' ## mz range are retained (as well as adjusted retention times). +#' xod_sub <- filterMz(xod, mz = c(300, 400)) +#' head(chromPeaks(xod_sub)) +#' nrow(chromPeaks(xod_sub)) +#' nrow(chromPeaks(xod)) +#' +#' ## Filter on rt values. All chromatographic peaks and features within the +#' ## retention time range are retained. Filtering is performed by default on +#' ## adjusted retention times, if present. 
+#' xod_sub <- filterRt(xod, rt = c(2700, 2900)) +#' +#' range(rtime(xod_sub)) +#' head(chromPeaks(xod_sub)) +#' range(chromPeaks(xod_sub)[, "rt"]) +#' +#' nrow(chromPeaks(xod)) +#' nrow(chromPeaks(xod_sub)) +setMethod("filterFile", "XCMSnExp", function(object, file, + keepAdjustedRtime = FALSE) { if (missing(file)) return(object) if (is.character(file)) { file <- base::match(file, basename(fileNames(object))) @@ -689,24 +971,33 @@ setMethod("filterFile", "XCMSnExp", function(object, file) { ## Error checking - seems that's not performed downstream. if (!all(file %in% 1:length(fileNames(object)))) stop("'file' has to be within 1 and the number of files in the object!") - ## Get the data we want to keep/subset - fts <- features(object) - if (hasAdjustedRtime(object)) { - warning("Adjusted retention times removed.") + ## Dropping data. + if (hasAdjustedRtime(object) & !keepAdjustedRtime) { + message("Adjusted retention times removed.") object <- dropAdjustedRtime(object) } - if (hasAlignedFeatures(object)) { - warning("Feature alignment information removed.") - object <- dropFeatureGroups(object) - } - ## Process the processing history. - object <- dropProcessHistories(object, type = c(.PROCSTEP.FEATURE.ALIGNMENT, - .PROCSTEP.RTIME.CORRECTION)) + suppressWarnings( + adjRt <- adjustedRtime(object, bySample = TRUE) + ) + ## Get the data we want to keep/subset. + fts <- chromPeaks(object) + ## Keep also the processHistory ph <- processHistory(object) + if (hasFeatures(object)) { + message("Correspondence results (features) removed.") + suppressMessages( + object <- dropFeatureDefinitions(object) + ) + } + ph <- dropProcessHistoriesList(ph, type = .PROCSTEP.PEAK.GROUPING) + ## ## Process the processing history. 
+ ## object <- dropProcessHistories(object, type = c(.PROCSTEP.PEAK.GROUPING, + ## .PROCSTEP.RTIME.CORRECTION)) + ## ph <- processHistory(object) ## The next method will actually clean everything, process history and ## msFeatureData suppressWarnings( - object <- callNextMethod() + object <- callNextMethod(object = object, file = file) ) ## Remove ProcessHistory not related to any of the files. if (length(ph)) { @@ -720,43 +1011,55 @@ setMethod("filterFile", "XCMSnExp", function(object, file) { updateFileIndex(z, old = file, new = 1:length(file)) }) } - ## Process features. + ## Process peaks. fts <- fts[fts[, "sample"] %in% file, , drop = FALSE] fts[, "sample"] <- match(fts[, "sample"], file) - features(object) <- fts + if (length(adjRt) & keepAdjustedRtime) { + ## Put all directly into the msFeatureData environment to avoid an + ## additional correction of the peak retention times by the adjusted rt. + newFd <- new("MsFeatureData") + newFd@.xData <- .copy_env(object@msFeatureData) + chromPeaks(newFd) <- fts + adjustedRtime(newFd) <- adjRt[file] + lockEnvironment(newFd, bindings = TRUE) + object@msFeatureData <- newFd + } else { + chromPeaks(object) <- fts + } object@.processHistory <- ph return(object) }) -##' @description The \code{filterMz} method filters the data set based on the -##' provided mz value range. All features and feature groups (aligned features) -##' falling completely within the provided mz value range are retained (if their -##' minimal mz value is \code{>= mz[1]} and the maximal mz value \code{<= mz[2]}. -##' Adjusted retention times, if present, are not altered by the filtering. -##' -##' @param mz For \code{filterMz}: \code{numeric(2)} defining the lower and upper -##' mz value for the filtering. -##' -##' @param msLevel. For \code{filterMz}, \code{filterRt}, \code{numeric(1)} -##' defining the MS level(s) to which operations should be applied or to which -##' the object should be subsetted. -##' -##' @param ... 
Optional additional arguments. -##' -##' @rdname XCMSnExp-filter-methods +#' @description \code{filterMz}: filters the data set based on the +#' provided mz value range. All chromatographic peaks and features (grouped +#' peaks) falling completely within the provided mz value range are retained +#' (if their minimal mz value is \code{>= mz[1]} and the maximal mz value +#' \code{<= mz[2]}. Adjusted retention times, if present, are not altered by +#' the filtering. +#' +#' @param mz For \code{filterMz}: \code{numeric(2)} defining the lower and upper +#' mz value for the filtering. +#' +#' @param msLevel. For \code{filterMz}, \code{filterRt}, \code{numeric(1)} +#' defining the MS level(s) to which operations should be applied or to +#' which the object should be subsetted. +#' +#' @param ... Optional additional arguments. +#' +#' @rdname XCMSnExp-filter-methods setMethod("filterMz", "XCMSnExp", function(object, mz, msLevel., ...) { if (missing(mz)) return(object) - if (!is.numeric(mz) | length(mz) != 2) + if (!is.numeric(mz)) stop("'mz' has to be a numeric vector of length(2)!") mz <- range(mz) - ## Subset features if present. + ## Subset peaks if present. object <- callNextMethod() # just adds to processing queue. - if (hasDetectedFeatures(object)) { - fts <- features(object) + if (hasChromPeaks(object)) { + fts <- chromPeaks(object) keepIdx <- which(fts[, "mzmin"] >= mz[1] & fts[, "mzmax"] <= mz[2]) - newE <- .filterFeatures(object@msFeatureData, idx = keepIdx) + newE <- .filterChromPeaks(object@msFeatureData, idx = keepIdx) lockEnvironment(newE, bindings = TRUE) object@msFeatureData <- newE } @@ -764,22 +1067,30 @@ setMethod("filterMz", "XCMSnExp", function(object, mz, msLevel., ...) { return(object) }) -##' @description The \code{filterRt} method filters the data set based on the -##' provided retention time range. All features and feature groups within -##' the specified retention time window are retained. 
Filtering by retention time -##' does not drop any preprocessing results. The method returns an empty object -##' if no spectrum or feature is within the specified retention time range. -##' -##' @param rt For \code{filterRt}: \code{numeric(2)} defining the retention time -##' window (lower and upper bound) for the filtering. -##' -##' @param adjusted For \code{filterRt}: \code{logical} indicating whether the -##' object should be filtered by original (\code{adjusted = FALSE}) or adjusted -##' retention times (\code{adjusted = TRUE}). -##' -##' @rdname XCMSnExp-filter-methods +#' @description \code{filterRt}: filters the data set based on the +#' provided retention time range. All chromatographic peaks and features +#' (grouped peaks) the specified retention time window are retained (i.e. if +#' the retention time corresponding to the peak's apex is within the +#' specified rt range). If retention time correction has been performed, +#' the method will by default filter the object by adjusted retention times. +#' The argument \code{adjusted} allows to specify manually whether filtering +#' should be performed by raw or adjusted retention times. Filtering by +#' retention time does not drop any preprocessing results. +#' The method returns an empty object if no spectrum or feature is within +#' the specified retention time range. +#' +#' @param rt For \code{filterRt}: \code{numeric(2)} defining the retention time +#' window (lower and upper bound) for the filtering. +#' +#' @param adjusted For \code{filterRt}: \code{logical} indicating whether the +#' object should be filtered by original (\code{adjusted = FALSE}) or +#' adjusted retention times (\code{adjusted = TRUE}). +#' For \code{spectra}: whether the retention times in the individual +#' \code{Spectrum} objects should be the adjusted or raw retention times. 
+#' +#' @rdname XCMSnExp-filter-methods setMethod("filterRt", "XCMSnExp", function(object, rt, msLevel., - adjusted = FALSE) { + adjusted = hasAdjustedRtime(object)) { if (missing(rt)) return(object) if (!missing(msLevel.)) @@ -787,11 +1098,11 @@ setMethod("filterRt", "XCMSnExp", function(object, rt, msLevel., rt <- range(rt) ## Get index of spectra within the rt window. ## Subset using [ + ## Subset peaks ## Subset features - ## Subset feature groups ## Subset adjusted retention time if (!adjusted) { - have_rt <- rtime(object) + have_rt <- rtime(object, adjusted = FALSE, bySample = FALSE) } else { have_rt <- adjustedRtime(object, bySample = FALSE) if (is.null(have_rt)) @@ -801,7 +1112,7 @@ setMethod("filterRt", "XCMSnExp", function(object, rt, msLevel., msg <- paste0("Filter: select retention time [", paste0(rt, collapse = "-"), "] and MS level(s), ", - paste(unique(msLevel(object)), + paste(base::unique(msLevel(object)), collapse = " ")) msg <- paste0(msg, " [", date(), "]") if (!any(keep_logical)) { @@ -814,35 +1125,57 @@ setMethod("filterRt", "XCMSnExp", function(object, rt, msLevel., ## mfd <- as(.copy_env(object@msFeatureData), "MsFeatureData") newMfd <- new("MsFeatureData") ph <- processHistory(object) - ## 1) Subset features within the retention time range and feature groups. + ## 1) Subset peaks within the retention time range and peak groups. keep_fts <- numeric() - if (hasDetectedFeatures(object)) { - keep_fts <- which(features(object)[, "rtmin"] >= rt[1] & - features(object)[, "rtmax"] <= rt[2]) + if (hasChromPeaks(object)) { + ftrt <- chromPeaks(object)[, "rt"] + if (!adjusted & hasAdjustedRtime(object)) { + ## Have to convert the rt before subsetting. 
+ fts <- .applyRtAdjToChromPeaks(chromPeaks(object), + rtraw = rtime(object, bySample = TRUE), + rtadj = rtime(object, bySample = TRUE, + adjusted = FALSE)) + ftrt <- fts[, "rt"] + } + keep_fts <- base::which(ftrt >= rt[1] & ftrt <= rt[2]) if (length(keep_fts)) - newMfd <- .filterFeatures(object, idx = keep_fts) + newMfd <- .filterChromPeaks(object, idx = keep_fts) ## features(newMfd) <- features(object)[keep_fts, , drop = FALSE] else ph <- dropProcessHistoriesList(ph, - type = c(.PROCSTEP.FEATURE.DETECTION, - .PROCSTEP.FEATURE.ALIGNMENT, - .PROCSTEP.RTIME.CORRECTION)) + type = c(.PROCSTEP.PEAK.DETECTION, + .PROCSTEP.PEAK.GROUPING)) } ## 2) Subset adjusted retention time - if (hasAdjustedRtime(object) & length(keep_fts)) { + ## if (hasAdjustedRtime(object) & length(keep_fts)) { + if (hasAdjustedRtime(object)) { ## Subset the adjusted retention times (which are stored as a list of ## rts by file): - keep_by_file <- split(keep_logical, fromFile(object)) - adj_rt <- mapply(FUN = function(y, z) { + keep_by_file <- base::split(keep_logical, fromFile(object)) + adj_rt <- base::mapply(FUN = function(y, z) { return(y[z]) }, y = adjustedRtime(object, bySample = TRUE), z = keep_by_file, SIMPLIFY = FALSE) adjustedRtime(newMfd) <- adj_rt } ## 3) Subset the OnDiskMSnExp part - suppressWarnings( - object <- object[which(keep_logical)] - ) + ## suppressWarnings( + ## Specifically call the [ from the OnDiskMSnExp! + ## Otherwise we unnecessarily have to drop stuff which has a negative + ## impact on performance. + ## theM <- getMethod("[", signature = c(x = "OnDiskMSnExp", + ## i = "logicalOrNumeric", + ## j = "missing", + ## drop = "missing")) + ## object <- theM(x = object, i = base::which(keep_logical)) + ## ) + ## Fix for issue #124 + ## Now, this casting is not ideal - have to find an easier way to call the + ## subset method from OnDiskMSnExp... + ## Note: this is still slightly faster than dropping the msFeatureData and + ## calling it on the XCMSnExp! 
+ tmp <- as(object, "OnDiskMSnExp")[base::which(keep_logical)] + object <- as(tmp, "XCMSnExp") ## Put the stuff back object@processingData@processing <- c(object@processingData@processing, msg) lockEnvironment(newMfd, bindings = TRUE) @@ -852,109 +1185,1228 @@ setMethod("filterRt", "XCMSnExp", function(object, rt, msLevel., return(object) }) -##' The \code{normalize} method performs basic normalization of spectra -##' intensities. See \code{\link[MSnbase]{normalize}} documentation for details -##' and examples. -##' -##' @param method For \code{normalize}: \code{character(1)} specifying the -##' normalization method. See \code{\link[MSnbase]{normalize}} for details. -##' For \code{pickPeaks}: \code{character(1)} defining the method. See -##' \code{\link[MSnbase]{pickPeaks}} for options. For \code{smooth}: -##' \code{character(1)} defining the method. See \code{\link[MSnbase]{smooth}} -##' for options and details. -##' -##' @rdname XCMSnExp-inherited-methods + +#' @description The \code{normalize} method performs basic normalization of +#' spectra intensities. See \code{\link[MSnbase]{normalize}} documentation +#' for details and examples. +#' +#' @param method For \code{normalize}: \code{character(1)} specifying the +#' normalization method. See \code{\link[MSnbase]{normalize}} for details. +#' For \code{pickPeaks}: \code{character(1)} defining the method. See +#' \code{\link[MSnbase]{pickPeaks}} for options. For \code{smooth}: +#' \code{character(1)} defining the method. See +#' \code{\link[MSnbase]{smooth}} for options and details. +#' +#' @rdname XCMSnExp-inherited-methods setMethod("normalize", "XCMSnExp", function(object, method = c("max", "sum"), ...) 
{ - if (hasAdjustedRtime(object) | hasAlignedFeatures(object) | - hasDetectedFeatures(object)) { + if (hasAdjustedRtime(object) | hasFeatures(object) | + hasChromPeaks(object)) { ## object@.processHistory <- list() ## object@msFeatureData <- new("MsFeatureData") object <- dropAdjustedRtime(object) - object <- dropFeatureGroups(object) - object <- dropFeatures(object) + object <- dropFeatureDefinitions(object) + object <- dropChromPeaks(object) warning("Removed preprocessing results") } callNextMethod() }) -##' The \code{pickPeaks} method performs peak picking. See -##' \code{\link[MSnbase]{pickPeaks}} documentation for details and examples. -##' -##' @param halfWindowSize For \code{pickPeaks} and \code{smooth}: -##' \code{integer(1)} defining the window size for the peak picking. See -##' \code{\link[MSnbase]{pickPeaks}} and \code{\link[MSnbase]{smooth}} for -##' details and options. -##' -##' @param SNR For \code{pickPeaks}: \code{numeric(1)} defining the signal to -##' noise ratio to be considered. See \code{\link[MSnbase]{pickPeaks}} -##' documentation for details. -##' -##' @param ... Optional additional arguments. -##' -##' @rdname XCMSnExp-inherited-methods +#' @description The \code{pickPeaks} method performs peak picking. See +#' \code{\link[MSnbase]{pickPeaks}} documentation for details and examples. +#' +#' @param halfWindowSize For \code{pickPeaks} and \code{smooth}: +#' \code{integer(1)} defining the window size for the peak picking. See +#' \code{\link[MSnbase]{pickPeaks}} and \code{\link[MSnbase]{smooth}} for +#' details and options. +#' +#' @param SNR For \code{pickPeaks}: \code{numeric(1)} defining the signal to +#' noise ratio to be considered. See \code{\link[MSnbase]{pickPeaks}} +#' documentation for details. +#' +#' @param ... Optional additional arguments. +#' +#' @rdname XCMSnExp-inherited-methods setMethod("pickPeaks", "XCMSnExp", function(object, halfWindowSize = 3L, method = c("MAD", "SuperSmoother"), SNR = 0L, ...) 
{ - if (hasAdjustedRtime(object) | hasAlignedFeatures(object) | - hasDetectedFeatures(object)) { + if (hasAdjustedRtime(object) | hasFeatures(object) | + hasChromPeaks(object)) { ## object@.processHistory <- list() ## object@msFeatureData <- new("MsFeatureData") object <- dropAdjustedRtime(object) - object <- dropFeatureGroups(object) - object <- dropFeatures(object) + object <- dropFeatureDefinitions(object) + object <- dropChromPeaks(object) warning("Removed preprocessing results") } callNextMethod() }) -##' The \code{removePeaks} method removes peaks (intensities) lower than a -##' threshold. Note that these peaks are not features! See \code{\link[MSnbase]{removePeaks}} documentation for details and examples. -##' -##' @param t For \code{removePeaks}: either a \code{numeric(1)} or \code{"min"} -##' defining the threshold (method) to be used. See -##' \code{\link[MSnbase]{removePeaks}} for details. -##' -##' @rdname XCMSnExp-inherited-methods +#' @description The \code{removePeaks} method removes mass peaks (intensities) +#' lower than a threshold. Note that these peaks refer to \emph{mass} +#' peaks, which are different to the chromatographic peaks detected and +#' analyzed in a metabolomics experiment! See +#' \code{\link[MSnbase]{removePeaks}} documentation for details and +#' examples. +#' +#' @param t For \code{removePeaks}: either a \code{numeric(1)} or \code{"min"} +#' defining the threshold (method) to be used. See +#' \code{\link[MSnbase]{removePeaks}} for details. +#' +#' @rdname XCMSnExp-inherited-methods setMethod("removePeaks", "XCMSnExp", function(object, t = "min", verbose = FALSE, msLevel.) 
{ - if (hasAdjustedRtime(object) | hasAlignedFeatures(object) | - hasDetectedFeatures(object)) { + if (hasAdjustedRtime(object) | hasFeatures(object) | + hasChromPeaks(object)) { ## object@.processHistory <- list() ## object@msFeatureData <- new("MsFeatureData") object <- dropAdjustedRtime(object) - object <- dropFeatureGroups(object) - object <- dropFeatures(object) + object <- dropFeatureDefinitions(object) + object <- dropChromPeaks(object) warning("Removed preprocessing results") } callNextMethod() }) -##' The \code{smooth} method smooths spectra. See \code{\link[MSnbase]{smooth}} -##' documentation for details and examples. -##' -##' @rdname XCMSnExp-inherited-methods +#' @description The \code{smooth} method smooths spectra. See +#' \code{\link[MSnbase]{smooth}} documentation for details and examples. +#' +#' @rdname XCMSnExp-inherited-methods setMethod("smooth", "XCMSnExp", function(x, method = c("SavitzkyGolay", "MovingAverage"), halfWindowSize = 2L, verbose = FALSE, ...) { - if (hasAdjustedRtime(x) | hasAlignedFeatures(x) | - hasDetectedFeatures(x)) { + if (hasAdjustedRtime(x) | hasFeatures(x) | + hasChromPeaks(x)) { ## x@.processHistory <- list() ## x@msFeatureData <- new("MsFeatureData") x <- dropAdjustedRtime(x) - x <- dropFeatureGroups(x) - x <- dropFeatures(x) + x <- dropFeatureDefinitions(x) + x <- dropChromPeaks(x) warning("Removed preprocessing results") } callNextMethod() }) -## @param from For \code{setAs} and \code{as}: an \code{XCMSnExp} object. 
-## @param to For \code{setAs} and \code{as}: \code{"xcmsSet"} -##' @title Data container storing xcms preprocessing results -##' -##' @aliases setAs -##' @rdname XCMSnExp-class -##' @name XCMSnExp-class +#' @title Data container storing xcms preprocessing results +#' +#' @aliases setAs +#' +#' @rdname XCMSnExp-class +#' +#' @name XCMSnExp-class setAs(from = "XCMSnExp", to = "xcmsSet", def = .XCMSnExp2xcmsSet) + + +#' @title Peak grouping/correspondence based on time dimension peak densities +#' +#' @description \code{groupChromPeaks,XCMSnExp,PeakDensityParam}: +#' performs correspondence (peak grouping within and across samples) within +#' in mz dimension overlapping slices of MS data based on the density +#' distribution of the identified chromatographic peaks in the slice along +#' the time axis. +#' +#' @note Calling \code{groupChromPeaks} on an \code{XCMSnExp} object will cause +#' all eventually present previous correspondence results to be dropped. +#' +#' @param object For \code{groupChromPeaks}: an \code{\link{XCMSnExp}} object +#' containing the results from a previous peak detection analysis (see +#' \code{\link{findChromPeaks}}). +#' +#' For all other methods: a \code{PeakDensityParam} object. +#' +#' @param param A \code{PeakDensityParam} object containing all settings for +#' the peak grouping algorithm. +#' +#' @return For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the +#' results of the correspondence analysis. The definition of the resulting +#' mz-rt features can be accessed with the \code{\link{featureDefinitions}} +#' method. +#' +#' @seealso \code{\link{XCMSnExp}} for the object containing the results of +#' the correspondence. +#' +#' @rdname groupChromPeaks-density +setMethod("groupChromPeaks", + signature(object = "XCMSnExp", param = "PeakDensityParam"), + function(object, param) { + if (!hasChromPeaks(object)) + stop("No chromatographic peak detection results in 'object'! 
", + "Please perform first a peak detection using the ", + "'findChromPeaks' method.") + ## Get rid of any previous results. + if (hasFeatures(object)) + object <- dropFeatureDefinitions(object) + ## Check if we've got any sample groups: + if (length(sampleGroups(param)) == 0) { + sampleGroups(param) <- rep(1, length(fileNames(object))) + message("Empty 'sampleGroups' in 'param', assuming all ", + "samples to be in the same group.") + } else { + ## Check that the sampleGroups are OK + if (length(sampleGroups(param)) != length(fileNames(object))) + stop("The 'sampleGroups' value in the provided 'param' ", + "class does not match the number of available files/", + "samples!") + } + startDate <- date() + res <- do_groupChromPeaks_density(chromPeaks(object), + sampleGroups = sampleGroups(param), + bw = bw(param), + minFraction = minFraction(param), + minSamples = minSamples(param), + binSize = binSize(param), + maxFeatures = maxFeatures(param)) + xph <- XProcessHistory(param = param, date. = startDate, + type. = .PROCSTEP.PEAK.GROUPING, + fileIndex = 1:length(fileNames(object))) + object <- addProcessHistory(object, xph) + ## Add the results. + df <- DataFrame(res$featureDefinitions) + df$peakidx <- res$peakIndex + if (nrow(df) > 0) + rownames(df) <- .featureIDs(nrow(df)) + featureDefinitions(object) <- df + if (validObject(object)) + return(object) + }) + + +#' @title Single-spectrum non-chromatography MS data peak grouping +#' +#' @description \code{groupChromPeaks,XCMSnExp,MzClustParam}: +#' performs high resolution peak grouping for single spectrum +#' metabolomics data. +#' +#' @note Calling \code{groupChromPeaks} on an \code{XCMSnExp} object will cause +#' all eventually present previous correspondence results to be dropped. +#' +#' @param object For \code{groupChromPeaks}: an \code{\link{XCMSnExp}} object +#' containing the results from a previous chromatographic peak detection +#' analysis (see \code{\link{findChromPeaks}}). 
+#' +#' For all other methods: a \code{MzClustParam} object. +#' +#' @param param A \code{MzClustParam} object containing all settings for +#' the peak grouping algorithm. +#' +#' @return For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the +#' results of the peak grouping step (i.e. the features). These can be +#' accessed with the \code{\link{featureDefinitions}} method. +#' +#' @seealso \code{\link{XCMSnExp}} for the object containing the results of +#' the peak grouping. +#' +#' @rdname groupChromPeaks-mzClust +setMethod("groupChromPeaks", + signature(object = "XCMSnExp", param = "MzClustParam"), + function(object, param) { + if (!hasChromPeaks(object)) + stop("No chromatographic peak detection results in 'object'! ", + "Please perform first a peak detection using the ", + "'findChromPeak' method.") + ## I'm expecting a single spectrum per file! + rtL <- split(rtime(object), f = fromFile(object)) + if (any(lengths(rtL) > 1)) + stop("'object' contains multiple spectra per sample! This ", + "algorithm does only work for single spectra ", + "files/samples!") + ## Get rid of any previous results. + if (hasFeatures(object)) + object <- dropFeatureDefinitions(object) + ## Check if we've got any sample groups: + if (length(sampleGroups(param)) == 0) { + sampleGroups(param) <- rep(1, length(fileNames(object))) + message("Empty 'sampleGroups' in 'param', assuming all ", + "samples to be in the same group.") + } else { + ## Check that the sampleGroups are OK + if (length(sampleGroups(param)) != length(fileNames(object))) + stop("The 'sampleGroups' value in the provided 'param' ", + "class does not match the number of available files/", + "samples!") + } + startDate <- date() + res <- do_groupPeaks_mzClust(chromPeaks(object), + sampleGroups = sampleGroups(param), + ppm = ppm(param), + absMz = absMz(param), + minFraction = minFraction(param), + minSamples = minSamples(param)) + xph <- XProcessHistory(param = param, date. = startDate, + type. 
= .PROCSTEP.PEAK.GROUPING, + fileIndex = 1:length(fileNames(object))) + object <- addProcessHistory(object, xph) + ## Add the results. + df <- DataFrame(res$featureDefinitions) + df$peakidx <- res$peakIndex + if (nrow(df) > 0) + rownames(df) <- .featureIDs(nrow(df)) + featureDefinitions(object) <- df + if (validObject(object)) + return(object) + }) + + +#' @title Peak grouping/correspondence based on proximity in the mz-rt space +#' +#' @description \code{groupChromPeaks,XCMSnExp,NearestPeaksParam}: +#' performs peak grouping based on the proximity between chromatographic +#' peaks from different samples in the mz-rt range. +#' +#' @note Calling \code{groupChromPeaks} on an \code{XCMSnExp} object will cause +#' all eventually present previous alignment results to be dropped. +#' +#' @param object For \code{groupChromPeaks}: an \code{\link{XCMSnExp}} object +#' containing the results from a previous chromatographic peak detection +#' analysis (see \code{\link{findChromPeaks}}). +#' +#' For all other methods: a \code{NearestPeaksParam} object. +#' +#' @param param A \code{NearestPeaksParam} object containing all settings for +#' the peak grouping algorithm. +#' +#' @return For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the +#' results of the peak grouping/correspondence step (i.e. the mz-rt +#' features). These can be accessed with the +#' \code{\link{featureDefinitions}} method. +#' +#' @seealso \code{\link{XCMSnExp}} for the object containing the results of +#' the peak grouping. +#' +#' @rdname groupChromPeaks-nearest +setMethod("groupChromPeaks", + signature(object = "XCMSnExp", param = "NearestPeaksParam"), + function(object, param) { + if (!hasChromPeaks(object)) + stop("No chromatographic peak detection results in 'object'! ", + "Please perform first a peak detection using the ", + "'findChromPeaks' method.") + ## Get rid of any previous results. 
+ if (hasFeatures(object)) + object <- dropFeatureDefinitions(object) + ## Check if we've got any sample groups: + if (length(sampleGroups(param)) == 0) { + sampleGroups(param) <- rep(1, length(fileNames(object))) + message("Empty 'sampleGroups' in 'param', assuming all ", + "samples to be in the same group.") + } else { + ## Check that the sampleGroups are OK + if (length(sampleGroups(param)) != length(fileNames(object))) + stop("The 'sampleGroups' value in the provided 'param' ", + "class does not match the number of available files/", + "samples!") + } + startDate <- date() + res <- do_groupChromPeaks_nearest(chromPeaks(object), + sampleGroups = sampleGroups(param), + mzVsRtBalance = mzVsRtBalance(param), + absMz = absMz(param), + absRt = absRt(param), + kNN = kNN(param)) + xph <- XProcessHistory(param = param, date. = startDate, + type. = .PROCSTEP.PEAK.GROUPING, + fileIndex = 1:length(fileNames(object))) + object <- addProcessHistory(object, xph) + ## Add the results. + df <- DataFrame(res$featureDefinitions) + df$peakidx <- res$peakIndex + if (nrow(df) > 0) + rownames(df) <- .featureIDs(nrow(df)) + featureDefinitions(object) <- df + if (validObject(object)) + return(object) + }) + +#' @title Retention time correction based on alignment of house keeping peak +#' groups +#' +#' @description \code{adjustRtime,XCMSnExp,PeakGroupsParam}: +#' performs retention time correction based on the alignment of peak groups +#' (features) found in all/most samples. +#' +#' @note This method requires that a correspondence has been performed on the +#' data (see \code{\link{groupChromPeaks}}). Calling \code{adjustRtime} on +#' an \code{XCMSnExp} object will cause all peak grouping (correspondence) +#' results and any previous retention time adjustments to be dropped. +#' In some instances, the \code{adjustRtime,XCMSnExp,PeakGroupsParam} +#' re-adjusts adjusted retention times to ensure them being in the same +#' order than the raw (original) retention times. 
+#' +#' @param object For \code{adjustRtime}: an \code{\link{XCMSnExp}} object +#' containing the results from a previous chromatographic peak detection +#' (see \code{\link{findChromPeaks}}) and alignment analysis (see +#' \code{\link{groupChromPeaks}}). +#' +#' For all other methods: a \code{PeakGroupsParam} object. +#' +#' @param param A \code{PeakGroupsParam} object containing all settings for +#' the retention time correction method.. +#' +#' @return For \code{adjustRtime}: a \code{\link{XCMSnExp}} object with the +#' results of the retention time adjustment step. These can be accessed +#' with the \code{\link{adjustedRtime}} method. Retention time correction +#' does also adjust the retention time of the identified chromatographic +#' peaks (accessed \emph{via} \code{\link{chromPeaks}}. Note that retention +#' time correction drops all previous alignment results from the result +#' object. +#' +#' @seealso \code{\link{XCMSnExp}} for the object containing the results of +#' the alignment. +#' +#' @rdname adjustRtime-peakGroups +setMethod("adjustRtime", + signature(object = "XCMSnExp", param = "PeakGroupsParam"), + function(object, param) { + if (hasAdjustedRtime(object)) + object <- dropAdjustedRtime(object) + if (!hasChromPeaks(object)) + stop("No chromatographic peak detection results in 'object'! ", + "Please perform first a peak detection using the ", + "'findChromPeaks' method.") + if (!hasFeatures(object)) + stop("No feature definitions found in 'object'! Please ", + "perform first a peak grouping using the ", + "'groupChromPeak' method.") + startDate <- date() + ## If param does contain a peakGroupsMatrix extract that one, + ## otherwise generate it. 
+ if (nrow(peakGroupsMatrix(param))) + pkGrpMat <- peakGroupsMatrix(param) + else + pkGrpMat <- adjustRtimePeakGroups(object, param = param) + res <- do_adjustRtime_peakGroups( + chromPeaks(object), + peakIndex = featureDefinitions(object)$peakidx, + rtime = rtime(object, bySample = TRUE), + minFraction = minFraction(param), + extraPeaks = extraPeaks(param), + smooth = smooth(param), + span = span(param), + family = family(param), + peakGroupsMatrix = pkGrpMat + ) + ## Add the pkGrpMat that's being used to the param object. + peakGroupsMatrix(param) <- pkGrpMat + ## Dropping the peak groups but don't remove its process history + ## step. + ph <- processHistory(object, type = .PROCSTEP.PEAK.GROUPING) + object <- dropFeatureDefinitions(object) + ## Add the results. adjustedRtime<- should also fix the retention + ## times for the peaks! Want to keep also the latest alignment + ## information + adjustedRtime(object) <- res + if (length(ph)) { + object <- addProcessHistory(object, ph[[length(ph)]]) + } + ## Add the process history step. + xph <- XProcessHistory(param = param, date. = startDate, + type. = .PROCSTEP.RTIME.CORRECTION, + fileIndex = 1:length(fileNames(object))) + object <- addProcessHistory(object, xph) + if (validObject(object)) + object + }) + + +#' @title Align retention times across samples using Obiwarp +#' +#' @description \code{adjustRtime,XCMSnExp,ObiwarpParam}: +#' performs retention time correction/alignment based on the total mz-rt +#' data using the \emph{obiwarp} method. +#' +#' @note Calling \code{adjustRtime} on an \code{XCMSnExp} object will cause +#' all peak grouping (correspondence) results and any previous retention +#' time adjustment results to be dropped. +#' +#' @param object For \code{adjustRtime}: an \code{\link{XCMSnExp}} object. +#' +#' For all other methods: a \code{ObiwarpParam} object. +#' +#' @param param A \code{ObiwarpParam} object containing all settings for +#' the alignment method. 
+#' +#' @return For \code{adjustRtime,XCMSnExp,ObiwarpParam}: a +#' \code{\link{XCMSnExp}} object with the results of the retention time +#' adjustment step. These can be accessed with the +#' \code{\link{adjustedRtime}} method. Retention time correction does also +#' adjust the retention time of the identified chromatographic peaks +#' (accessed \emph{via} \code{\link{chromPeaks}}. Note that retention time +#' correction drops all previous peak grouping results from the result +#' object. +#' +#' For \code{adjustRtime,OnDiskMSnExp,ObiwarpParam}: a \code{numeric} with +#' the adjusted retention times per spectra (in the same order than +#' \code{rtime}). +#' +#' @seealso \code{\link{XCMSnExp}} for the object containing the results of +#' the alignment. +#' +#' @references +#' John T. Prince and Edward M. Marcotte. "Chromatographic Alignment of +#' ESI-LC-MS Proteomic Data Sets by Ordered Bijective Interpolated Warping" +#' \emph{Anal. Chem.} 2006, 78 (17), 6140-6152. +#' +#' @rdname adjustRtime-obiwarp +setMethod("adjustRtime", + signature(object = "XCMSnExp", param = "ObiwarpParam"), + function(object, param) { + ## Drop adjusted retention times if there are some. + if (hasAdjustedRtime(object)) + object <- dropAdjustedRtime(object) + ## We don't require any detected or aligned peaks. + startDate <- date() + res <- .obiwarp(as(object, "OnDiskMSnExp"), param = param) + ## Dropping the feature groups. + object <- dropFeatureDefinitions(object) + ## Add the results. adjustedRtime<- should also fix the retention + ## times for the peaks! Want to keep also the lates alignment + ## information + adjustedRtime(object) <- res + ## Add the process history step. + xph <- XProcessHistory(param = param, date. = startDate, + type. 
= .PROCSTEP.RTIME.CORRECTION, + fileIndex = 1:length(fileNames(object))) + object <- addProcessHistory(object, xph) + if (validObject(object)) + return(object) + }) + +## profMat for XCMSnExp +#' @rdname XCMSnExp-class +setMethod("profMat", signature(object = "XCMSnExp"), function(object, + method = "bin", + step = 0.1, + baselevel = NULL, + basespace = NULL, + mzrange. = NULL, + fileIndex, + ...) { + ## We want to coerce that as OnDiskMSnExp so we don't slow down in the + ## filterFile, that would, if rt adjustments are present, revert the whole + ## thing. + return(profMat(as(object, "OnDiskMSnExp"), method = method, step = step, + baselevel = baselevel, basespace = basespace, + mzrange. = mzrange., fileIndex = fileIndex, ...)) +}) + + +#' @aliases featureValues +#' +#' @title Accessing mz-rt feature data values +#' +#' @description \code{featureValues,XCMSnExp} : extract a \code{matrix} for +#' feature values with rows representing features and columns samples. +#' Parameter \code{value} allows to define which column from the +#' \code{\link{chromPeaks}} matrix should be returned. Multiple +#' chromatographic peaks from the same sample can be assigned to a feature. +#' Parameter \code{method} allows to specify the method to be used in such +#' cases to chose from which of the peaks the value should be returned. +#' +#' @note This method is equivalent to the \code{\link{groupval}} for +#' \code{xcmsSet} objects. +#' +#' @param object A \code{\link{XCMSnExp}} object providing the feature +#' definitions. +#' +#' @param method \code{character} specifying the method to resolve +#' multi-peak mappings within the same sample, i.e. to define the +#' \emph{representative} peak for a feature in samples where more than +#' one peak was assigned to the feature. If \code{"medret"}: select the +#' peak closest to the median retention time of the feature. +#' If \code{"maxint"}: select the peak yielding the largest signal. 
+#'
+#' @param value \code{character} specifying the name of the column in
+#' \code{chromPeaks(object)} that should be returned or \code{"index"} (the
+#' default) to return the index of the peak in the
+#' \code{chromPeaks(object)} matrix corresponding to the
+#' \emph{representative} peak for the feature in the respective sample.
+#'
+#' @param intensity \code{character} specifying the name of the column in the
+#' \code{chromPeaks(object)} matrix containing the intensity value of the
+#' peak that should be used for the conflict resolution if
+#' \code{method = "maxint"}.
+#'
+#' @param filled \code{logical(1)} specifying whether values for filled-in
+#' peaks should be returned or not. If \code{filled = FALSE}, an \code{NA}
+#' is returned in the matrix for the respective peak. See
+#' \code{\link{fillChromPeaks}} for details on peak filling.
+#'
+#' @return For \code{featureValues}: a \code{matrix} with
+#' feature values, columns representing samples, rows features. The order
+#' of the features matches the order found in the
+#' \code{featureDefinitions(object)} \code{DataFrame}. The rownames of the
+#' \code{matrix} are the same as those of the \code{featureDefinitions}
+#' \code{DataFrame}. \code{NA} is reported for features without
+#' corresponding chromatographic peak in the respective sample(s).
+#'
+#' @author Johannes Rainer
+#'
+#' @seealso
+#' \code{\link{XCMSnExp}} for information on the data object.
+#' \code{\link{featureDefinitions}} to extract the \code{DataFrame} with the
+#' feature definitions.
+#' \code{\link{hasFeatures}} to evaluate whether the
+#' \code{\link{XCMSnExp}} provides feature definitions.
+#' \code{\link{groupval}} for the equivalent method on \code{xcmsSet} objects.
+#' +#' @rdname XCMSnExp-peak-grouping-results +setMethod("featureValues", + signature(object = "XCMSnExp"), + function(object, method = c("medret", "maxint"), value = "index", + intensity = "into", filled = TRUE) { + ## Input argument checkings + if (!hasFeatures(object)) + stop("No peak groups present! Use 'groupChromPeaks' first.") + if (!hasChromPeaks(object)) + stop("No detected chromatographic peaks present! Use ", + "'findChromPeaks' first.") + method <- match.arg(method) + fNames <- basename(fileNames(object)) + nSamples <- seq_along(fNames) + ## Copy all of the objects to avoid costly S4 method calls - + ## improves speed at the cost of higher memory demand. + fts <- chromPeaks(object) + + ## issue #157: replace all values for filled-in peaks with NA + if (!filled) + fts[fts[, "is_filled"] == 1, ] <- NA + grps <- featureDefinitions(object) + ftIdx <- grps$peakidx + ## Match columns + idx_rt <- match("rt", colnames(fts)) + idx_int <- match(intensity, colnames(fts)) + idx_samp <- match("sample", colnames(fts)) + + vals <- matrix(nrow = length(ftIdx), ncol = length(nSamples)) + + ## Get the indices for the elements. 
+ if (method == "medret") {
+ medret <- grps$rtmed
+ for (i in seq_along(ftIdx)) {
+ gidx <- ftIdx[[i]][base::order(base::abs(fts[ftIdx[[i]],
+ idx_rt] -
+ medret[i]))]
+ vals[i, ] <- gidx[base::match(nSamples, fts[gidx,
+ idx_samp])]
+ }
+ } else {
+ for (i in seq_along(ftIdx)) {
+ gidx <- ftIdx[[i]][base::order(fts[ftIdx[[i]], idx_int],
+ decreasing = TRUE)]
+ vals[i, ] <- gidx[base::match(nSamples, fts[gidx, idx_samp])]
+ }
+ }
+
+ if (value != "index") {
+ if (!any(colnames(fts) == value))
+ stop("Column '", value,
+ "' not present in the chromatographic peaks matrix!")
+ vals <- fts[vals, value]
+ dim(vals) <- c(length(ftIdx), length(nSamples))
+ }
+ colnames(vals) <- fNames
+ rownames(vals) <- rownames(grps)
+ vals
+})
+## #' @rdname XCMSnExp-peak-grouping-results
+## setMethod("groupval",
+## signature(object = "XCMSnExp"),
+## function(object, method = c("medret", "maxint"), value = "index",
+## intensity = "into") {
+## featureValues(object = object, method = method, value = value,
+## intensity = intensity)
+## })
+
+
+#' @aliases extractChromatograms
+#'
+#' @title Extracting chromatograms
+#'
+#' @description \code{extractChromatograms}: the method allows to extract
+#' chromatograms from \code{\link[MSnbase]{OnDiskMSnExp}} and
+#' \code{\link{XCMSnExp}} objects.
+#'
+#' @details Arguments \code{rt} and \code{mz} allow to specify the MS
+#' data slice from which the chromatogram should be extracted.
+#' The parameter \code{aggregationFun} allows to specify the function to be
+#' used to aggregate the intensities across the mz range for the same
+#' retention time. Setting \code{aggregationFun = "sum"} would e.g. allow
+#' to calculate the \emph{total ion chromatogram} (TIC),
+#' \code{aggregationFun = "max"} the \emph{base peak chromatogram} (BPC).
+#' The length of the extracted \code{Chromatogram} object, i.e. the number
+#' of available data points, corresponds to the number of scans/spectra
+#' measured in the specified retention time range.
If in a specific scan +#' (for a give retention time) no signal was measured in the specified mz +#' range, a \code{NA_real_} is reported as intensity for the retention time +#' (see Notes for more information). This can be changed using the +#' \code{missing} parameter. +#' +#' @note \code{Chromatogram} objects extracted with \code{extractChromatogram} +#' contain \code{NA_real_} values if, for a given retention time, no +#' signal was measured in the specified mz range. If no spectrum/scan is +#' present in the defined retention time window a \code{Chromatogram} object +#' of length 0 is returned. +#' +#' For \code{\link{XCMSnExp}} objects, if adjusted retention times are +#' available, the \code{extractChromatograms} method will by default report +#' and use these (for the subsetting based on the provided parameter +#' \code{rt}). This can be overwritten with the parameter +#' \code{adjustedRtime}. +#' +#' @param object Either a \code{\link[MSnbase]{OnDiskMSnExp}} or +#' \code{\link{XCMSnExp}} object from which the chromatograms should be +#' extracted. +#' +#' @param rt \code{numeric(2)} or two-column \code{matrix} defining the lower +#' and upper boundary for the retention time range(s). If not specified, +#' the full retention time range of the original data will be used. +#' It is also possible to submit a \code{numeric(1)} in which case +#' \code{range} is called on it to transform it to a \code{numeric(2)}. +#' +#' @param mz \code{numeric(2)} or two-column \code{matrix} defining the lower +#' and upper mz value for the MS data slice(s). If not specified, the +#' chromatograms will be calculated on the full mz range. +#' It is also possible to submit a \code{numeric(1)} in which case +#' \code{range} is called on it to transform it to a \code{numeric(2)}. 
+#' +#' @param adjustedRtime For \code{extractChromatograms,XCMSnExp}: whether the +#' adjusted (\code{adjustedRtime = TRUE}) or raw retention times +#' (\code{adjustedRtime = FALSE}) should be used for filtering and returned +#' in the resulting \code{\link{Chromatogram}} object. Adjusted retention +#' times are used by default if available. +#' +#' @param aggregationFun \code{character} specifying the function to be used to +#' aggregate intensity values across the mz value range for the same +#' retention time. Allowed values are \code{"sum"}, \code{"max"}, +#' \code{"mean"} and \code{"min"}. +#' +#' @param missing \code{numeric(1)} allowing to specify the intensity value to +#' be used if for a given retention time no signal was measured within the +#' mz range of the corresponding scan. Defaults to \code{NA_real_} (see also +#' Details and Notes sections below). Use \code{missing = 0} to resemble the +#' behaviour of the \code{getEIC} from the \code{old} user interface. +#' +#' @return If a single \code{rt} and \code{mz} range was specified, +#' \code{extractChromatograms} returns a \code{list} of +#' \code{\link{Chromatogram}} classes each element being the chromatogram +#' for one of the samples for the specified range. +#' If multiple \code{rt} and \code{mz} ranges were provided (i.e. by passing +#' a multi-row \code{matrix} to parameters \code{rt} or \code{mz}), the +#' function returns a \code{list} of \code{list}s. The outer list +#' representing results for the various ranges, the inner the result across +#' files. In other words, \code{result[[1]]} returns a \code{list} with +#' \code{Chromatogram} classes length equal to the number of files, each +#' element representing the \code{Chromatogram} for the first rt/mz range +#' for one file. +#' An empty \code{list} is returned if no MS1 data is present in +#' \code{object} or if not a single spectrum is available for any of the +#' provided retention time ranges in \code{rt}. 
An empty \code{Chromatogram} +#' object is returned at the correponding position in the result \code{list} +#' if for the specific file no scan/spectrum was measured in the provided +#' rt window. In all other cases, a \code{Chromatogram} with length equal +#' to the number of scans/spectra in the provided rt range is returned. +#' +#' @author Johannes Rainer +#' +#' @seealso \code{\link{XCMSnExp}} for the data object. +#' \code{\link{Chromatogram}} for the object representing chromatographic +#' data. +#' +#' \code{\link{plotChromatogram}} to plot a \code{Chromatogram} or +#' \code{list} of such objects. +#' +#' \code{\link{extractMsData}} for a method to extract the MS data as +#' \code{data.frame}. +#' +#' @export +#' +#' @rdname extractChromatograms-method +#' +#' @examples +#' ## Read some files from the faahKO package. +#' library(xcms) +#' library(faahKO) +#' faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), +#' system.file('cdf/KO/ko16.CDF', package = "faahKO"), +#' system.file('cdf/KO/ko18.CDF', package = "faahKO")) +#' +#' od <- readMSData2(faahko_3_files) +#' +#' ## Extract the ion chromatogram for one chromatographic peak in the data. +#' chrs <- extractChromatograms(od, rt = c(2700, 2900), mz = 335) +#' +#' ## plot the data +#' plot(rtime(chrs[[2]]), intensity(chrs[[2]]), type = "l", xlab = "rtime", +#' ylab = "intensity", col = "000080") +#' for(i in c(1, 3)) { +#' points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", col = "00000080") +#' } +#' +#' ## Plot the chromatogram using plotChromatogram +#' plotChromatogram(chrs) +#' +#' ## Extract chromatograms for multiple ranges. 
+#' mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) +#' rtr <- matrix(c(2700, 2900, 2600, 2750), ncol = 2, byrow = TRUE) +#' chrs <- extractChromatograms(od, mz = mzr, rt = rtr) +#' +#' ## Plot the extracted chromatograms +#' par(mfrow = c(1, 2)) +#' plotChromatogram(chrs[[1]]) +#' plotChromatogram(chrs[[2]]) +setMethod("extractChromatograms", + signature(object = "XCMSnExp"), + function(object, rt, mz, adjustedRtime = hasAdjustedRtime(object), + aggregationFun = "sum", missing = NA_real_) { + ## Coerce to OnDiskMSnExp. + if (adjustedRtime) + adj_rt <- rtime(object, adjusted = TRUE) + object <- as(object, "OnDiskMSnExp") + if (adjustedRtime) { + ## Replace the original rtime with adjusted ones... + object@featureData$retentionTime <- adj_rt + } + extractChromatograms(object, rt = rt, mz = mz, + aggregationFun = aggregationFun, + missing = missing) + ## .extractChromatogram(x = object, rt = rt, mz = mz, + ## aggregationFun = aggregationFun, + ## adjusted = adjustedRtime) + }) + +#' @rdname XCMSnExp-class +#' +#' @param param A \code{\link{CentWaveParam}}, \code{\link{MatchedFilterParam}}, +#' \code{\link{MassifquantParam}}, \code{\link{MSWParam}} or +#' \code{\link{CentWavePredIsoParam}} object with the settings for the +#' chromatographic peak detection algorithm. +#' +#' @inheritParams findChromPeaks-centWave +setMethod("findChromPeaks", + signature(object = "XCMSnExp", param = "ANY"), + function(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") { + ## Remove all previous results. 
+ if (hasFeatures(object)) + object <- dropFeatureDefinitions(object) + if (hasAdjustedRtime(object)) + object <- dropAdjustedRtime(object) + if (hasChromPeaks(object)) + object <- dropChromPeaks(object) + suppressMessages( + object <- callNextMethod() + ) + object@.processHistory <- list() + return(object) +}) + +#' @aliases fillChromPeaks +#' +#' @title Integrate areas of missing peaks +#' +#' @description Integrate signal in the mz-rt area of a feature (chromatographic +#' peak group) for samples in which no chromatographic peak for this +#' feature was identified and add it to the \code{chromPeaks}. Such peaks +#' will have a value of \code{1} in the \code{"is_filled"} column of the +#' \code{\link{chromPeaks}} matrix of the object. +#' +#' @details After correspondence (i.e. grouping of chromatographic peaks across +#' samples) there will always be features (peak groups) that do not include +#' peaks from every sample. The \code{fillChromPeaks} method defines +#' intensity values for such features in the missing samples by integrating +#' the signal in the mz-rt region of the feature. The mz-rt area is defined +#' by the median mz and rt start and end points of the other detected +#' chromatographic peaks for a given feature. +#' +#' Adjusted retention times will be used if available. +#' +#' Based on the peak finding algorithm that was used to identify the +#' (chromatographic) peaks different internal functions are employed to +#' guarantee that the integrated peak signal matches as much as possible +#' the peak signal integration used during the peak detection. For peaks +#' identified with the \code{\link{matchedFilter}} method, signal +#' integration is performed on the \emph{profile matrix} generated with +#' the same settings used also during peak finding (using the same +#' \code{bin} size for example). For direct injection data and peaks +#' identified with the \code{\link{MSW}} algorithm signal is integrated +#' only along the mz dimension. 
For all other methods the complete (raw)
+#' signal within the area defined by \code{"mzmin"}, \code{"mzmax"},
+#' \code{"rtmin"} and \code{"rtmax"} is used.
+#'
+#' @note The reported \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} and
+#' \code{"rtmax"} for the filled peaks represent the actual MS area from
+#' which the signal was integrated.
+#' Note that no peak is filled in if no signal was present in a file/sample
+#' in the respective mz-rt area. These samples will still show a \code{NA}
+#' in the matrix returned by the \code{\link{featureValues}} method. This
+#' is in contrast to the \code{\link{fillPeaks.chrom}} method that returned
+#' an \code{"into"} and \code{"maxo"} of \code{0} for such peak areas.
+#' Growing the mz-rt area using the \code{expandMz} and \code{expandRt}
+#' might help to reduce the number of missing peak signals after filling.
+#'
+#' @param object \code{XCMSnExp} object with identified and grouped
+#' chromatographic peaks.
+#'
+#' @param param A \code{FillChromPeaksParam} object with all settings.
+#'
+#' @param expandMz \code{numeric(1)} defining the value by which the mz width of
+#' peaks should be expanded. Each peak is expanded in mz direction by
+#' \code{expandMz *} their original mz width. A value of \code{0} means no
+#' expansion, a value of \code{1} grows each peak by 1 * the mz width of
+#' the peak resulting in peaks with twice their original size in mz
+#' direction (expansion by half mz width to both sides).
+#'
+#' @param expandRt \code{numeric(1)}, same as \code{expandMz} but for the
+#' retention time width.
+#'
+#' @param ppm \code{numeric(1)} optionally specifying a \emph{ppm} by which the
+#' mz width of the peak region should be expanded.
For peaks with an mz +#' width smaller than \code{mean(c(mzmin, mzmax)) * ppm / 1e6}, the +#' \code{mzmin} will be replaced by +#' \code{mean(c(mzmin, mzmax)) - (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)} +#' and \code{mzmax} by +#' \code{mean(c(mzmin, mzmax)) + (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)}. +#' This is applied before eventually expanding the mz width using the +#' \code{expandMz} parameter. +#' +#' @param BPPARAM Parallel processing settings. +#' +#' @return A \code{\link{XCMSnExp}} object with previously missing +#' chromatographic peaks for features filled into its \code{chromPeaks} +#' matrix. +#' +#' @rdname fillChromPeaks +#' +#' @author Johannes Rainer +#' +#' @seealso \code{\link{groupChromPeaks}} for methods to perform the +#' correspondence. +#' \code{\link{dropFilledChromPeaks}} for the method to remove filled in peaks. +#' +#' @examples +#' +#' ## Perform the peak detection using centWave on some of the files from the +#' ## faahKO package. Files are read using the readMSData2 from the MSnbase +#' ## package +#' library(faahKO) +#' library(xcms) +#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, +#' full.names = TRUE) +#' raw_data <- readMSData2(fls[1:2]) +#' +#' ## Create a CentWaveParam object. Note that the noise is set to 10000 to +#' ## speed up the execution of the example - in a real use case the default +#' ## value should be used, or it should be set to a reasonable value. +#' cwp <- CentWaveParam(ppm = 20, noise = 10000, snthresh = 25) +#' +#' res <- findChromPeaks(raw_data, param = cwp) +#' +#' ## Perform the correspondence. +#' res <- groupChromPeaks(res, param = PeakDensityParam()) +#' +#' ## For how many features do we lack an integrated peak signal? +#' sum(is.na(featureValues(res))) +#' +#' ## Filling missing peak data using default settings. 
+#' res <- fillChromPeaks(res) +#' +#' ## Get the peaks that have been filled in: +#' fp <- chromPeaks(res)[chromPeaks(res)[, "is_filled"] == 1, ] +#' head(fp) +#' +#' ## Did we get a signal for all missing peaks? +#' sum(is.na(featureValues(res))) +#' +#' ## No. +#' +#' ## Get the process history step along with the parameters used to perform +#' ## The peak filling: +#' ph <- processHistory(res, type = "Missing peak filling")[[1]] +#' ph +#' +#' ## The parameter class: +#' ph@param +#' +#' ## Drop the filled in peaks: +#' res <- dropFilledChromPeaks(res) +#' +#' ## Perform the peak filling with modified settings: allow expansion of the +#' ## mz range by a specified ppm and expanding the mz range by mz width/2 +#' prm <- FillChromPeaksParam(ppm = 40, expandMz = 0.5) +#' res <- fillChromPeaks(res, param = prm) +#' +#' ## Did we get a signal for all missing peaks? +#' sum(is.na(featureValues(res))) +#' +#' ## Still the same missing peaks. +setMethod("fillChromPeaks", + signature(object = "XCMSnExp", param = "FillChromPeaksParam"), + function(object, param, BPPARAM = bpparam()) { + if (!hasFeatures(object)) + stop("'object' does not provide feature definitions! Please ", + "run 'groupChromPeaks' first.") + ## Don't do that if we have already filled peaks? + if (.hasFilledPeaks(object)) + message("Filled peaks already present, adding still missing", + " peaks.") + + startDate <- date() + expandMz <- expandMz(param) + expandRt <- expandRt(param) + ppm <- ppm(param) + ## Define or extend the peak area from which the signal should be + ## extracted. + ## Original code: use the median of the min/max rt and mz per peak. + fdef <- featureDefinitions(object) + aggFunLow <- median + aggFunHigh <- median + ## Note: we ensure in the downstream function that the rt range is + ## within the rt range. For the mz range it doesn't matter. 
+ pkArea <- do.call( + rbind, + lapply( + fdef$peakidx, function(z) { + tmp <- chromPeaks(object)[z, c("rtmin", "rtmax", + "mzmin", "mzmax"), + drop = FALSE] + pa <- c(aggFunLow(tmp[, 1]), aggFunHigh(tmp[, 2]), + aggFunLow(tmp[, 3]), aggFunHigh(tmp[, 4])) + ## Check if we have to apply ppm replacement: + if (ppm != 0) { + mzmean <- mean(pa[3:4]) + tittle <- mzmean * (ppm / 2) / 1E6 + if ((pa[4] - pa[3]) < (tittle * 2)) { + pa[3] <- mzmean - tittle + pa[4] <- mzmean + tittle + } + } + ## Expand it. + if (expandRt != 0) { + diffRt <- (pa[2] - pa[1]) * expandRt / 2 + pa[1] <- pa[1] - diffRt + pa[2] <- pa[2] + diffRt + } + if (expandMz != 0) { + diffMz <- (pa[4] - pa[3]) * expandMz / 2 + pa[3] <- pa[3] - diffMz + pa[4] <- pa[4] + diffMz + } + return(pa) + } + )) + colnames(pkArea) <- c("rtmin", "rtmax", "mzmin", "mzmax") + ## Add mzmed column - needed for MSW peak filling. + pkArea <- cbind(group_idx = 1:nrow(pkArea), pkArea, + mzmed = as.numeric(fdef$mzmed)) + + pkGrpVal <- featureValues(object) + ## Check if there is anything to fill... + if (!any(is.na(rowSums(pkGrpVal)))) { + message("No missing peaks present.") + return(object) + } + ## Split the object by file and define the peaks for which + objectL <- vector("list", length(fileNames(object))) + pkAreaL <- objectL + for (i in 1:length(fileNames(object))) { + suppressMessages( + objectL[[i]] <- filterFile(object, file = i, + keepAdjustedRtime = TRUE) + ) + ## Want to extract intensities only for peaks that were not + ## found in a sample. + pkAreaL[[i]] <- pkArea[is.na(pkGrpVal[, i]), , drop = FALSE] + } + + ## Get to know what algorithm was used for the peak detection. + ## Special cases are MSWParam (no retention time) and + ## MatchedFilterParam (integrate from profile matrix). 
+ ph <- processHistory(object, type = .PROCSTEP.PEAK.DETECTION) + findPeakMethod <- "unknown" + mzCenterFun <- "wMean" + if (length(ph)) { + if (is(ph[[1]], "XProcessHistory")) { + prm <- ph[[1]]@param + findPeakMethod <- .param2string(prm) + ## Check if the param class has a mzCenterFun slot + if (.hasSlot(prm, "mzCenterFun")) + mzCenterFun <- prm@mzCenterFun + } + } + ## Now rename that to the correct function name in xcms. + mzCenterFun <- paste("mzCenter", + gsub(mzCenterFun, pattern = "mzCenter.", + replacement = "", fixed = TRUE), sep=".") + if (findPeakMethod == "MSW") { + rts <- rtime(object, bySample = TRUE) + ## Ensure that we REALLY have direct injection data. + if (any(lengths(rts) > 1)) + stop("The data is supposed to be direct injection data, ", + "but I got files with more than one spectrum/", + "retention time!") + ## That's not working, because integration uses the rt. + res <- bpmapply(FUN = .getMSWPeakData, objectL, + pkAreaL, as.list(1:length(objectL)), + MoreArgs = list( + cn = colnames(chromPeaks(object))), + BPPARAM = BPPARAM, SIMPLIFY = FALSE) + } else if (findPeakMethod == "matchedFilter") { + res <- bpmapply(FUN = .getChromPeakData_matchedFilter, + objectL, pkAreaL, as.list(1:length(objectL)), + MoreArgs = list( + cn = colnames(chromPeaks(object)), + param = prm + ), + BPPARAM = BPPARAM, SIMPLIFY = FALSE) + } else { + res <- bpmapply(FUN = .getChromPeakData, objectL, + pkAreaL, as.list(1:length(objectL)), + MoreArgs = list( + cn = colnames(chromPeaks(object)), + mzCenterFun = mzCenterFun), + BPPARAM = BPPARAM, SIMPLIFY = FALSE) + } + + res <- do.call(rbind, res) + if (any(colnames(res) == "is_filled")) + res[, "is_filled"] <- 1 + else + res <- cbind(res, is_filled = 1) + ## cbind the group_idx column to track the feature/peak group. 
+ res <- cbind(res, group_idx = do.call(rbind, pkAreaL)[, "group_idx"]) + ## Remove those without a signal + res <- res[!is.na(res[, "into"]), , drop = FALSE] + if (nrow(res) == 0) { + warning("Could not integrate any signal for the missing ", + "peaks! Consider increasing 'expandMz' and 'expandRt'.") + return(object) + } + + ## Get the msFeatureData: + newFd <- new("MsFeatureData") + newFd@.xData <- .copy_env(object@msFeatureData) + incr <- nrow(chromPeaks(object)) + for (i in unique(res[, "group_idx"])) { + fdef$peakidx[[i]] <- c(fdef$peakidx[[i]], + (which(res[, "group_idx"] == i) + incr)) + } + + chromPeaks(newFd) <- rbind(chromPeaks(object), res[, -ncol(res)]) + featureDefinitions(newFd) <- fdef + lockEnvironment(newFd, bindings = TRUE) + object@msFeatureData <- newFd + ## Add a process history step + ph <- XProcessHistory(param = param, + date. = startDate, + type. = .PROCSTEP.PEAK.FILLING, + fileIndex = 1:length(fileNames(object))) + object <- addProcessHistory(object, ph) ## this also validates object. + return(object) + }) + +#' @rdname fillChromPeaks +setMethod( + "fillChromPeaks", + signature(object = "XCMSnExp", param = "missing"), + function(object, + param, + BPPARAM = bpparam()) { + fillChromPeaks(object, param = FillChromPeaksParam(), + BPPARAM = BPPARAM) + }) + +#' @aliases dropFilledChromPeaks +#' +#' @description \code{dropFilledChromPeaks}: drops any filled-in chromatographic +#' peaks (filled in by the \code{\link{fillChromPeaks}} method) and all +#' related process history steps. +#' +#' @rdname XCMSnExp-class +#' +#' @seealso \code{\link{fillChromPeaks}} for the method to fill-in eventually +#' missing chromatographic peaks for a feature in some samples. 
+setMethod("dropFilledChromPeaks", "XCMSnExp", function(object) { + if (!.hasFilledPeaks(object)) + return(object) + keep_pks <- which(chromPeaks(object)[, "is_filled"] == 0) + newFd <- new("MsFeatureData") + newFd@.xData <- .copy_env(object@msFeatureData) + ## Update index in featureDefinitions + fd <- featureDefinitions(newFd) + fd <- split(fd, 1:nrow(fd)) + fdL <- lapply(fd, function(z) { + z$peakidx <- list(z$peakidx[[1]][z$peakidx[[1]] %in% keep_pks]) + return(z) + }) + featureDefinitions(newFd) <- do.call(rbind, fdL) + ## Remove peaks + chromPeaks(newFd) <- chromPeaks(newFd)[keep_pks, , drop = FALSE] + ## newFd <- .filterChromPeaks(object@msFeatureData, idx = keep_pks) + object@msFeatureData <- newFd + object <- dropProcessHistories(object, type = .PROCSTEP.PEAK.FILLING) + if (validObject(object)) + return(object) +}) + +#' @aliases extractMsData +#' +#' @title Extract a \code{data.frame} containing MS data +#' +#' @description Extract a \code{data.frame} of retention time, mz and intensity +#' values from each file/sample in the provided rt-mz range (or for the full +#' data range if \code{rt} and \code{mz} are not defined). +#' +#' @param object A \code{XCMSnExp} or \code{OnDiskMSnExp} object. +#' +#' @param rt \code{numeric(2)} with the retention time range from which the +#' data should be extracted. +#' +#' @param mz \code{numeric(2)} with the mz range. +#' +#' @param adjustedRtime (for \code{extractMsData,XCMSnExp}): \code{logical(1)} +#' specifying if adjusted or raw retention times should be reported. +#' Defaults to adjusted retention times, if these are present in +#' \code{object}. +#' +#' @return A \code{list} of length equal to the number of samples/files in +#' \code{object}. Each element being a \code{data.frame} with columns +#' \code{"rt"}, \code{"mz"} and \code{"i"} with the retention time, mz and +#' intensity tuples of a file. 
If no data is available for the mz-rt range +#' in a file a \code{data.frame} with 0 rows is returned for that file. +#' +#' @seealso \code{\link{XCMSnExp}} for the data object. +#' +#' @rdname extractMsData-method +#' +#' @author Johannes Rainer +#' +#' @examples +#' ## Read some files from the test data package. +#' library(faahKO) +#' library(xcms) +#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, +#' full.names = TRUE) +#' raw_data <- readMSData2(fls[1:2]) +#' +#' ## Read the full MS data for a defined mz-rt region. +#' res <- extractMsData(raw_data, mz = c(300, 320), rt = c(2700, 2900)) +#' +#' ## We've got one data.frame per file +#' length(res) +#' +#' ## With number of rows: +#' nrow(res[[1]]) +#' +#' head(res[[1]]) +setMethod("extractMsData", "XCMSnExp", + function(object, rt, mz, adjustedRtime = hasAdjustedRtime(object)){ + ## Now, this method takes the adjusted rts, casts the object to + ## an OnDiskMSnExp, eventually replaces the rtime in the + ## featureData with the adjusted retention times (depending on + ## adjustedRtime and calls the method for OnDiskMSnExp. + if (adjustedRtime & hasAdjustedRtime(object)) { + fData(object)$retentionTime <- rtime(object, adjusted = TRUE) + } + object <- as(object, "OnDiskMSnExp") + extractMsData(object, rt = rt, mz = mz) + }) diff --git a/R/methods-xcmsRaw.R b/R/methods-xcmsRaw.R index 522f2449a..c852b995d 100755 --- a/R/methods-xcmsRaw.R +++ b/R/methods-xcmsRaw.R @@ -1,5 +1,5 @@ ## All Methods for xcmsRaw should be here. 
-#' @include functions-xcmsRaw.R +#' @include functions-xcmsRaw.R functions-utils.R ############################################################ ## show @@ -309,46 +309,48 @@ setMethod("findPeaks.matchedFilter_orig", "xcmsRaw", ############################################################ ## findPeaks.matchedFilter -##' @title Feature detection in the chromatographic time domain +##' @title Peak detection in the chromatographic time domain ##' ##' @aliases findPeaks.matchedFilter -##' @description Find features (peaks) in the chromatographic time domain of the -##' profile matrix. For more details see \code{\link{do_detectFeatures_matchedFilter}}. -##' @param object The \code{\linkS4class{xcmsRaw}} object on which feature detection +##' @description Find peaks in the chromatographic time domain of the +##' profile matrix. For more details see +##' \code{\link{do_findChromPeaks_matchedFilter}}. +##' @param object The \code{\linkS4class{xcmsRaw}} object on which peak detection ##' should be performed. -##' @inheritParams featureDetection-matchedFilter +##' @inheritParams findChromPeaks-matchedFilter ##' @param step numeric(1) specifying the width of the bins/slices in m/z ##' dimension. ##' @param sleep (DEFUNCT). This parameter is no longer functional, as it would cause ##' problems in parallel processing mode. ##' @param scanrange Numeric vector defining the range of scans to which the original -##' \code{object} should be sub-setted before feature detection. +##' \code{object} should be sub-setted before peak detection. ##' @author Colin A. 
Smith
-##' @return A matrix, each row representing an intentified feature, with columns:
+##' @return A matrix, each row representing an identified chromatographic peak,
+##' with columns:
 ##' \describe{
-##' \item{mz}{Intensity weighted mean of m/z values of the feature across scans.}
-##' \item{mzmin}{Minimum m/z of the feature.}
-##' \item{mzmax}{Maximum m/z of the feature.}
-##' \item{rt}{Retention time of the feature's midpoint.}
-##' \item{rtmin}{Minimum retention time of the feature.}
-##' \item{rtmax}{Maximum retention time of the feature.}
-##' \item{into}{Integrated (original) intensity of the feature.}
+##' \item{mz}{Intensity weighted mean of m/z values of the peak across scans.}
+##' \item{mzmin}{Minimum m/z of the peak.}
+##' \item{mzmax}{Maximum m/z of the peak.}
+##' \item{rt}{Retention time of the peak's midpoint.}
+##' \item{rtmin}{Minimum retention time of the peak.}
+##' \item{rtmax}{Maximum retention time of the peak.}
+##' \item{into}{Integrated (original) intensity of the peak.}
 ##' \item{intf}{Integrated intensity of the filtered peak.}
-##' \item{maxo}{Maximum intensity of the feature.}
+##' \item{maxo}{Maximum intensity of the peak.}
 ##' \item{maxf}{Maximum intensity of the filtered peak.}
-##' \item{i}{Rank of feature in merged EIC (\code{<= max}).}
-##' \item{sn}{Signal to noise ratio of the feature}
+##' \item{i}{Rank of peak in merged EIC (\code{<= max}).}
+##' \item{sn}{Signal to noise ratio of the peak.}
 ##' }
 ##' @references
 ##' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and
 ##' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite
 ##' Profiling Using Nonlinear Peak Alignment, Matching, and Identification"
 ##' \emph{Anal. Chem.} 2006, 78:779-787.
-##' @family Old feature detection methods
+##' @family Old peak detection methods
 ##' @seealso \code{\link{matchedFilter}} for the new user interface.
##' \code{\linkS4class{xcmsRaw}}, -##' \code{\link{do_detectFeatures_matchedFilter}} for the core function -##' performing the feature detection. +##' \code{\link{do_findChromPeaks_matchedFilter}} for the core function +##' performing the peak detection. setMethod("findPeaks.matchedFilter", "xcmsRaw", function(object, fwhm = 30, sigma = fwhm/2.3548, max = 5, snthresh = 10, step = 0.1, steps = 2, @@ -430,7 +432,7 @@ setMethod("findPeaks.matchedFilter", "xcmsRaw", distance <- 0 baseValue <- 0 } - res <- do_detectFeatures_matchedFilter(mz = object@env$mz, + res <- do_findChromPeaks_matchedFilter(mz = object@env$mz, int = object@env$intensity, scantime = object@scantime, valsPerSpect = diff(c(object@scanindex, @@ -486,7 +488,7 @@ setMethod("findPeaks.centWave", "xcmsRaw", function(object, ppm=25, object <- object[scanrange[1]:scanrange[2]] vps <- diff(c(object@scanindex, length(object@env$mz))) - res <- do_detectFeatures_centWave(mz = object@env$mz, + res <- do_findChromPeaks_centWave(mz = object@env$mz, int = object@env$intensity, scantime = object@scantime, valsPerSpect = vps, @@ -923,9 +925,9 @@ setMethod("findPeaks.centWave", "xcmsRaw", function(object, ppm=25, ############################################################ ## findPeaks.centWaveWithPredictedIsotopeROIs -## Performs first a centWave analysis and based on the identified features +## Performs first a centWave analysis and based on the identified peaks ## defines ROIs for a second centWave run to check for presence of -## predicted isotopes for the first features. +## predicted isotopes for the first peaks. 
setMethod("findPeaks.centWaveWithPredictedIsotopeROIs", "xcmsRaw", function(object, ppm = 25, peakwidth = c(20,50), snthresh = 10, prefilter = c(3,100), mzCenterFun = "wMean", integrate = 1, @@ -952,7 +954,7 @@ setMethod("findPeaks.centWaveWithPredictedIsotopeROIs", "xcmsRaw", object <- object[scanrange[1]:scanrange[2]] vps <- diff(c(object@scanindex, length(object@env$mz))) - res <- do_detectFeatures_centWaveWithPredIsoROIs(mz = object@env$mz, + res <- do_findChromPeaks_centWaveWithPredIsoROIs(mz = object@env$mz, int = object@env$intensity, scantime = object@scantime, valsPerSpect = vps, @@ -976,52 +978,6 @@ setMethod("findPeaks.centWaveWithPredictedIsotopeROIs", "xcmsRaw", ) invisible(new("xcmsPeaks", res)) }) -## ## Original code: TODO REMOVE ME once method is validated. -## .centWaveWithPredictedIsotopeROIs <- function(object, ppm = 25, -## peakwidth = c(20,50), snthresh = 10, -## prefilter = c(3,100), -## mzCenterFun = "wMean", integrate = 1, -## mzdiff = -0.001, fitgauss = FALSE, -## scanrange = numeric(), -## noise = 0, sleep = 0, -## verbose.columns = FALSE, -## ROI.list = list(), -## firstBaselineCheck = TRUE, -## roiScales = NULL, -## snthreshIsoROIs = 6.25, -## maxcharge = 3, -## maxiso = 5, -## mzIntervalExtension = TRUE) { -## ## perform tradictional peak picking -## xcmsPeaks <- findPeaks.centWave( -## object = object, ppm = ppm, peakwidth = peakwidth, -## snthresh = snthresh, prefilter = prefilter, -## mzCenterFun = mzCenterFun, integrate = integrate, -## mzdiff = mzdiff, fitgauss = fitgauss, scanrange = scanrange, -## noise = noise, sleep = sleep, verbose.columns = TRUE, -## ROI.list = ROI.list, firstBaselineCheck = firstBaselineCheck, -## roiScales = roiScales) - -## return( -## .addPredictedIsotopeFeatures(object = object, -## ppm = ppm, -## peakwidth = peakwidth, -## prefilter = prefilter, -## mzCenterFun = mzCenterFun, -## integrate = integrate, -## mzdiff = mzdiff, -## fitgauss = fitgauss, -## scanrange = scanrange, -## noise = noise, -## sleep 
= sleep, -## verbose.columns = verbose.columns, -## xcmsPeaks = xcmsPeaks, -## snthresh = snthreshIsoROIs, -## maxcharge = maxcharge, -## maxiso = maxiso, -## mzIntervalExtension = mzIntervalExtension -## )) -## } setMethod("findPeaks.addPredictedIsotopeFeatures", "xcmsRaw", function(object, ppm = 25, peakwidth = c(20,50), @@ -1051,7 +1007,7 @@ setMethod("findPeaks.addPredictedIsotopeFeatures", stop("Pparameter >xcmsPeaks< is not of class 'xcmsPeaks'!\n") vps <- diff(c(object@scanindex, length(object@env$mz))) - res <- do_detectFeatures_addPredIsoROIs(mz = object@env$mz, + res <- do_findChromPeaks_addPredIsoROIs(mz = object@env$mz, int = object@env$intensity, scantime = object@scantime, valsPerSpect = vps, @@ -1065,410 +1021,21 @@ setMethod("findPeaks.addPredictedIsotopeFeatures", fitgauss = fitgauss, noise = noise, verboseColumns = verbose.columns, - features. = xcmsPeaks@.Data, + peaks. = xcmsPeaks@.Data, maxCharge = maxcharge, maxIso = maxiso, mzIntervalExtension = mzIntervalExtension ) invisible(new("xcmsPeaks", res)) }) -## ## Original code: TODO REMOVE ME once method is validated. -## .addPredictedIsotopeFeatures <- -## function(object, ppm = 25, peakwidth = c(20,50), -## prefilter = c(3,100), mzCenterFun = "wMean", -## integrate = 1, mzdiff = -0.001, fitgauss = FALSE, -## scanrange = numeric(), noise=0, ## noise.local=TRUE, -## sleep = 0, verbose.columns = FALSE, -## xcmsPeaks, snthresh = 6.25, maxcharge = 3, -## maxiso = 5, mzIntervalExtension = TRUE) { -## if(nrow(xcmsPeaks) == 0){ -## warning("Warning: There are no features (parameter >xcmsPeaks<) for the prediction of isotope ROIs !\n") -## return(xcmsPeaks) -## } -## if(class(xcmsPeaks) != "xcmsPeaks") -## stop("Error: parameter >xcmsPeaks< is not of class 'xcmsPeaks' ! \n") -## if(any(is.na(match(x = c("scmin", "scmax"), table = colnames(xcmsPeaks))))) -## stop("Error: peak list >xcmsPeaks< is missing the columns 'scmin' and 'scmax' ! 
Please set parameter >verbose.columns< to TRUE for peak picking with 'centWave' and try again ! \n") - -## ############################################################################## -## ## predict new ROIs -## newROI.list <- do_predictIsotopeROIs(object, xcmsPeaks, ppm, maxcharge, -## maxiso, mzIntervalExtension) -## if(length(newROI.list) == 0) -## return(xcmsPeaks) - -## ## HOOK_1 -## ## return(newROI.list) -## ############################################################################## -## ## perform peak picking for predicted ROIs -## roiScales <- unlist(lapply(X = newROI.list, FUN = function(x){x$scale})) -## xcmsPeaks2 <- findPeaks.centWave( -## object = object, ppm=ppm, peakwidth=peakwidth, snthresh=snthresh, -## prefilter=prefilter, mzCenterFun=mzCenterFun, integrate=integrate, mzdiff=mzdiff, -## fitgauss=fitgauss, scanrange=scanrange, noise=noise, ## noise.local=noise.local, -## sleep=sleep, verbose.columns=verbose.columns, ROI.list=newROI.list, firstBaselineCheck=FALSE, roiScales=roiScales -## ) - -## ## HOOK_2 -## ## return(xcmsPeaks2) -## if(nrow(xcmsPeaks2) > 0){ -## ## remove NaN values -## rowsWithNaN <- which(apply(X = xcmsPeaks2[, c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax")], MARGIN = 1, FUN = function(x){any(is.na(x))})) -## if(length(rowsWithNaN) > 0) -## xcmsPeaks2 <- xcmsPeaks2[-rowsWithNaN, ] - -## noArea <- which((xcmsPeaks2[, "mzmax"] - xcmsPeaks2[, "mzmin"]) == 0 || (xcmsPeaks2[, "rtmax"] - xcmsPeaks2[, "rtmin"]) == 0) -## if(length(noArea) > 0) -## xcmsPeaks2 <- xcmsPeaks2[-noArea, ] -## } - -## ## HOOK_3 -## ## return(xcmsPeaks2) ## Compare these results - -## ## make present peaks and new peaks distinct by removing overlapping peaks -## if(nrow(xcmsPeaks2) > 0){ -## ## remove ROIs which are already there -## overlapProportionThreshold <- 0.01 -## drop <- apply(X = xcmsPeaks2, MARGIN = 1, FUN = function(x){ -## roiInt <- x[["into"]] -## peakInt <- xcmsPeaks[, "into"] -## roiMzMin <- x[["mzmin"]] -## roiMzMax <- x[["mzmax"]] 
-## peakMzMin <- xcmsPeaks[, "mzmin"] -## peakMzMax <- xcmsPeaks[, "mzmax"] -## roiMzCenter = (roiMzMin + roiMzMax ) / 2; -## peakMzCenter = (peakMzMin + peakMzMax) / 2; -## roiMzRadius = (roiMzMax - roiMzMin ) / 2; -## peakMzRadius = (peakMzMax - peakMzMin) / 2; -## overlappingmz <- abs(peakMzCenter - roiMzCenter) <= (roiMzRadius + peakMzRadius) - -## roiRtMin <- x[["rtmin"]] -## roiRtMax <- x[["rtmax"]] -## peakRtMin <- xcmsPeaks[, "rtmin"] -## peakRtMax <- xcmsPeaks[, "rtmax"] -## roiRtCenter = (roiRtMin + roiRtMax ) / 2; -## peakRtCenter = (peakRtMin + peakRtMax) / 2; -## roiRtRadius = (roiRtMax - roiRtMin ) / 2; -## peakRtRadius = (peakRtMax - peakRtMin) / 2; -## overlappingrt <- abs(peakRtCenter - roiRtCenter) <= (roiRtRadius + peakRtRadius) - -## overlapping <- overlappingmz & overlappingrt - -## overlappingPeaks <- which(overlapping) -## overlappingPeaksInt <- peakInt[overlappingPeaks] - -## removeROI <- FALSE -## peaksToRemove <- NULL -## if(any(overlapping)){ -## if(any(overlappingPeaksInt > roiInt)) -## return(TRUE) -## else -## return(overlappingPeaks) -## } else { -## ## no overlap -## return(FALSE) -## } - -## ## Will never reach the condition below. -## ## return(isOverlap) -## }) - -## removeROI <- unlist(lapply(X = drop, FUN = function(x){ -## if(is.logical(x)){ -## return(x) -## } else { -## return(FALSE) -## } -## })) -## removePeaks <- unique(unlist(lapply(X = drop, FUN = function(x){ -## if(is.logical(x)){ -## return(NULL) -## } else { -## return(x) -## } -## }))) - -## if(length(removePeaks) > 0) -## xcmsPeaks <- xcmsPeaks[-removePeaks, ] -## xcmsPeaks2 <- xcmsPeaks2[!removeROI, ] -## } - -## ## merge result with present results -## if(!verbose.columns) -## xcmsPeaks <- xcmsPeaks[, c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "intb", "maxo", "sn")] - -## xcmsPeaks <- rbind(xcmsPeaks, xcmsPeaks2) - -## invisible(new("xcmsPeaks", xcmsPeaks)) -## } - -## ## Original code: TODO REMOVE ME once method is validated. 
-## removeROIsOutOfRange <- function(object, roi.matrix){ -## ## c("mz", "mzmin", "mzmax", "scmin", "scmax", "length", "intensity") -## numberOfROIs <- nrow(roi.matrix) - -## minMz <- min(object@env$mz) -## maxMz <- max(object@env$mz) -## minScanRange <- 1 -## maxScanRange <- length(object@scantime) -## #minScanRange <- min(object@scantime) -## #maxScanRange <- max(object@scantime) - -## roiWithinRange <- rep(x = TRUE, times = numberOfROIs) -## roiWithinRange <- roiWithinRange & (roi.matrix[, "mzmin"] >= minMz) -## roiWithinRange <- roiWithinRange & (roi.matrix[, "mzmax"] <= maxMz) -## roiWithinRange <- roiWithinRange & (roi.matrix[, "scmin"] >= minScanRange) -## roiWithinRange <- roiWithinRange & (roi.matrix[, "scmax"] <= maxScanRange) - -## roi.matrix <- roi.matrix[roiWithinRange, ] - -## return(roi.matrix) -## } -## ## Original code: TODO REMOVE ME once method is validated. -## removeROIsWithoutSignal <- function(object, roi.matrix, intensityThreshold){ -## ## c("mz", "mzmin", "mzmax", "scmin", "scmax", "length", "intensity") -## numberOfROIs <- nrow(roi.matrix) -## sufficientSignalThere <- rep(x = TRUE, times = numberOfROIs) -## for(roiIdx in seq_len(numberOfROIs)){ -## mzrange <- c(roi.matrix[[roiIdx, "mzmin"]], roi.matrix[[roiIdx, "mzmax"]]) -## scanrange <- c(roi.matrix[[roiIdx, "scmin"]], roi.matrix[[roiIdx, "scmax"]]) -## mzROI.EIC <- rawEIC(object, mzrange=mzrange, scanrange=scanrange) -## sumOfIntensities <- sum(mzROI.EIC$intensity) - -## if(sumOfIntensities < intensityThreshold) -## sufficientSignalThere[[roiIdx]] <- FALSE -## } -## roi.matrix <- roi.matrix[sufficientSignalThere, ] - -## return(roi.matrix) -## } - -## ## Original code: TODO REMOVE ME once method is validated. 
-## createAdditionalROIs <- function(object, ROI.list, ppm, addNewIsotopeROIs, maxcharge, maxiso, mzIntervalExtension, addNewAdductROIs, polarity){ -## ############################################################################################### -## ## isotope ROIs -## if(addNewIsotopeROIs){ -## ## init -## isotopeDistance <- 1.0033548378 -## charges <- 1:maxcharge -## isos <- 1:maxiso - -## isotopeStepSizesForCharge <- list() -## for(charge in charges) -## isotopeStepSizesForCharge[[charge]] <- isotopeDistance / charge - -## isotopeStepSizes <- list() -## for(charge in charges) -## isotopeStepSizes[[charge]] <- list() - -## for(charge in charges) -## for(iso in isos) -## isotopeStepSizes[[charge]][[iso]] <- isotopeStepSizesForCharge[[charge]] * iso - -## isotopePopulationMz <- list() -## for(charge in charges) -## for(iso in isos) -## isotopePopulationMz[[length(isotopePopulationMz) + 1]] <- isotopeStepSizes[[charge]][[iso]] -## isotopePopulationMz <- unlist(unique(isotopePopulationMz)) - -## numberOfIsotopeROIs <- length(ROI.list) * length(isotopePopulationMz) -## isotopeROIs.matrix <- matrix(nrow = numberOfIsotopeROIs, ncol = 8) -## colnames(isotopeROIs.matrix) <- c("mz", "mzmin", "mzmax", "scmin", "scmax", "length", "intensity", "scale") - -## ## complement found ROIs -## for(roiIdx in 1:(length(ROI.list))){ -## for(mzIdx in 1:length(isotopePopulationMz)){ -## ## create new ROI! -## mzDifference <- isotopePopulationMz[[mzIdx]] -## if(mzIntervalExtension) -## ## extend m/z interval for weak peaks -## #mzIntervalExtension <- ROI.list[[roiIdx]]$mz * ppm / 1E6 -## mzIntervalExtension <- (ROI.list[[roiIdx]]$mzmax - ROI.list[[roiIdx]]$mzmin) * 2 -## else -## mzIntervalExtension <- 0 - -## idx <- (roiIdx - 1) * length(isotopePopulationMz) + mzIdx -## isotopeROIs.matrix[idx, ] <- c( -## ROI.list[[roiIdx]]$mz + mzDifference,## XXX not used! 
-## ROI.list[[roiIdx]]$mzmin + mzDifference - mzIntervalExtension, -## ROI.list[[roiIdx]]$mzmax + mzDifference + mzIntervalExtension, -## ROI.list[[roiIdx]]$scmin, -## ROI.list[[roiIdx]]$scmax, -## ROI.list[[roiIdx]]$length,## XXX not used! -## -1, #ROI.list[[roiIdx]]$intensity ## XXX not used! -## ROI.list[[roiIdx]]$scale -## ) -## } -## } -## } else { -## ## no isotope ROIs -## isotopeROIs.matrix <- matrix(nrow = 0, ncol = 8) -## colnames(isotopeROIs.matrix) <- c("mz", "mzmin", "mzmax", "scmin", "scmax", "length", "intensity", "scale") -## } -## ############################################################################################### -## ## adduct ROIs -## if(addNewAdductROIs){ -## ## considered adduct distances -## ## reference: Huang N.; Siegel M.M.1; Kruppa G.H.; Laukien F.H.; J Am Soc Mass Spectrom 1999, 10, 1166–1173; Automation of a Fourier transform ion cyclotron resonance mass spectrometer for acquisition, analysis, and e-mailing of high-resolution exact-mass electrospray ionization mass spectral data -## ## see also for contaminants: Interferences and contaminants encountered in modern mass spectrometry (Bernd O. Keller, Jie Sui, Alex B. Young and Randy M. 
Whittal, ANALYTICA CHIMICA ACTA, 627 (1): 71-81) - -## mH <- 1.0078250322 -## mNa <- 22.98976928 -## mK <- 38.96370649 -## mC <- 12 -## mN <- 14.003074004 -## mO <- 15.994914620 -## mS <- 31.972071174 -## mCl <- 34.9688527 -## mBr <- 78.918338 -## mF <- 18.998403163 -## mDMSO <- mC*2+mH*6+mS+mO # dimethylsulfoxid -## mACN <- mC*2+mH*3+mN # acetonitril -## mIsoProp <- mC*3+mH*8+mO # isopropanol -## mNH4 <- mN+mH*4 # ammonium -## mCH3OH <- mC+mH*3+mO+mH # methanol -## mH2O <- mH*2+mO # water -## mFA <- mC+mH*2+mO*2 # formic acid -## mHAc <- mC+mH*3+mC+mO+mO+mH # acetic acid -## mTFA <- mC+mF*3+mC+mO+mO+mH # trifluoroacetic acid - -## switch(polarity, -## "positive"={ -## adductPopulationMz <- unlist(c( -## ## [M+H]+ to [M+H]+ (Reference) -## function(mass){ mass-mH+mNH4 }, ## [M+H]+ to [M+NH4]+ -## function(mass){ mass-mH+mNa }, ## [M+H]+ to [M+Na]+ -## function(mass){ mass+mCH3OH }, ## [M+H]+ to [M+CH3OH+H]+ -## function(mass){ mass-mH+mK }, ## [M+H]+ to [M+K]+ -## function(mass){ mass+mACN }, ## [M+H]+ to [M+ACN+H]+ -## function(mass){ mass-2*mH+2*mNa }, ## [M+H]+ to [M+2Na-H]+ -## function(mass){ mass+mIsoProp }, ## [M+H]+ to [M+IsoProp+H]+ -## function(mass){ mass-mH+mACN+mNa }, ## [M+H]+ to [M+ACN+Na]+ -## function(mass){ mass-2*mH+2*mK }, ## [M+H]+ to [M+2K-H]+ -## function(mass){ mass+mDMSO }, ## [M+H]+ to [M+DMSO+H]+ -## function(mass){ mass+2*mACN }, ## [M+H]+ to [M+2*ACN+H]+ -## function(mass){ mass+mIsoProp+mNa }, ## [M+H]+ to [M+IsoProp+Na+H]+ TODO double-charged? 
-## function(mass){ (mass-mH)*2+mH }, ## [M+H]+ to [2M+H]+ -## function(mass){ (mass-mH)*2+mNH4 }, ## [M+H]+ to [2M+NH4]+ -## function(mass){ (mass-mH)*2+mNa }, ## [M+H]+ to [2M+Na]+ -## function(mass){ (mass-mH)*2+mK }, ## [M+H]+ to [2M+K]+ -## function(mass){ (mass-mH)*2+mACN+mH }, ## [M+H]+ to [2M+ACN+H]+ -## function(mass){ (mass-mH)*2+mACN+mNa }, ## [M+H]+ to [2M+ACN+Na]+ -## function(mass){((mass-mH)*2+3*mH2O+2*mH)/2 }, ## [M+H]+ to [2M+3*H2O+2*H]2+ -## function(mass){ (mass+mH)/2 }, ## [M+H]+ to [M+2*H]2+ -## function(mass){ (mass+mNH4)/2 }, ## [M+H]+ to [M+H+NH4]2+ -## function(mass){ (mass+mNa)/2 }, ## [M+H]+ to [M+H+Na]2+ -## function(mass){ (mass+mK)/2 }, ## [M+H]+ to [M+H+K]2+ -## function(mass){ (mass+mACN+mH)/2 }, ## [M+H]+ to [M+ACN+2*H]2+ -## function(mass){ (mass-mH+2*mNa)/2 }, ## [M+H]+ to [M+2*Na]2+ -## function(mass){ (mass+2*mACN+mH)/2 }, ## [M+H]+ to [M+2*ACN+2*H]2+ -## function(mass){ (mass+3*mACN+mH)/2 }, ## [M+H]+ to [M+3*ACN+2*H]2+ -## function(mass){ (mass+2*mH)/3 }, ## [M+H]+ to [M+3*H]3+ -## function(mass){ (mass+mH+mNa)/3 }, ## [M+H]+ to [M+2*H+Na]3+ -## function(mass){ (mass+2*mNa)/3 }, ## [M+H]+ to [M+H+2*Na]3+ -## function(mass){ (mass-mH+3*mNa)/3 } ## [M+H]+ to [M+3*Na]3+ -## )) -## }, -## "negative"={ -## adductPopulationMz <- unlist(c( -## ## [M-H]+ to [M-H]+ (Reference) -## function(mass){ mass-mH2O }, ## [M-H]+ to [M-H2O-H]+ -## function(mass){ mass-mH+mNa }, ## [M-H]+ to [M+Na-2*H]+ -## function(mass){ mass+mH+mCl }, ## [M-H]+ to [M+Cl]+ -## function(mass){ mass-mH+mK }, ## [M-H]+ to [M+K-2*H]+ -## function(mass){ mass+mFA }, ## [M-H]+ to [M+FA-H]+ -## function(mass){ mass+mHAc }, ## [M-H]+ to [M+HAc-H]+ -## function(mass){ mass+mH+mBr }, ## [M-H]+ to [M+Br]+ -## function(mass){ mass+mTFA }, ## [M-H]+ to [M+TFA-H]+ -## function(mass){ (mass+mH)*2-mH }, ## [M-H]+ to [2M-H]+ -## function(mass){ (mass+mH)*2+mFA-mH }, ## [M-H]+ to [2M+FA-H]+ -## function(mass){ (mass+mH)*2+mHAc-mH }, ## [M-H]+ to [2M+HAc-H]+ -## function(mass){ 
(mass+mH)*3-mH }, ## [M-H]+ to [3M-H]+ -## function(mass){ (mass-mH)/2 }, ## [M-H]+ to [M-2*H]2+ -## function(mass){ (mass-2*mH)/3 } ## [M-H]+ to [M-3*H]3+ -## )) -## }, -## "unknown"={ -## warning(paste("Unknown polarity! No adduct ROIs have been added.", sep = "")) -## }, -## stop(paste("Unknown polarity (", polarity, ")!", sep = "")) -## ) - -## numberOfAdductROIs <- length(ROI.list) * length(adductPopulationMz) -## adductROIs.matrix <- matrix(nrow = numberOfAdductROIs, ncol = 8) -## colnames(adductROIs.matrix) <- c("mz", "mzmin", "mzmax", "scmin", "scmax", "length", "intensity", "scale") - -## for(roiIdx in 1:(length(ROI.list))){ -## for(mzIdx in 1:length(adductPopulationMz)){ -## ## create new ROI! -## mzDifference <- adductPopulationMz[[mzIdx]](ROI.list[[roiIdx]]$mz) -## idx <- (roiIdx - 1) * length(adductPopulationMz) + mzIdx -## if(ROI.list[[roiIdx]]$mzmin + mzDifference > 0){ -## adductROIs.matrix[idx, ] <- c( -## ROI.list[[roiIdx]]$mz + mzDifference,## XXX not used! -## ROI.list[[roiIdx]]$mzmin + mzDifference, -## ROI.list[[roiIdx]]$mzmax + mzDifference, -## ROI.list[[roiIdx]]$scmin, -## ROI.list[[roiIdx]]$scmax, -## ROI.list[[roiIdx]]$length,## XXX not used! -## -1, #ROI.list[[roiIdx]]$intensity ## XXX not used! 
-## ROI.list[[roiIdx]]$scale -## ) -## } -## } -## } -## } else { -## ## no adduct ROIs -## adductROIs.matrix <- matrix(nrow = 0, ncol = 8) -## colnames(adductROIs.matrix) <- c("mz", "mzmin", "mzmax", "scmin", "scmax", "length", "intensity", "scale") -## } - -## numberOfAdditionalIsotopeROIsUnfiltered <- nrow(isotopeROIs.matrix) -## numberOfAdditionalAdductROIsUnfiltered <- nrow(adductROIs.matrix ) -## numberOfAdditionalROIsUnfiltered <- numberOfAdditionalIsotopeROIsUnfiltered + numberOfAdditionalAdductROIsUnfiltered -## newROI.matrixUnfiltered <- rbind(isotopeROIs.matrix, adductROIs.matrix) - -## ############################################################################################### -## ## filter out m/z's out of range and without sufficient intensity -## intensityThreshold <- 10 - -## if(addNewIsotopeROIs) isotopeROIs.matrix <- removeROIsOutOfRange(object, isotopeROIs.matrix) -## if(addNewAdductROIs) adductROIs.matrix <- removeROIsOutOfRange(object, adductROIs.matrix) -## if(addNewIsotopeROIs) isotopeROIs.matrix <- removeROIsWithoutSignal(object, isotopeROIs.matrix, intensityThreshold) -## if(addNewAdductROIs) adductROIs.matrix <- removeROIsWithoutSignal(object, adductROIs.matrix, intensityThreshold) - -## numberOfAdditionalIsotopeROIs <- nrow(isotopeROIs.matrix) -## numberOfAdditionalAdductROIs <- nrow(adductROIs.matrix ) -## numberOfAdditionalROIs <- numberOfAdditionalIsotopeROIs + numberOfAdditionalAdductROIs - -## ############################################################################################### -## ## box -## newROI.matrix <- rbind(isotopeROIs.matrix, adductROIs.matrix) - -## resultObj <- list() -## ## unfiltered -## resultObj$newROI.matrixUnfiltered <- newROI.matrixUnfiltered -## resultObj$numberOfAdditionalROIsUnfiltered <- numberOfAdditionalROIsUnfiltered -## resultObj$numberOfAdditionalIsotopeROIsUnfiltered <- numberOfAdditionalIsotopeROIsUnfiltered -## resultObj$numberOfAdditionalAdductROIsUnfiltered <- 
numberOfAdditionalAdductROIsUnfiltered -## ## filtered -## resultObj$newROI.matrix <- newROI.matrix -## resultObj$numberOfAdditionalROIs <- numberOfAdditionalROIs -## resultObj$numberOfAdditionalIsotopeROIs <- numberOfAdditionalIsotopeROIs -## resultObj$numberOfAdditionalAdductROIs <- numberOfAdditionalAdductROIs - -## return(resultObj) -## } - - ############################################################ ## findPeaks.MSW -##' @title Feature detection for single-spectrum non-chromatography MS data +##' @title Peak detection for single-spectrum non-chromatography MS data ##' @aliases findPeaks.MSW ##' -##' @description This method performs feature detection in mass spectrometry +##' @description This method performs peak detection in mass spectrometry ##' direct injection spectrum using a wavelet based algorithm. ##' ##' @details This is a wrapper around the peak picker in Bioconductor's @@ -1476,30 +1043,30 @@ setMethod("findPeaks.addPredictedIsotopeFeatures", ##' \code{\link[MassSpecWavelet]{peakDetectionCWT}} and ##' \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. ##' -##' @inheritParams featureDetection-MSW -##' @inheritParams featureDetection-centWave -##' @param object The \code{\linkS4class{xcmsRaw}} object on which feature +##' @inheritParams findPeaks-MSW +##' @inheritParams findChromPeaks-centWave +##' @param object The \code{\linkS4class{xcmsRaw}} object on which peak ##' detection should be performed. -##' @param verbose.columns Logical whether additional feature meta data columns +##' @param verbose.columns Logical whether additional peak meta data columns ##' should be returned. 
##' ##' @return -##' A matrix, each row representing an intentified feature, with columns: +##' A matrix, each row representing an identified peak, with columns: ##' \describe{ -##' \item{mz}{m/z value of the feature at the centroid position.} -##' \item{mzmin}{Minimum m/z of the feature.} -##' \item{mzmax}{Maximum m/z of the feature.} +##' \item{mz}{m/z value of the peak at the centroid position.} +##' \item{mzmin}{Minimum m/z of the peak.} +##' \item{mzmax}{Maximum m/z of the peak.} ##' \item{rt}{Always \code{-1}.} ##' \item{rtmin}{Always \code{-1}.} ##' \item{rtmax}{Always \code{-1}.} -##' \item{into}{Integrated (original) intensity of the feature.} -##' \item{maxo}{Maximum intensity of the feature.} +##' \item{into}{Integrated (original) intensity of the peak.} +##' \item{maxo}{Maximum intensity of the peak.} ##' \item{intf}{Always \code{NA}.} -##' \item{maxf}{Maximum MSW-filter response of the feature.} +##' \item{maxf}{Maximum MSW-filter response of the peak.} ##' \item{sn}{Signal to noise ratio.} ##' } ##' @seealso \code{\link{MSW}} for the new user interface, -##' \code{\link{do_detectFeatures_MSW}} for the downstream analysis +##' \code{\link{do_findPeaks_MSW}} for the downstream analysis ##' function or \code{\link[MassSpecWavelet]{peakDetectionCWT}} from the ##' \code{MassSpecWavelet} for details on the algorithm and additionally supported ##' parameters. @@ -1507,11 +1074,11 @@ setMethod("findPeaks.addPredictedIsotopeFeatures", ##' @author Joachim Kutzera, Steffen Neumann, Johannes Rainer setMethod("findPeaks.MSW", "xcmsRaw", function(object, snthresh=3, verbose.columns = FALSE, ...) { - res <- do_detectFeatures_MSW(mz = object@env$mz, - int = object@env$intensity, - snthresh = snthresh, - verboseColumns = verbose.columns, - ...) + res <- do_findPeaks_MSW(mz = object@env$mz, + int = object@env$intensity, + snthresh = snthresh, + verboseColumns = verbose.columns, + ...) 
invisible(new("xcmsPeaks", res)) }) @@ -1583,72 +1150,11 @@ setMethod("findPeaks", "xcmsRaw", function(object, method=getOption("BioC")$xcms invisible(do.call(method, list(object, ...))) }) -############################################################ -## getPeaks setMethod("getPeaks", "xcmsRaw", function(object, peakrange, step = 0.1) { - - profFun <- match.profFun(object) - if (all(c("mzmin","mzmax","rtmin","rtmax") %in% colnames(peakrange))) - peakrange <- peakrange[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE] - stime <- object@scantime - -### Create EIC buffer - mrange <- range(peakrange[,1:2]) - mass <- seq(floor(mrange[1]/step)*step, ceiling(mrange[2]/step)*step, by = step) - bufsize <- min(100, length(mass)) - buf <- profFun(object@env$mz, object@env$intensity, object@scanindex, - bufsize, mass[1], mass[bufsize], TRUE, object@profparam) - bufidx <- integer(length(mass)) - idxrange <- c(1, bufsize) - bufidx[idxrange[1]:idxrange[2]] <- 1:bufsize - - cnames <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", "into", "maxo") - rmat <- matrix(nrow = nrow(peakrange), ncol = length(cnames)) - colnames(rmat) <- cnames - - for (i in order(peakrange[,1])) { - imz <- findRange(mass, c(peakrange[i,1]-.5*step, peakrange[i,2]+.5*step), TRUE) - iret <- findRange(stime, peakrange[i,3:4], TRUE) - -### Update EIC buffer if necessary - if (bufidx[imz[2]] == 0) { - bufidx[idxrange[1]:idxrange[2]] <- 0 - idxrange <- c(max(1, imz[1]), min(bufsize+imz[1]-1, length(mass))) - bufidx[idxrange[1]:idxrange[2]] <- 1:(diff(idxrange)+1) - buf <- profFun(object@env$mz, object@env$intensity, object@scanindex, - diff(idxrange)+1, mass[idxrange[1]], mass[idxrange[2]], - TRUE, object@profparam) - } - ymat <- buf[bufidx[imz[1]:imz[2]],iret[1]:iret[2],drop=FALSE] - ymax <- colMax(ymat) - iymax <- which.max(ymax) - - pwid <- diff(stime[iret])/diff(iret) - - rosm <- rowSums(ymat) - limz <- length(imz[1]:imz[2]) - if (length(rosm) != limz) { ## that happens for some reason - 
warning("weighted.mean : x and w must have the same length \n") - rosm <- rep(1, limz) ## fallback to mean - } - rmat[i,1] <- weighted.mean(mass[imz[1]:imz[2]], rosm) - if (is.nan(rmat[i,1]) || is.na(rmat[i,1])) ## R2.11 : weighted.mean() results in NA (not NaN) for zero weights - rmat[i,1] <- mean(peakrange[i,1:2]) - - rmat[i,2:3] <- peakrange[i,1:2] - rmat[i,4] <- stime[iret[1]:iret[2]][iymax] - rmat[i,5:6] <- peakrange[i,3:4] - - if (peakrange[i,3] < stime[1] || peakrange[i,4] > stime[length(stime)] || is.nan(pwid)) { - warning("getPeaks: Peak m/z:",peakrange[i,1],"-",peakrange[i,2], ", RT:",peakrange[i,3],"-",peakrange[i,4], - "is out of retention time range for this sample (",object@filepath,"), using zero intensity value.\n") - rmat[i,7:8] <- 0 - } else { - rmat[i,7] <- pwid*sum(ymax) - rmat[i,8] <- ymax[iymax] - } - } - invisible(rmat) + if (useOriginalCode()) + return(.getPeaks_orig(object, peakrange, step = step)) + else + return(.getPeaks_new(object, peakrange, step = step)) }) ############################################################ @@ -1706,6 +1212,8 @@ setMethod("rawMat", "xcmsRaw", function(object, log = log) }) ## @jo TODO LLL replace that with an implementation in C. +## Note: this function silently drops retention times for which no intensity-mz +## pair was measured. 
.rawMat <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), rtrange = numeric(), scanrange = numeric, log = FALSE) { @@ -1738,6 +1246,38 @@ setMethod("rawMat", "xcmsRaw", function(object, intensity = int) } +.rawMat2 <- function(mz, int, scantime, valsPerSpect, mzrange = numeric(), + rtrange = numeric(), scanrange = numeric, + log = FALSE) { + if (length(rtrange) >= 2) { + rtrange <- range(rtrange) + scanrange <- range(which((scantime >= rtrange[1]) & + (scantime <= rtrange[2]))) + } + if (length(scanrange) < 2) + scanrange <- c(1, length(valsPerSpect)) + else scanrange <- range(scanrange) + if (scanrange[1] == 1) + startidx <- 1 + else + startidx <- sum(valsPerSpect[1:(scanrange[1]-1)]) + 1 + endidx <- sum(valsPerSpect[1:scanrange[2]]) + scans <- rep(scanrange[1]:scanrange[2], + valsPerSpect[scanrange[1]:scanrange[2]]) + masses <- mz[startidx:endidx] + massidx <- 1:length(masses) + if (length(mzrange) >= 2) { + mzrange <- range(mzrange) + massidx <- massidx[(masses >= mzrange[1] & (masses <= mzrange[2]))] + } + int <- int[startidx:endidx][massidx] + if (log && (length(int) > 0)) + int <- log(int + max(1 - min(int), 0)) + cbind(time = scantime[scans[massidx]], + mz = masses[massidx], + intensity = int) +} + ############################################################ ## plotRaw @@ -2128,90 +1668,6 @@ setMethod("findKalmanROI", "xcmsRaw", function(object, mzrange=c(0.0,0.0), ## PACKAGE ='xcms' ) }) -## ############################################################ -## ## This should be replaced soon; is only for testing purposes. -## setGeneric("findPeaks.massifquant_orig", function(object, ...) 
-## standardGeneric("findPeaks.massifquant_orig")) -## setMethod("findPeaks.massifquant_orig", "xcmsRaw", -## function(object, -## ppm=10, -## peakwidth=c(20,50), -## snthresh=10, -## prefilter=c(3,100), -## mzCenterFun="wMean", -## integrate=1, -## mzdiff=-0.001, -## fitgauss=FALSE, -## scanrange= numeric(), -## noise=0, ## noise.local=TRUE, -## sleep=0, -## verbose.columns=FALSE, -## criticalValue = 1.125, -## consecMissedLimit = 2, -## unions = 1, -## checkBack = 0, -## withWave = 0) { - -## cat("\n Massifquant, Copyright (C) 2013 Brigham Young University."); -## cat("\n Massifquant comes with ABSOLUTELY NO WARRANTY. See LICENSE for details.\n"); -## flush.console(); -## ## Seems we're not considering scanrange here at all. - -## ##keeep this check since massifquant doesn't check internally -## if (!isCentroided(object)) -## warning("It looks like this file is in profile mode. massifquant can process only centroid mode data !\n") - -## cat("\n Detecting mass traces at",ppm,"ppm ... \n"); flush.console(); -## massifquantROIs = findKalmanROI(object, minIntensity = prefilter[2], -## minCentroids = peakwidth[1], -## criticalVal = criticalValue, -## consecMissedLim = consecMissedLimit, -## segs = unions, -## scanBack = checkBack, -## ppm=ppm) - -## if (withWave == 1) { -## featlist = findPeaks.centWave(object, ppm, peakwidth, snthresh, -## prefilter, mzCenterFun, integrate, mzdiff, fitgauss, -## scanrange, noise, sleep, verbose.columns, ROI.list= massifquantROIs); -## } -## else { -## basenames <- c("mz","mzmin","mzmax","rtmin","rtmax","rt", "into") -## if (length(massifquantROIs) == 0) { -## cat("\nNo peaks found !\n"); -## nopeaks <- new("xcmsPeaks", matrix(nrow=0, ncol=length(basenames))); -## colnames(nopeaks) <- basenames; -## return(invisible(nopeaks)); -## } - -## p <- t(sapply(massifquantROIs, unlist)); -## colnames(p) <- basenames; - -## #get the max intensity for each feature -## maxo <- sapply(seq_len(nrow(p)), function(i) { -## raw <- rawMat(object, 
mzrange = p[i,c("mzmin", "mzmax")], -## scanrange = p[i,c("rtmin", "rtmax")]) -## max(raw[,3]) -## }) -## p <- cbind(p, maxo) - -## #calculate median index -## p[,"rt"] = as.integer(p[,"rtmin"] + ( (p[,"rt"] + 1) / 2 ) - 1); -## #convert from index into actual time -## p[,"rtmin"] = object@scantime[p[,"rtmin"]]; -## p[,"rtmax"] = object@scantime[p[,"rtmax"]]; -## p[,"rt"] = object@scantime[p[,"rt"]]; - -## uorder <- order(p[,"into"], decreasing=TRUE); -## pm <- as.matrix(p[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE]); - -## uindex <- rectUnique(pm,uorder,mzdiff,ydiff = -0.00001) ## allow adjacent peaks; -## featlist <- p[uindex,,drop=FALSE]; -## cat("\n",dim(featlist)[1]," Peaks.\n"); -## invisible(new("xcmsPeaks", featlist)); -## } -## return(invisible(featlist)); -## }) ############################################################ ## findPeaks.massifquant: Note the original code returned, if withWave = 1, @@ -2274,7 +1730,7 @@ setMethod("findPeaks.massifquant", "xcmsRaw", function(object, warning("It looks like this file is in profile mode.", " Massifquant can process only centroid mode data !\n") vps <- diff(c(object@scanindex, length(object@env$mz))) - res <- do_detectFeatures_massifquant(mz = object@env$mz, + res <- do_findChromPeaks_massifquant(mz = object@env$mz, int = object@env$intensity, scantime = object@scantime, valsPerSpect = vps, @@ -2354,7 +1810,7 @@ setMethod("findPeaks.massifquant", "xcmsRaw", function(object, warning("It looks like this file is in profile mode.", " Massifquant can process only centroid mode data !\n") vps <- diff(c(object@scanindex, length(object@env$mz))) - res <- do_detectFeatures_massifquant(mz = object@env$mz, + res <- do_findChromPeaks_massifquant(mz = object@env$mz, int = object@env$intensity, scantime = object@scantime, valsPerSpect = vps, @@ -2846,17 +2302,35 @@ setMethod("stitch.xml", "xcmsRaw", function(object, lockMass) { ob@tic<-object@tic ob@profparam<-list() - arr<-array(dim=c(2,max(diff(ob@scanindex)), 
length(ob@scanindex))) + ## Array [x, y, z] with + ## - x: mz and intensity + ## - y: spectrum (1: max measurements within one of the spectra) + ## - z: scans (1: number of spectra) + arr <- array(dim = c(2, max(diff(ob@scanindex)), length(ob@scanindex))) if(lockMass[1] == 1){ lockMass<-lockMass[3:length(lockMass)] } + + ## Remove the last lock mass if it is too close by the end + if ((lockMass[length(lockMass)] + 2) > length(ob@scanindex)) + lockMass <- lockMass[1:(length(lockMass) - 1)] + + ## If the number of lockMass values is not even splitting them into a + ## two-column matrix is not OK (causes also the first lockMass spectrum to + ## be overwritten twice. That's to get rid of the warning in issue #173. + if (length(lockMass) %% 2) + lockMass <- c(lockMass, -99) lockMass<-matrix(lockMass, ncol=2, byrow=TRUE) - if((lockMass[nrow(lockMass),2]+2) > length(ob@scanindex)){ - lockMass<-lockMass[1:(nrow(lockMass)-1),] - } + ## if((lockMass[nrow(lockMass),2]+2) > length(ob@scanindex)){ + ## lockMass<-lockMass[1:(nrow(lockMass)-1),] + ## } + ## We're looping from 1 to length - 1, thus we have to fill in the last + ## scan later. for(i in 1:(length(ob@scanindex)-1)){ - if(any(i == lockMass[,1])){ + if(any(i == lockMass[, 1])){ + ## Place mz and intensity values from the previous scan into the + ## array and fill the rest with NA. arr[1,,i] <-c(object@env$mz[(object@scanindex[(i-1)]+1):object@scanindex[i]], rep(NA, (max(diff(object@scanindex))- length((object@scanindex[(i-1)]+1):object@scanindex[i])) )) @@ -2865,7 +2339,8 @@ setMethod("stitch.xml", "xcmsRaw", function(object, lockMass) { rep(NA, (max(diff(object@scanindex)) - length((object@scanindex[(i-1)]+1):object@scanindex[i])) )) - } else if(any(i == lockMass[,2])){ + } else if(any(i == lockMass[, 2])){ + ## Place mz and intensity values from the next scan into the array. 
arr[1,,i] <-c(object@env$mz[(object@scanindex[i+1]+1):object@scanindex[(i+2)]], rep(NA, (max(diff(object@scanindex)) - length((object@scanindex[i+1]+1):object@scanindex[(i+2)])) )) @@ -2875,6 +2350,7 @@ setMethod("stitch.xml", "xcmsRaw", function(object, lockMass) { length((object@scanindex[i+1]+1):object@scanindex[(i+2)])) )) } else{ + ## Just fill with the actual values. arr[1,,i] <-c(object@env$mz[(object@scanindex[i]+1):object@scanindex[i+1]], rep(NA, (max(diff(object@scanindex))- length((object@scanindex[i]+1):object@scanindex[i+1])) )) @@ -2896,6 +2372,12 @@ setMethod("stitch.xml", "xcmsRaw", function(object, lockMass) { ob@scanindex[i]<-as.integer(length(na.omit(arr[1,,(i-1)]))+ob@scanindex[(i-1)]) } } + ## Fix for #173: fill also values for the last scan. + last_i <- length(ob@scanindex) + fetch_idx <- (object@scanindex[last_i] + 1):length(object@env$mz) + put_idx <- 1:length(fetch_idx) + arr[1, put_idx, length(ob@scanindex)] <- object@env$mz[fetch_idx] + arr[2, put_idx, length(ob@scanindex)] <- object@env$intensity[fetch_idx] NAidx<-is.na(arr[1,,]) ob@env$mz<-as.numeric(arr[1,,][!NAidx]) @@ -3133,7 +2615,7 @@ setMethod("[", signature(x = "xcmsRaw", ##' @title The profile matrix ##' -##' @aliases profile-matrix profMat +##' @aliases profile-matrix profMat profMat,xcmsRaw-method ##' ##' @description The \emph{profile} matrix is an n x m matrix, n (rows) ##' representing equally spaced m/z values (bins) and m (columns) the @@ -3197,11 +2679,14 @@ setMethod("[", signature(x = "xcmsRaw", ##' @seealso \code{\linkS4class{xcmsRaw}}, \code{\link{binYonX}} and ##' \code{\link{imputeLinInterpol}} for the employed binning and ##' missing value imputation methods, respectively. +##' \code{\link{profMat,XCMSnExp-method}} for the method on \code{\link{XCMSnExp}} +##' objects. ##' ##' @return \code{profMat} returns the profile matrix (rows representing scans, ##' columns equally spaced m/z values). 
##' ##' @author Johannes Rainer +##' ##' @examples ##' file <- system.file('cdf/KO/ko15.CDF', package = "faahKO") ##' ## Load the data without generating the profile matrix (profstep = 0) @@ -3217,6 +2702,9 @@ setMethod("[", signature(x = "xcmsRaw", ##' profMethod(xraw) <- "binlin" ##' profmat_2 <- profMat(xraw, step = 0.3) ##' all.equal(profmat, profmat_2) +##' +##' @rdname profMat-xcmsSet +##' @name profMat-xcmsSet setMethod("profMat", signature(object = "xcmsRaw"), function(object, method, step, baselevel, diff --git a/R/methods-xcmsSet.R b/R/methods-xcmsSet.R index 073bb7e25..5082d47a4 100644 --- a/R/methods-xcmsSet.R +++ b/R/methods-xcmsSet.R @@ -32,7 +32,7 @@ setMethod("show", "xcmsSet", function(object) { scanrange(object)[2], "\n") } } - errs <- .getProcessErrors(object, PROCSTEP = .PROCSTEP.FEATURE.DETECTION) + errs <- .getProcessErrors(object, PROCSTEP = .PROCSTEP.PEAK.DETECTION) if (length(errs) > 0) { cat(" o Detection errors: ", length(errs), " files failed.\n", " Use method 'showError' to list the error(s).\n\n", sep ="") @@ -340,98 +340,20 @@ setMethod("groupnames", "xcmsSet", function(object, mzdec = 0, rtdec = 0, ############################################################ ## group.density -setMethod("group.density", "xcmsSet", function(object, bw = 30, minfrac = 0.5, minsamp = 1, - mzwid = 0.25, max = 50, sleep = 0) { - - samples <- sampnames(object) - classlabel <- sampclass(object) - classnames <- as.character(unique(sampclass(object))) - classlabel <- as.vector(unclass(classlabel)) - classnum <- table(classlabel) - - peakmat <- peaks(object) - porder <- order(peakmat[,"mz"]) - peakmat <- peakmat[porder,, drop=FALSE] - rownames(peakmat) <- NULL - retrange <- range(peakmat[,"rt"]) - - mass <- seq(peakmat[1,"mz"], peakmat[nrow(peakmat),"mz"] + mzwid, by = mzwid/2) - masspos <- findEqualGreaterM(peakmat[,"mz"], mass) - - groupmat <- matrix(nrow = 512, ncol = 7 + length(classnum)) - groupindex <- vector("list", 512) - - endidx <- 0 - num <- 0 - 
gcount <- integer(length(classnum)) - for (i in seq(length = length(mass)-2)) { - if (i %% 500 == 0) { - cat(round(mass[i]), "") - flush.console() - } - startidx <- masspos[i] - endidx <- masspos[i+2]-1 - if (endidx - startidx < 0) - next - speakmat <- peakmat[startidx:endidx,,drop=FALSE] - den <- density(speakmat[,"rt"], bw, from = retrange[1]-3*bw, to = retrange[2]+3*bw) - maxden <- max(den$y) - deny <- den$y - gmat <- matrix(nrow = 5, ncol = 2+length(classnum)) - snum <- 0 - while (deny[maxy <- which.max(deny)] > maxden/20 && snum < max) { - grange <- descendMin(deny, maxy) - deny[grange[1]:grange[2]] <- 0 - gidx <- which(speakmat[,"rt"] >= den$x[grange[1]] & speakmat[,"rt"] <= den$x[grange[2]]) - gnum <- classlabel[unique(speakmat[gidx,"sample"])] - for (j in seq(along = gcount)) - gcount[j] <- sum(gnum == j) - if (! any(gcount >= classnum*minfrac & gcount >= minsamp)) - next - snum <- snum + 1 - num <- num + 1 -### Double the size of the output containers if they're full - if (num > nrow(groupmat)) { - groupmat <- rbind(groupmat, matrix(nrow = nrow(groupmat), ncol = ncol(groupmat))) - groupindex <- c(groupindex, vector("list", length(groupindex))) - } - groupmat[num, 1] <- median(speakmat[gidx, "mz"]) - groupmat[num, 2:3] <- range(speakmat[gidx, "mz"]) - groupmat[num, 4] <- median(speakmat[gidx, "rt"]) - groupmat[num, 5:6] <- range(speakmat[gidx, "rt"]) - groupmat[num, 7] <- length(gidx) - groupmat[num, 7+seq(along = gcount)] <- gcount - groupindex[[num]] <- sort(porder[(startidx:endidx)[gidx]]) - } - if (sleep > 0) { - plot(den, main = paste(round(min(speakmat[,"mz"]), 2), "-", round(max(speakmat[,"mz"]), 2))) - for (i in seq(along = classnum)) { - idx <- classlabel[speakmat[,"sample"]] == i - points(speakmat[idx,"rt"], speakmat[idx,"into"]/max(speakmat[,"into"])*maxden, col = i, pch=20) - } - for (i in seq(length = snum)) - abline(v = groupmat[num-snum+i, 5:6], lty = "dashed", col = i) - Sys.sleep(sleep) - } - } - cat("\n") - - colnames(groupmat) <- 
c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax", - "npeaks", classnames) - - groupmat <- groupmat[seq(length = num),,drop=FALSE] - groupindex <- groupindex[seq(length = num)] - - ## Remove groups that overlap with more "well-behaved" groups - numsamp <- rowSums(groupmat[,(match("npeaks", colnames(groupmat))+1):ncol(groupmat),drop=FALSE]) - uorder <- order(-numsamp, groupmat[,"npeaks"]) - - uindex <- rectUnique(groupmat[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE], - uorder) - - groups(object) <- groupmat[uindex,,drop=FALSE] - groupidx(object) <- groupindex[uindex] - +setMethod("group.density", "xcmsSet", function(object, bw = 30, minfrac = 0.5, + minsamp = 1, mzwid = 0.25, + max = 50, sleep = 0) { + ## Using now the do_groupChromPeaks_density function: + res <- do_groupChromPeaks_density(peaks(object), + sampleGroups = sampclass(object), + bw = bw, + minFraction = minfrac, + minSamples = minsamp, + binSize = mzwid, + maxFeatures = max) + + groups(object) <- res$featureDefinitions + groupidx(object) <- res$peakIndex object }) @@ -441,172 +363,32 @@ setMethod("group.mzClust", "xcmsSet", function(object, mzppm = 20, mzabs = 0, minsamp = 1, - minfrac=0.5) - { - samples <- sampnames(object) - classlabel <- sampclass(object) - peaks <- peaks(object) - groups <- mzClustGeneric(peaks[,c("mz","sample")], - sampclass=classlabel, - mzppm=mzppm,mzabs=mzabs, - minsamp=minsamp, - minfrac=minfrac) - - if(is.null(nrow(groups$mat))) { - matColNames <- names(groups$mat) - groups$mat <- matrix(groups$mat, - ncol=length(groups$mat),byrow=F); - colnames(groups$mat) <- matColNames - } - - rt <- c(rep(-1,nrow(groups$mat))) - - groups(object) <- cbind(groups$mat[,(1:3),drop=FALSE],rt,rt,rt,groups$mat[,(4:ncol(groups$mat)),drop=FALSE]) - colnames(groups(object)) <- c(colnames(groups$mat[,(1:3),drop=FALSE]), "rtmed", "rtmin", "rtmax", colnames(groups$mat[,(4:ncol(groups$mat)),drop=FALSE])) - groupidx(object) <- groups$idx - - object - }) + minfrac=0.5) { + + res <- 
do_groupPeaks_mzClust(peaks = peaks(object), + sampleGroups = sampclass(object), + ppm = mzppm, + absMz = mzabs, + minFraction = minfrac, + minSamples = minsamp) + groups(object) <- res$featureDefinitions + groupidx(object) <- res$peakIndex + object +}) ############################################################ ## group.nearest setMethod("group.nearest", "xcmsSet", function(object, mzVsRTbalance=10, mzCheck=0.2, rtCheck=15, kNN=10) { - ## If ANN is available ... - ##RANN = "RANN" - ##if (!require(RANN)) { - ## stop("RANN is not installed") - ##} - - ## classlabel <- sampclass(object) - classlabel <- as.vector(unclass(sampclass(object))) - - samples <- sampnames(object) - gcount <- integer(length(unique(sampclass(object)))) - - peakmat <- peaks(object) - parameters <- list(mzVsRTBalance = mzVsRTbalance, mzcheck = mzCheck, rtcheck = rtCheck, knn = kNN) - - ptable <- table(peaks(object)[,"sample"]) - pord <- ptable[order(ptable, decreasing = TRUE)] - sid <- as.numeric(names(pord)) - pn <- as.numeric(pord) - - samples <- sampnames(object) - cat("sample:", basename(samples[sid[1]]), " ") - - mplenv <- new.env(parent = .GlobalEnv) - mplenv$mplist <- matrix(0, pn[1], length(sid)) - mplenv$mplist[, sid[1]] <- which(peakmat[,"sample"] == sid[1]) - mplenv$mplistmean <- data.frame(peakmat[which(peakmat[,"sample"] == sid[1]),c("mz","rt")]) - mplenv$peakmat <- peakmat - assign("peakmat", peakmat, envir = mplenv) - - sapply(sid[2:length(sid)], function(sample, mplenv, object){ - ## require(parallel) - ## cl <- makeCluster(getOption("cl.cores", nSlaves)) - ## clusterEvalQ(cl, library(RANN)) - ## parSapply(cl, 2:length(samples), function(sample,mplenv, object){ - for(mml in seq(mplenv$mplist[,1])){ - mplenv$mplistmean[mml,"mz"] <- mean(mplenv$peakmat[mplenv$mplist[mml,],"mz"]) - mplenv$mplistmean[mml,"rt"] <- mean(mplenv$peakmat[mplenv$mplist[mml,],"rt"]) - } - - cat("sample:",basename(samples[sample])," ") - mplenv$peakIdxList <- 
data.frame(peakidx=which(mplenv$peakmat[,"sample"]==sample), - isJoinedPeak=FALSE) - if(length(mplenv$peakIdxList$peakidx)==0){ - cat("Warning: No peaks in sample\n") - } - ## scoreList <- data.frame(score=numeric(0),peak=integer(0),mpListRow=integer(0), - ## isJoinedPeak=logical(0), isJoinedRow=logical(0)) - ## - ## for(currPeak in mplenv$peakIdxList$peakidx){ - ## pvrScore <- patternVsRowScore(currPeak,parameters,mplenv) ## does the actual NN - ## scoreList <- rbind(scoreList,pvrScore) - ## } - ## this really doesn't take a long time not worth parallel version here. - ## but make an apply loop now faster even with rearranging the data :D : PB - scoreList <- sapply(mplenv$peakIdxList$peakidx, function(currPeak, para, mplenv){ - patternVsRowScore(currPeak,para,mplenv) - }, parameters, mplenv) - if(is.list(scoreList)){ - idx<-which(scoreList != "NULL") - scoreList<-matrix(unlist(scoreList[idx]), ncol=5, nrow=length(idx), byrow=T) - colnames(scoreList)<-c("score", "peak", "mpListRow", "isJoinedPeak", "isJoinedRow") - } else { - scoreList <- data.frame(score=unlist(scoreList["score",]), peak=unlist(scoreList["peak",]), mpListRow= - unlist(scoreList["mpListRow",]), isJoinedPeak=unlist(scoreList["isJoinedPeak",]), - isJoinedRow=unlist(scoreList["isJoinedRow",])) - } - - ## Browse scores in order of descending goodness-of-fit - scoreListcurr <- scoreList[order(scoreList[,"score"]),] - if (nrow(scoreListcurr) > 0) - for (scoreIter in 1:nrow(scoreListcurr)) { - - iterPeak <-scoreListcurr[scoreIter, "peak"] - iterRow <- scoreListcurr[scoreIter, "mpListRow"] - - ## Check if master list row is already assigned with peak - if (scoreListcurr[scoreIter, "isJoinedRow"]==TRUE) { - next - } - - ## Check if peak is already assigned to some master list row - if (scoreListcurr[scoreIter, "isJoinedPeak"]==TRUE) { next } - - ## Check if score good enough - ## Assign peak to master peak list row - mplenv$mplist[iterRow,sample] <- iterPeak - - ## Mark peak as joined - setTrue <- 
which(scoreListcurr[,"mpListRow"] == iterRow) - scoreListcurr[setTrue,"isJoinedRow"] <- TRUE - setTrue <- which(scoreListcurr[,"peak"] == iterPeak) - scoreListcurr[setTrue, "isJoinedPeak"] <- TRUE - mplenv$peakIdxList[which(mplenv$peakIdxList$peakidx==iterPeak),]$isJoinedPeak <- TRUE - } - - notJoinedPeaks <- mplenv$peakIdxList[which(mplenv$peakIdxList$isJoinedPeak==FALSE),]$peakidx - for(notJoinedPeak in notJoinedPeaks) { - mplenv$mplist <- rbind(mplenv$mplist,matrix(0,1,dim(mplenv$mplist)[2])) - mplenv$mplist[length(mplenv$mplist[,1]),sample] <- notJoinedPeak - } - - ## Clear "Joined" information from all master peaklist rows - rm(list = "peakIdxList", envir=mplenv) - - ## updateProgressInfo - object@progressInfo$group.nearest <- (sample - 1) / (length(samples) - 1) - progressInfoUpdate(object) - }, mplenv, object) - ## stopCluster(cl) - gc() - - groupmat <- matrix(0,nrow(mplenv$mplist), 7+length(levels(sampclass(object)))) - colnames(groupmat) <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax", - "npeaks", levels(sampclass(object))) - groupindex <- vector("list", nrow(mplenv$mplist)) - for (i in 1:nrow(mplenv$mplist)) { - groupmat[i, "mzmed"] <- median(peakmat[mplenv$mplist[i,],"mz"]) - groupmat[i, c("mzmin", "mzmax")] <- range(peakmat[mplenv$mplist[i,],"mz"]) - groupmat[i, "rtmed"] <- median(peakmat[mplenv$mplist[i,],"rt"]) - groupmat[i, c("rtmin", "rtmax")] <- range(peakmat[mplenv$mplist[i,],"rt"]) - - groupmat[i, "npeaks"] <- length(which(peakmat[mplenv$mplist[i,]]>0)) - - gnum <- classlabel[unique(peakmat[mplenv$mplist[i,],"sample"])] - for (j in seq(along = gcount)) - gcount[j] <- sum(gnum == j) - groupmat[i, 7+seq(along = gcount)] <- gcount - - groupindex[[i]] <- mplenv$mplist[i, (which(mplenv$mplist[i,]>0))] - } - - groups(object) <- groupmat - groupidx(object) <- groupindex + res <- do_groupChromPeaks_nearest(peaks = peaks(object), + sampleGroups = sampclass(object), + mzVsRtBalance = mzVsRTbalance, + absMz = mzCheck, + absRt = rtCheck, + kNN = 
kNN) + groups(object) <- res$featureDefinitions + groupidx(object) <- res$peakIndex invisible(object) }) @@ -671,7 +453,13 @@ setMethod("retcor", "xcmsSet", function(object, method=getOption("BioC")$xcms$re ...) { ## Backward compatibility for old "methods" if (method == "linear" || method == "loess") { - return(invisible(do.call(retcor.peakgroups, alist(object, smooth=method, ...)))) + args <- list(...) + if (any(names(args) == "smooth")) + warning("Provided argument 'smooth' will be replaced with the ", + "value of 'method', i.e. with ", method) + args$smooth <- method + ## Overwriting eventually provided smooth parameter. + return(invisible(do.call(retcor.peakgroups, c(list(object), args)))) } method <- match.arg(method, getOption("BioC")$xcms$retcor.methods) @@ -682,14 +470,16 @@ setMethod("retcor", "xcmsSet", function(object, method=getOption("BioC")$xcms$re invisible(do.call(method, alist(object, ...))) }) + ############################################################ ## retcor.peakgroups setMethod("retcor.peakgroups", "xcmsSet", function(object, missing = 1, extra = 1, - smooth = c("loess", "linear"), span = .2, + smooth = c("loess", "linear"), + span = .2, family = c("gaussian", "symmetric"), plottype = c("none", "deviation", "mdevden"), col = NULL, ty = NULL) { - + peakmat <- peaks(object) groupmat <- groups(object) if (length(groupmat) == 0) @@ -713,14 +503,141 @@ setMethod("retcor.peakgroups", "xcmsSet", function(object, missing = 1, extra = object@rt <- list(raw = rtcor, corrected = rtcor) } + minFr <- (n - missing) / n + res <- do_adjustRtime_peakGroups(peaks = peakmat, + peakIndex = object@groupidx, + rtime = rtcor, + minFraction = minFr, + extraPeaks = extra, + smooth = smooth, + span = span, + family = family) + rtdevsmo <- vector("list", n) + for (i in 1:n) { + rtdevsmo[[i]] <- rtcor[[i]] - res[[i]] + } + ## rtdevsmo <- mapply(FUN = function(a, b) { + ## return(a - b) + ## }, rtcor, res) + + if (plottype == "mdevden") { + split.screen(matrix(c(0, 
1, .3, 1, 0, 1, 0, .3), ncol = 4, byrow = TRUE)) + screen(1) + par(mar = c(0, 4.1, 4.1, 2), xaxt = "n") + } + + if (plottype %in% c("deviation", "mdevden")) { + ## Need also the 'rt' matrix: + rt <- .getPeakGroupsRtMatrix(peakmat, object@groupidx, n, + missing, extra) + rtdev <- rt - apply(rt, 1, median, na.rm = TRUE) + + ## define the colors and line types and returns a list of + ## mypal, col and ty. Uses the original code if no colors are + ## submitted. Supports manually selected colors (e.g. in hex) + vals <- defineColAndTy(col, ty, classlabel) + col <- vals$col + mypal <- vals$mypal + ty <- vals$ty + + rtrange <- range(do.call(c, rtcor)) + devrange <- range(do.call(c, rtdevsmo)) + + plot(0, 0, type="n", xlim = rtrange, ylim = devrange, + main = "Retention Time Deviation vs. Retention Time", + xlab = "Retention Time", ylab = "Retention Time Deviation") + legend(rtrange[2], devrange[2], samples, col = mypal[col], lty = ty, + pch = ceiling(1:n/length(mypal)), xjust = 1) + + for (i in 1:n) { + points(data.frame(rt = rt[,i], rtdev = rtdev[,i]), + col = mypal[col[i]], pch = ty[i], type = "p") + points(rtcor[[i]], rtdevsmo[[i]], type="l", col = mypal[col[i]], + lty = ty[i]) + } + } + + if (plottype == "mdevden") { + + screen(2) + par(mar = c(5.1, 4.1, 0, 2), yaxt = "n") + allden <- density(peakmat[,"rt"], bw = diff(rtrange)/200, + from = rtrange[1], to = rtrange[2])[c("x","y")] + corden <- density(rt, bw = diff(rtrange)/200, from = rtrange[1], + to = rtrange[2], na.rm = TRUE)[c("x","y")] + allden$y <- allden$y / sum(allden$y) + corden$y <- corden$y / sum(corden$y) + maxden <- max(allden$y, corden$y) + plot(c(0,0), xlim = rtrange, ylim = c(0, maxden), type = "n", + main = "", xlab = "Retention Time", ylab = "Peak Density") + points(allden, type = "l", col = 1) + points(corden, type = "l", col = 2) + abline(h = 0, col = "grey") + legend(rtrange[2], maxden, c("All", "Correction"), col = 1:2, + lty = c(1,1), xjust = 1) + close.screen(all.screens = TRUE) + } + + for (i 
in 1:n) { + cfun <- stepfun(rtcor[[i]][-1] - diff(rtcor[[i]]) / 2, + rtcor[[i]] - rtdevsmo[[i]]) + rtcor[[i]] <- rtcor[[i]] - rtdevsmo[[i]] + + sidx <- which(corpeaks[,"sample"] == i) + corpeaks[sidx, c("rt", "rtmin", "rtmax")] <- + cfun(corpeaks[sidx, c("rt", "rtmin", "rtmax")]) + } + + object@rt$corrected <- rtcor + peaks(object) <- corpeaks + groups(object) <- matrix(nrow = 0, ncol = 0) + groupidx(object) <- list() + invisible(object) +}) +## The original code! +.retcor.peakgroups_orig <- function(object, missing = 1, extra = 1, + smooth = c("loess", "linear"), span = .2, + family = c("gaussian", "symmetric"), + plottype = c("none", "deviation", "mdevden"), + col = NULL, ty = NULL) { + + peakmat <- peaks(object) + groupmat <- groups(object) + if (length(groupmat) == 0) + stop("No group information found") + samples <- sampnames(object) + classlabel <- as.vector(unclass(sampclass(object))) + n <- length(samples) + corpeaks <- peakmat + smooth <- match.arg(smooth) + plottype <- match.arg(plottype) + family <- match.arg(family) + if (length(object@rt) == 2) + rtcor <- object@rt$corrected + else { + fnames <- filepaths(object) + rtcor <- vector("list", length(fnames)) + for (i in seq(along = fnames)) { + xraw <- xcmsRaw(fnames[i]) + rtcor[[i]] <- xraw@scantime + } + object@rt <- list(raw = rtcor, corrected = rtcor) + } + nsamp <- rowSums(groupmat[,match("npeaks", colnames(groupmat))+unique(classlabel),drop=FALSE]) idx <- which(nsamp >= n-missing & groupmat[,"npeaks"] <= nsamp + extra) if (length(idx) == 0) stop("No peak groups found for retention time correction") - idx <- idx[order(groupmat[idx,"rtmed"])] + ## Ordering the peaks by the rtmed might not represent the ordering + ## of the below selected "representative" peak for each peak. 
+ ## See issue #110 + ## idx <- idx[order(groupmat[idx,"rtmed"])] rt <- groupval(object, "maxint", "rt")[idx,, drop=FALSE] + ## And now order them by median retention time: issue #110 + rt <- rt[order(rowMedians(rt, na.rm = TRUE)), , drop = FALSE] + cat("Retention Time Correction Groups:", nrow(rt), "\n") rtdev <- rt - apply(rt, 1, median, na.rm = TRUE) @@ -804,8 +721,11 @@ setMethod("retcor.peakgroups", "xcmsSet", function(object, missing = 1, extra = rtrange <- range(do.call(c, rtcor)) devrange <- range(do.call(c, rtdevsmo)) - plot(0, 0, type="n", xlim = rtrange, ylim = devrange, main = "Retention Time Deviation vs. Retention Time", xlab = "Retention Time", ylab = "Retention Time Deviation") - legend(rtrange[2], devrange[2], samples, col = mypal[col], lty = ty, pch = ceiling(1:n/length(mypal)), xjust = 1) + plot(0, 0, type="n", xlim = rtrange, ylim = devrange, + main = "Retention Time Deviation vs. Retention Time", + xlab = "Retention Time", ylab = "Retention Time Deviation") + legend(rtrange[2], devrange[2], samples, col = mypal[col], lty = ty, + pch = ceiling(1:n/length(mypal)), xjust = 1) for (i in 1:n) { points(data.frame(rt = rt[,i], rtdev = rtdev[,i]), col = mypal[col[i]], pch = ty[i], type="p") @@ -817,8 +737,10 @@ setMethod("retcor.peakgroups", "xcmsSet", function(object, missing = 1, extra = screen(2) par(mar = c(5.1, 4.1, 0, 2), yaxt = "n") - allden <- density(peakmat[,"rt"], bw = diff(rtrange)/200, from = rtrange[1], to = rtrange[2])[c("x","y")] - corden <- density(rt, bw = diff(rtrange)/200, from = rtrange[1], to = rtrange[2], na.rm = TRUE)[c("x","y")] + allden <- density(peakmat[,"rt"], bw = diff(rtrange)/200, + from = rtrange[1], to = rtrange[2])[c("x","y")] + corden <- density(rt, bw = diff(rtrange)/200, from = rtrange[1], + to = rtrange[2], na.rm = TRUE)[c("x","y")] allden$y <- allden$y / sum(allden$y) corden$y <- corden$y / sum(corden$y) maxden <- max(allden$y, corden$y) @@ -844,7 +766,8 @@ setMethod("retcor.peakgroups", "xcmsSet", 
function(object, missing = 1, extra = groups(object) <- matrix(nrow = 0, ncol = 0) groupidx(object) <- list() invisible(object) -}) +} + ############################################################ ## retcor.obiwarp @@ -908,6 +831,12 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de ## added t automatically find the correct scan range from the xcmsSet object if(length(obj1@scantime) != length(object@rt$raw[[center]])){ + ## This is in case the xcmsSet was read using a scanrange, i.e. if + ## the data was read in with defining a scan range, then we would have a + ## mismatch here. This code essentially ensures that the retention time + ## of the raw object would match the retention time present in the xcmsSet. + ## This was before the days in which @scanrange was added as a slot to + ## xcmsSet. ##figure out the scan time range scantime.start <-object@rt$raw[[center]][1] scantime.end <-object@rt$raw[[center]][length(object@rt$raw[[center]])] @@ -946,6 +875,7 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de scantime1 <- obj1@scantime scantime2 <- obj2@scantime + ## median difference between spectras' scan times. mstdiff <- median(c(diff(scantime1), diff(scantime2))) rtup1 <- c(1:length(scantime1)) @@ -966,8 +896,11 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de scantime1 <- scantime1[rtup1] scantime2 <- scantime2[rtup2] + ## Drift of measured scan times - expected to be largest at the end. rtmaxdiff <- abs(diff(c(scantime1[length(scantime1)], scantime2[length(scantime2)]))) + ## If the drift is larger than the threshold, cut the matrix up to the + ## max allowed difference. 
if(rtmaxdiff>(5*mstdiff)){ rtmax <- min(scantime1[length(scantime1)], scantime2[length(scantime2)]) @@ -980,6 +913,7 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de valscantime1 <- length(scantime1) valscantime2 <- length(scantime2) + ## Restrict the profile matrix to columns 1:valscantime if(length(obj1@scantime)>valscantime1) { obj1@env$profile <- obj1@env$profile[,-c((valscantime1+1):length(obj1@scantime))] } @@ -987,11 +921,14 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de obj2@env$profile <- obj2@env$profile[,-c((valscantime2+1):length(obj2@scantime))] } + ## Now ensure that the nrow of the profile matrix matches. + ## Add empty rows at the beginning if(mzmin < obj1@mzrange[1]) { seqlen <- length(seq(mzmin, obj1@mzrange[1], profStep))-1 x <- matrix(0, seqlen,dim(obj1@env$profile)[2]) obj1@env$profile <- rbind(x, obj1@env$profile) } + ## Add empty rows at the end. if(mzmax > obj1@mzrange[2]){ seqlen <- length(seq(obj1@mzrange[2], mzmax, profStep))-1 x <- matrix(0, seqlen, dim(obj1@env$profile)[2]) @@ -1008,12 +945,14 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de obj2@env$profile <- rbind(obj2@env$profile, x) } + ## OK, now that the matrices are "aligned" extract the intensities intensity1 <- obj1@env$profile intensity2 <- obj2@env$profile if ((mzval * valscantime1 != length(intensity1)) || (mzval * valscantime2 != length(intensity2))) stop("Dimensions of profile matrices do not match !\n") + ## Would it be possible to supply non-binned data too??? 
rtimecor[[s]] <-.Call("R_set_from_xcms", valscantime1,scantime1,mzval,mz,intensity1, valscantime2,scantime2,mzval,mz,intensity2, @@ -1022,6 +961,8 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de factorDiag, factorGap, localAlignment, initPenalty) + ## Hm, silently add the raw retention times if we cut the retention time + ## vector above - would merit at least a warning I believe. if(length(obj2@scantime) > valscantime2) { object@rt$corrected[[s]] <- c(rtimecor[[s]], obj2@scantime[(max(rtup2)+1):length(obj2@scantime)]) @@ -1029,6 +970,10 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de object@rt$corrected[[s]] <- rtimecor[[s]] } + ## Why are we rounding here, but NOT in the retcor.peakgroups? + ## -> issue #122 + ## The point is we're using the un-rounded adjusted rt for the rt, BUT + ## use the rounded values for the adjustment of the peak rts. rtdevsmo[[s]] <- round(rtcor[[s]]-object@rt$corrected[[s]],2) rm(obj2) @@ -1041,6 +986,8 @@ setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "de } cat("\n") + ## Why are we rounding here, but NOT in the retcor.peakgroups? 
+ ## -> issue #122 rtdevsmo[[center]] <- round(rtcor[[center]] - object@rt$corrected[[center]], 2) if (plottype == "deviation") { @@ -1130,7 +1077,8 @@ setMethod("plotrt", "xcmsSet", function(object, col = NULL, ty = NULL, leg = TRU screen(2) par(mar = c(5.1, 4.1, 0, 2), yaxt = "n") - allden <- density(object@peaks[,"rt"], bw = diff(rtrange)/200, from = rtrange[1], to = rtrange[2])[c("x","y")] + allden <- density(object@peaks[,"rt"], bw = diff(rtrange)/200, + from = rtrange[1], to = rtrange[2])[c("x","y")] plot(allden, xlim = rtrange, type = "l", main = "", xlab = "Retention Time", ylab = "Peak Density") abline(h = 0, col = "grey") close.screen(all.screens = TRUE) @@ -1140,7 +1088,7 @@ setMethod("plotrt", "xcmsSet", function(object, col = NULL, ty = NULL, leg = TRU ############################################################ ## fillPeaks.chrom ## New version using BiocParallel instead of nSlaves and manual setup. -setMethod("fillPeaks.chrom", "xcmsSet", function(object, nSlaves = NULL, +setMethod("fillPeaks.chrom", "xcmsSet", function(object, nSlaves = 0, expand.mz = 1,expand.rt = 1, BPPARAM = bpparam()) { ## development mockup: @@ -1153,10 +1101,13 @@ setMethod("fillPeaks.chrom", "xcmsSet", function(object, nSlaves = NULL, attach(pkgEnv) } - if (!is.null(nSlaves)) - warning("Use of argument 'nSlaves' is deprecated!", - " Please use 'BPPARAM' instead.") - + if (!is.null(nSlaves)) { + if (nSlaves > 0) { + message("Use of argument 'nSlaves' is deprecated,", + " please use 'BPPARAM' instead.") + options(mc.cores = nSlaves) + } + } peakmat <- peaks(object) groupmat <- groups(object) if (length(groupmat) == 0) @@ -1312,16 +1263,30 @@ setMethod("fillPeaks.MSW", "xcmsSet", function(object, mrange=c(0,0), sample=NUL for (g in ngs) { nppos <- nppos+1 - mzpos <- which(abs(lcraw@env$mz - groupmat[g,"mzmed"]) == min(abs(lcraw@env$mz - groupmat[g,"mzmed"]))) - mmzpos <- mzpos[which(lcraw@env$intensity[mzpos] == max(lcraw@env$intensity[mzpos]))] + ## Get the index of the mz 
value(s) closest to the mzmed of the + ## group; could eventually be more than one. + mzpos <- which(abs(lcraw@env$mz - groupmat[g,"mzmed"]) == + min(abs(lcraw@env$mz - groupmat[g,"mzmed"]))) + ## Get the index of the maximum intensity for the above mz index. + mmzpos <- mzpos[which(lcraw@env$intensity[mzpos] == + max(lcraw@env$intensity[mzpos]))] + ## Eventually increase the range around the mzmed. mmzr <- seq((mmzpos-mrange[1]),(mmzpos+mrange[2])) + ## maxo is the maximum signal for mz values that are closest to + ## the mzmed of the feature. maxo <- max(lcraw@env$intensity[mmzr]) ## this is the new one, summing the scale-range - ## calculating scale, adding intensitiesin this scale + ## calculating scale, adding intensities in this scale medMZmin <- median(peakmat[groupindex[[g]],"mzmin"]) medMZmax <- median(peakmat[groupindex[[g]],"mzmax"]) - minMzpos <- min(which(abs(lcraw@env$mz - medMZmin) == min(abs(lcraw@env$mz - medMZmin)))) - maxMzpos <- max(which(abs(lcraw@env$mz - medMZmax) == min(abs(lcraw@env$mz - medMZmax)))) + ## mz values to be considered: from median mzmin of all peaks in + ## the current peak group/feature to median mzmax. + ## Might eventually be easier to just check for $mz >= medMZmin + minMzpos <- min(which(abs(lcraw@env$mz - medMZmin) == + min(abs(lcraw@env$mz - medMZmin)))) + maxMzpos <- max(which(abs(lcraw@env$mz - medMZmax) == + min(abs(lcraw@env$mz - medMZmax)))) + ## into: the sum of intensities in this range. 
into = sum(lcraw@env$intensity[minMzpos:maxMzpos]) newpeaks[nppos,] <- c(groupmat[g,"mzmed"],medMZmin,medMZmax,-1,-1,-1,into,maxo,i) } @@ -2101,7 +2066,7 @@ setMethod("specDist", signature(object="xcmsSet"), ##' @title Extract processing errors ##' @aliases showError ##' -##' @description If feature detection is performed with \code{\link{findPeaks}} +##' @description If peak detection is performed with \code{\link{findPeaks}} ##' setting argument \code{stopOnError = FALSE} eventual errors during the ##' process do not cause to stop the processing but are recorded inside of the ##' resulting \code{\linkS4class{xcmsSet}} object. These errors can be accessed diff --git a/R/xcmsSource.R b/R/xcmsSource.R index 232a72215..71d00c058 100644 --- a/R/xcmsSource.R +++ b/R/xcmsSource.R @@ -2,25 +2,15 @@ #' @include AllGenerics.R DataClasses.R setMethod("xcmsSource", "character", function(object) { - ## if (useOriginalCode()) { - ## if (! file.exists(object)) { - ## stop("xcmsSource: file not found: ", object) - ## } else if (mzR:::netCDFIsFile(object)) { - ## new("netCdfSource", object) - ## } else if (mzR:::rampIsFile(object)) { - ## new("rampSource", object) - ## } else { - ## stop("xcmsSource: Could not determine file type for: ", object) - ## } - ## } else { if (!file.exists(object)) { stop("xcmsSource: file not found: ", object) } else if (isCdfFile(object)) { new("netCdfSource", object) } else if (isMzMLFile(object)) { + new("pwizSource", object) + } else if (isRampFile(object)) { new("rampSource", object) } else { stop("xcmsSource: Could not determine file type for: ", object) } - ## } }) diff --git a/inst/NEWS b/inst/NEWS index acc81dbe6..0380de3a0 100644 --- a/inst/NEWS +++ b/inst/NEWS @@ -1,103 +1,233 @@ +CHANGES IN VERSION 2.99.1 +------------------------- + +NEW FEATURES: +- extractMsData to extract raw MS data as a data.frame (issue #120). 
+ +BUG FIXES: +- issue #175: an error is now thrown if no peak group was identified for peak + group retention time correction. +- issue #178: scanrange was collapsed when the adjusted range was reported + (pull request by Jan Stanstrup). +- issue #180: error when both parameters method and smooth are provided in the + retcor method. + + +CHANGES IN VERSION 2.99.0 +------------------------- + +NEW FEATURES: +- plotChromatogram and highlightChromPeaks functions. +- plotChromPeakDensity function. +- clean method for Chromatogram classes. + +USER VISIBLE CHANGES: +- Change default for ppm parameter in chromPeaks method to 0. +- extractChromatograms supports extraction of multiple rt and mz ranges. +- New parameter missing for extractChromatograms allowing to specify the + intensity value to be used for rts for which no signal is available within + the mz range. +- extractChromatograms returns Chromatograms of length equal to the number of + scans within the specified rt range, even if no signals are measured + (intensity values are NA). + + +CHANGES IN VERSION 1.53.1 +-------------------------- + +BUG FIXES: +- Increase parameter n for the density call in the peak density correspondence + method. This enables to separate neighboring peaks using small n (issue #161). + Thanks to Jan Stanstrup. + + +CHANGES IN VERSION 1.51.11 +-------------------------- + +NEW FEATURES: +- Parameter "filled" for featureValues (issue #157). +- Parameters "rt" and "mz" in chromPeaks method allowing to extract + chromatographic peaks from the specified ranges (issue #156). + +BUG FIXES: +- Fixed possible memory problem in obiwarp (issue #159). +- Update getPeaks to use non-deprecated API (issue #163). + + +CHANGES IN VERSION 1.51.10 +-------------------------- + +NEW FEATURES: +- filterRt for Chromatogram class (issue #142). +- adjustRtimePeakGroups function (issue #147). 
+- adjustRtime,XCMSnExp,PeakGroupsParam and do_adjustRtime_peakGroups support + use of pre-defined matrix to perform alignment (issue #153). +- plotAdjustedRtime to visualize alignment results (issue #141). + +USER VISIBLE CHANGES: +- featureDefinitions and featureValues return DataFrame and matrix with rownames + corresponding to arbitrary feature IDs (issue #148). +- New peakGroupsMatrix slot for PeakGroupsParam class (issue #153). + +BUG FIXES: +- Issue #146: ensure adjusted retention times returned by the peakGroups method + to be in the same order than the raw retention times. + + +CHANGES IN VERSION 1.51.9 +------------------------- + +NEW FEATURES: +- fillChromPeaks, dropFilledChromPeaks methods and FillChromPeaksParam class. +- featureValues method. + +USER VISIBLE CHANGES: +- Extended new_functionality vignette. +- Change default backend for reading mzML files to pwiz. + +BUG FIXES: +- Issue #135: fix peak signal integration for centWave. +- Issue #139: problem with expand.mz and expand.rt in fillPeaks.chrom. +- Issue #137: Error in findChromPeaks if no peaks are found. + + +CHANGES IN VERSION 1.51.8 +------------------------- + +NEW FEATURES: +- Add Chromatogram class and extractChromatograms method. + +BUG FIXES: +- Issue #118: failing unit test on Windows build machine. +- Issue #133: error with c() and xcmsSet without peaks. +- Issue #134: xcmsSet constructor endless loop. + + +CHANGES IN VERSION 1.51.7 +------------------------- + +USER VISIBLE CHANGES: +- Major renaming of methods and classes to follow the naming convention: + - chromatographic peak (chromPeak): the peaks identified in rt dimension. + - feature: mz-rt feature, being the grouped chromatographic peaks within and + across samples. + +BUG FIXES: +- Issue #127: failing unit test on Windows build machine. + + +CHANGES IN VERSION 1.51.6 +------------------------- + +NEW FEATURES: +- groupFeatures and adjustRtime methods for XCMSnExp objects. 
+- New Param classes for groupFeatures and adjustRtime analysis methods: FeatureDensityParam, + MzClustParam, NearestFeaturesParam, FeatureGroupsParam and ObiwarpParam. + +BUG FIXES: +- Issue #124 (filterRt,XCMSnExp returned empty object). + + CHANGES IN VERSION 1.51.5 ------------------------- NEW FEATURES: -+ MsFeatureData and XCMSnExp objects. -+ features, features<-, adjustedRtime, adjustedRtime<-, featureGroups, +- MsFeatureData and XCMSnExp objects. +- features, features<-, adjustedRtime, adjustedRtime<-, featureGroups, featureGroups<-, hasAlignedFeatures, hasAdjustedRtime and hasDetectedFeatures methods. -+ dropFeatures, dropFeatureGroups and dropAdjustedRtime methods. -+ filterMz, filterRt, filterFile etc implemented. -+ mz, intensity and rtime methods for XCMSnExp allowing to return values grouped +- dropFeatures, dropFeatureGroups and dropAdjustedRtime methods. +- filterMz, filterRt, filterFile etc implemented. +- mz, intensity and rtime methods for XCMSnExp allowing to return values grouped by sample. BUG FIXES: -+ Issue #99 (rtrange outside of retention time range in getEIC,xcmsSet). -+ Issue #101 (xcmsRaw function returns NULL if mslevel = 1 is specified). -+ Issue #102 (centWave returns empty matrix if scales not OK). Thanks to J. Stanstrup. -+ Issue #91 (warning instead of error if no peaks in ROI). Thanks to J. Stanstrup. +- Issue #99 (rtrange outside of retention time range in getEIC,xcmsSet). +- Issue #101 (xcmsRaw function returns NULL if mslevel = 1 is specified). +- Issue #102 (centWave returns empty matrix if scales not OK). Thanks to J. Stanstrup. +- Issue #91 (warning instead of error if no peaks in ROI). Thanks to J. Stanstrup. CHANGES IN VERSION 1.51.4 ------------------------- BUG FIXES: -+ added deepCopy to avoid corrupting the original object, thanks to J. Stanstrup, closes #93 +- added deepCopy to avoid corrupting the original object, thanks to J. 
Stanstrup, closes #93 CHANGES IN VERSION 1.51.3 ------------------------- NEW FEATURES: -+ binYonX binning function. -+ imputeLinInterpol function providing linear interpolation of missing values. -+ breaks_on_binSize and breaks_on_nBins functions to calculate breaks defining +- binYonX binning function. +- imputeLinInterpol function providing linear interpolation of missing values. +- breaks_on_binSize and breaks_on_nBins functions to calculate breaks defining bins. -+ New vignette "new_functionality.Rmd" describing new and modified functionality +- New vignette "new_functionality.Rmd" describing new and modified functionality in xcms. -+ Add do_detectFeatures_matchedFilter function. -+ Add do_detectFeatures_centWave function. -+ Add do_detectFeatures_centWaveWithPredIsoROIs function and unit test. -+ Implement a new data import function. -+ Add do_detectFeatures_MSW function and unit test. -+ Argument stopOnError in xcmsSet function that allows to perform feature +- Add do_detectFeatures_matchedFilter function. +- Add do_detectFeatures_centWave function. +- Add do_detectFeatures_centWaveWithPredIsoROIs function and unit test. +- Implement a new data import function. +- Add do_detectFeatures_MSW function and unit test. +- Argument stopOnError in xcmsSet function that allows to perform feature detection on all files without stopping on errors. -+ Method showError for xcmsSet objects that list all errors during feature +- Method showError for xcmsSet objects that list all errors during feature detection (if stopOnError = FALSE in the xcmsSet function). -+ [ method to subset xcmsRaw objects by scans. -+ profMat method to extract/create the profile matrix from/for an xcmsRaw. -+ Add new detectFeatures methods for MSnExp and OnDiskMSnExp objects from the +- [ method to subset xcmsRaw objects by scans. +- profMat method to extract/create the profile matrix from/for an xcmsRaw. 
+- Add new detectFeatures methods for MSnExp and OnDiskMSnExp objects from the MSnbase package. -+ Add new CentWaveParam, MatchedFilterParam, MassifquantParam, MSWParam and +- Add new CentWaveParam, MatchedFilterParam, MassifquantParam, MSWParam and CentWavePredIsoParam parameter class to perform method dispatch in the detectFeatures method. -+ retcor.obiwarp uses the new binning methods for profile matrix generation. -+ scanrange,xcmsRaw reports always a scanrange of 1 and length(object@scantime). -+ scanrange,xcmsSet reports the scanrange eventually specified by the user in +- retcor.obiwarp uses the new binning methods for profile matrix generation. +- scanrange,xcmsRaw reports always a scanrange of 1 and length(object@scantime). +- scanrange,xcmsSet reports the scanrange eventually specified by the user in the xcmsSet function. -+ Fixed bug in rawMat (issue #58). -+ Fix issue #60: findPeaks.massifquant always returns a xcmsPeaks object. +- Fixed bug in rawMat (issue #58). +- Fix issue #60: findPeaks.massifquant always returns a xcmsPeaks object. CHANGES IN VERSION 1.51.2 ------------------------- USER VISIBLE CHANGES: -+ As suggested by Jan Stanstrup, do not error if a centWave ROI +- As suggested by Jan Stanstrup, do not error if a centWave ROI contains no data, closes #90 CHANGES IN VERSION 1.51.1 ------------------------- BUG FIXES: -+ Fix incorrrect indexing getEIC function reported by Will Edmands, closes #92 +- Fix incorrrect indexing getEIC function reported by Will Edmands, closes #92 CHANGES IN VERSION 1.49.7 ------------------------- BUG FIXES: -+ Fix documentation warnings. +- Fix documentation warnings. 
CHANGES IN VERSION 1.49.6 ------------------------- USER VISIBLE CHANGES: -+ Peak Picking function findPeaks.centWaveWithPredictedIsotopeROIs() and findPeaks.addPredictedIsotopeFeatures(), +- Peak Picking function findPeaks.centWaveWithPredictedIsotopeROIs() and findPeaks.addPredictedIsotopeFeatures(), which allow more sensitive detection of isotope features. CHANGES IN VERSION 1.49.5 ------------------------- USER VISIBLE CHANGES: -+ Some documentation updates. -+ Preparation for a new binning function +- Some documentation updates. +- Preparation for a new binning function CHANGES IN VERSION 1.49.4 ------------------------- BUG FIXES: -+ Fix getXcmsRaw that would prevent retention time correction to be applied +- Fix getXcmsRaw that would prevent retention time correction to be applied (issue #44 reported by Aleksandr). @@ -105,22 +235,22 @@ CHANGES IN VERSION 1.49.3 ------------------------- NEW FEATURE: -+ updateObject method for xcmsSet. +- updateObject method for xcmsSet. USER VISIBLE CHANGES: -+ xcms uses now BiocParallel for parallel processing. All other parallel +- xcms uses now BiocParallel for parallel processing. All other parallel processing functions have been deprecated. BUG FIXES: -+ Added missing package imports. -+ Fix bug in fillPeaksChromPar referencing a non-existing variables i and +- Added missing package imports. +- Fix bug in fillPeaksChromPar referencing a non-existing variables i and object. -+ Fix bug in group.nearest: variable scoreList was mis-spelled (coreList). -+ Remove all DUP = FALSE from the .C calls as they are ignored anyways. +- Fix bug in group.nearest: variable scoreList was mis-spelled (coreList). +- Remove all DUP = FALSE from the .C calls as they are ignored anyways. OTHER CHANGES -+ Re-organization of class, function and method definitions in R-files. -+ Use roxygen2 to manage the DESCRIPTION's collate field. +- Re-organization of class, function and method definitions in R-files. 
+- Use roxygen2 to manage the DESCRIPTION's collate field. CHANGES IN VERSION 1.49.2 @@ -128,7 +258,7 @@ CHANGES IN VERSION 1.49.2 NEW FEATURE: -+ Initial support for exporint mzTab format. Since Changes are +- Initial support for exporint mzTab format. Since Changes are still to be expected, xcms:::writeMzTab() is not yet exported. CHANGES IN VERSION 1.49.1 @@ -136,46 +266,46 @@ CHANGES IN VERSION 1.49.1 NEW FEATURE: -+ The raw CDF/mzXML/mzData/mzML is assumed to have scans sorted by m/z. +- The raw CDF/mzXML/mzData/mzML is assumed to have scans sorted by m/z. Instead of throwing an "m/z sort assumption violated !" error, the data is re-read and on-demand sorted by m/z. CHANGES IN VERSION 1.47.3 ------------------------- -+ Disable parallel processing in unit tests causing a timeout +- Disable parallel processing in unit tests causing a timeout on BioC build machines CHANGES IN VERSION 1.47.2 ------------------------- BUG FIXES -+ Fix problem in getEIC on xcmsSet objects reported by Alan Smith in issue #7 and +- Fix problem in getEIC on xcmsSet objects reported by Alan Smith in issue #7 and add a RUnit test case to test for this (test.issue7 in runit.getEIC.R). -+ Changed some unnecessary warnings into messages. +- Changed some unnecessary warnings into messages. CHANGES IN VERSION 1.47.2 ------------------------- USER VISIBLE CHANGES: -+ Disabled parallel processing in unit tests +- Disabled parallel processing in unit tests * migrate dependencies from ncdf -> ncdf4 CHANGES IN VERSION 1.45.7 ------------------------- USER VISIBLE CHANGES: -+ Disabled Rmpi support and usage on Windows +- Disabled Rmpi support and usage on Windows CHANGES IN VERSION 1.45.6 ------------------------- NEW FEATURE: -+ J. Rainer implemented a [ method that allows to subset an xcmsSet. +- J. Rainer implemented a [ method that allows to subset an xcmsSet. BUG FIXES: -+ Fixed a problem in split.xcmsSet that did not split the phenoData properly. 
+- Fixed a problem in split.xcmsSet that did not split the phenoData properly. Added some details to the documentation of xcmsSet-class. @@ -183,15 +313,15 @@ CHANGES IN VERSION 1.45.5 ------------------------- USER VISIBLE CHANGES: -+ The sampclass method for xcmsSet will now return the content of the +- The sampclass method for xcmsSet will now return the content of the column "class" from the data.frame in the phenoData slot, or if not present, the interaction of all factors (columns) of that data.frame. -+ The sampclass<- method replaces the content of the "class" column in +- The sampclass<- method replaces the content of the "class" column in the phenoData data.frame. If a data.frame is submitted, the interaction of its columns is calculated and stored into the "class" column. BUG FIXES: -+ Fixed a bug that resulted in a cryptic error message +- Fixed a bug that resulted in a cryptic error message when no input files are available to the xcmsSet function. @@ -199,7 +329,7 @@ CHANGES IN VERSION 1.45.4 ------------------------- BUG FIXES: -+ Fixed a bug in the levelplot method for xcmsSet. +- Fixed a bug in the levelplot method for xcmsSet. CHANGES IN VERSION 1.45.3 @@ -207,19 +337,19 @@ CHANGES IN VERSION 1.45.3 NEW FEATURE: -+ xcmsSet now allows phenoData to be an AnnotatedDataFrame. -+ new slots for xcmsRaw: +- xcmsSet now allows phenoData to be an AnnotatedDataFrame. +- new slots for xcmsRaw: - mslevel: store the mslevel parameter submitted to xcmsRaw. - scanrange: store the scanrange parameter submitted to xcmsRaw. -+ new slots for xcmsSet: +- new slots for xcmsSet: - mslevel: stores the mslevel argument from the xcmsSet method. - scanrange: to keep track of the scanrange argument of the xcmsSet method. -+ new methods for xcmsRaw: +- new methods for xcmsRaw: - levelplot: similar to the image method, plots m/z vs RT with color coded intensities. - mslevel: returns the value for the .mslevel slot. 
For downstream compatibility, this method returns NULL if the object does not have the same named slot. - profinfo: same functionality as the profinfo method for xcmsSet. - scanrange: returns the value for the scanrange slot. For downstream compatibility, this method returns NULL if the object does not have the same named slot. -+ new methods for xcmsSet: +- new methods for xcmsSet: - getXcmsRaw: returns a xcmsRaw object for one or more files in the xcmsSet, eventually applying retention time correction etc. - levelplot: similar to the image method, plots m/z vs RT with color coded intensities. Allows in addition to highlight identified peaks. - mslevel: returns the value for the mslevel slot. For downstream compatibility, this method returns NULL if the object does not have the same named slot. @@ -228,15 +358,15 @@ NEW FEATURE: - scanrange: returns the value for the scanrange slot. For downstream compatibility, this method returns NULL if the object does not have the same named slot. USER VISIBLE CHANGES: -+ show method for xcmsSet updated to display also informations about the mslevel and scanrange. -+ Elaborated some documentation entries. -+ rtrange and mzrange for xcmsRaw method plotEIC use by default the full RT and m/z range. -+ Added arguments "lty" and "add" to plotEIC method for xcmsRaw. -+ getEIC without specifying mzrange returns the ion chromatogram for the full m/z range (i.e. the base peak chromatogram). +- show method for xcmsSet updated to display also informations about the mslevel and scanrange. +- Elaborated some documentation entries. +- rtrange and mzrange for xcmsRaw method plotEIC use by default the full RT and m/z range. +- Added arguments "lty" and "add" to plotEIC method for xcmsRaw. +- getEIC without specifying mzrange returns the ion chromatogram for the full m/z range (i.e. the base peak chromatogram). BUG FIXES: -+ Checking if phenoData is a data.frame or AnnotatedDataFrame and throw an error otherwise. 
-+ xcmsSet getEIC method for water Lock mass corrected files for a subset of files did not evaluate whether the specified files were corrected. +- Checking if phenoData is a data.frame or AnnotatedDataFrame and throw an error otherwise. +- xcmsSet getEIC method for water Lock mass corrected files for a subset of files did not evaluate whether the specified files were corrected. CHANGES IN VERSION 1.45.2 ------------------------- diff --git a/inst/unitTests/runit.Chromatogram.R b/inst/unitTests/runit.Chromatogram.R new file mode 100644 index 000000000..53861f2d1 --- /dev/null +++ b/inst/unitTests/runit.Chromatogram.R @@ -0,0 +1,350 @@ +## Unit tests related to the Chromatogram class. +library(xcms) +library(RUnit) + +test_Chromatogram_class <- function() { + ch <- new("Chromatogram") + ch@mz <- 3 + checkException(validObject(ch)) + ch@mz <- c(1, 3) + ch@precursorMz <- 4 + checkException(validObject(ch)) + ch@precursorMz <- c(4, 4) + ch@productMz <- 5 + checkException(validObject(ch)) + ## + int <- rnorm(100, mean = 200, sd = 2) + rt <- rnorm(100, mean = 300, sd = 3) + ## check exceptions: + checkException(xcms:::Chromatogram(intensity = int)) + chr <- Chromatogram() + chr@rtime <- rt + chr@intensity <- int + checkException(validObject(chr)) + ## issue #145: values are ordered based on rtime + chr <- Chromatogram(intensity = int, rtime = rt) + checkEquals(rtime(chr), sort(rt)) + checkEquals(intensity(chr), int[order(rt)]) + rt <- sort(rt) + ch <- xcms:::Chromatogram(intensity = int, rtime = rt) + checkEquals(rtime(ch), rt) + checkEquals(intensity(ch), int) + checkException(xcms:::Chromatogram(aggregationFun = "other")) + ch@aggregationFun <- "max" + checkTrue(validObject(ch)) + checkEquals(aggregationFun(ch), "max") + ch@aggregationFun <- "sum" + checkTrue(validObject(ch)) + checkEquals(aggregationFun(ch), "sum") + ch@aggregationFun <- "mean" + checkTrue(validObject(ch)) + checkEquals(aggregationFun(ch), "mean") + ch@aggregationFun <- "min" + 
checkTrue(validObject(ch)) + checkEquals(aggregationFun(ch), "min") + ch@fromFile <- 3L + checkTrue(validObject(ch)) + checkEquals(fromFile(ch), 3L) + checkEquals(length(ch), length(rt)) + ## as.data.frame + df <- as.data.frame(ch) + checkEquals(df, data.frame(rtime = rt, intensity = int)) + ch <- xcms:::Chromatogram(mz = c(1, 3)) + checkEquals(ch@mz, c(1, 3)) + checkEquals(mz(ch), c(1, 3)) + checkEquals(mz(ch, filter = TRUE), c(NA_real_, NA_real_)) + ch <- xcms:::Chromatogram(filterMz = c(1, 3)) + checkEquals(ch@filterMz, c(1, 3)) + checkEquals(mz(ch, filter = TRUE), c(1, 3)) + checkEquals(mz(ch, filter = FALSE), c(NA_real_, NA_real_)) + ch <- xcms:::Chromatogram(precursorMz = 123) + checkEquals(ch@precursorMz, c(123, 123)) + checkEquals(precursorMz(ch), c(123, 123)) + ch <- xcms:::Chromatogram(productMz = 123) + checkEquals(ch@productMz, c(123, 123)) + checkEquals(productMz(ch), c(123, 123)) +} + +test_filterRt_Chromatogram <- function() { + int <- rnorm(100, mean = 200, sd = 2) + rt <- rnorm(100, mean = 300, sd = 3) + chr <- Chromatogram(intensity = int, rtime = sort(rt)) + + chr_2 <- filterRt(chr, rt = c(200, 300)) + checkTrue(all(rtime(chr_2) >= 200)) + checkTrue(all(rtime(chr_2) <= 300)) + ints <- intensity(chr_2) + checkEquals(ints, intensity(chr)[rtime(chr) >= 200 & rtime(chr) <= 300]) + + ## No rt + checkEquals(chr, filterRt(chr)) + + ## Outside range + chr_2 <- filterRt(chr, rt = c(400, 500)) + checkTrue(length(chr_2) == 0) + checkEquals(intensity(chr_2), numeric()) + checkEquals(rtime(chr_2), numeric()) +} + +test_extractChromatograms <- function() { + ## OnDiskMSnExp + ## TIC + chrs <- extractChromatograms(filterFile(od_x, file = 2)) + plotChromatogram(chrs) + spctr <- spectra(filterFile(od_x, file = 2)) + ints <- unlist(lapply(spctr, function(z) + return(sum(intensity(z))))) + checkEquals(intensity(chrs[[1]]), ints) + checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + ## BPC + chrs <- extractChromatograms(filterFile(od_x, file = 2), + 
aggregationFun = "max") + ints <- unlist(lapply(spctr, function(z) + return(max(intensity(z))))) + checkEquals(intensity(chrs[[1]]), ints) + checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + ## XCMSnExp + xod_x <- faahko_xod + chrs <- extractChromatograms(filterFile(xod_x, file = 2)) + ints <- unlist(lapply(spctr, function(z) + return(sum(intensity(z))))) + checkEquals(intensity(chrs[[1]]), ints) + checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + ## BPC + chrs <- extractChromatograms(filterFile(xod_x, file = 2), + aggregationFun = "max") + ints <- unlist(lapply(spctr, function(z) + return(max(intensity(z))))) + chrs_2 <- xcms:::.extractMultipleChromatograms(filterFile(xod_x, file = 2), + aggregationFun = "max") + checkEquals(intensity(chrs[[1]]), intensity(chrs_2[[1]])) + + checkEquals(intensity(chrs[[1]]), ints) + checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + ## with adjusted retention times. + chrs <- extractChromatograms(filterFile(xod_xgr, file = 2), + adjustedRtime = FALSE, aggregationFun = "max") + checkEquals(intensity(chrs[[1]]), ints) + checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + chrs <- extractChromatograms(filterFile(xod_xgr, file = 2, + keepAdjustedRtime = TRUE), + aggregationFun = "max") + checkEquals(intensity(chrs[[1]]), ints) + checkEquals(rtime(chrs[[1]]), rtime(xod_xgr, bySample = TRUE)[[2]]) + ## Subset to certain mz range in all files. 
+ chrs_adj <- extractChromatograms(xod_xgr, mz = c(300, 330)) + chrs_raw <- extractChromatograms(xod_x, mz = c(300, 330)) + checkTrue(sum(rtime(chrs_adj[[1]]) != rtime(chrs_raw[[1]])) > + length(chrs_raw[[1]]) / 2) + checkEquals(rtime(chrs_adj[[1]]), rtime(xod_xgr, bySample = TRUE)[[1]]) + checkEquals(rtime(chrs_adj[[2]]), rtime(xod_xgr, bySample = TRUE)[[2]]) + checkEquals(rtime(chrs_adj[[3]]), rtime(xod_xgr, bySample = TRUE)[[3]]) + + ## Now subsetting for mz: + tmp <- filterFile(od_x, file = 2) + chrs <- extractChromatograms(tmp, mz = c(300, 400)) + checkEquals(mz(chrs[[1]], filter = TRUE), c(300, 400)) + suppressWarnings(spctr <- spectra(filterMz(tmp, mz = c(300, 400)))) + ints <- unlist(lapply(spctr, function(z) + return(sum(intensity(z))))) + ints2 <- intensity(chrs[[1]]) + ints2[is.na(ints2)] <- 0 + checkEquals(ints2, ints) + checkEquals(rtime(chrs[[1]]), unlist(lapply(spctr, rtime))) + ## with adjusted retention times + chrs <- extractChromatograms(filterFile(xod_xgr, file = 2, + keepAdjustedRtime = TRUE), + mz = c(300, 400)) + ints <- unlist(lapply(spctr, function(z) + return(sum(intensity(z))))) + ints2 <- intensity(chrs[[1]]) + ints2[is.na(ints2)] <- 0 + checkEquals(ints2, ints) + checkEquals(rtime(chrs[[1]]), rtime(xod_xgr, bySample = TRUE)[[2]]) + + ## Now subsetting for rt: + chrs <- extractChromatograms(od_x, rt = c(2700, 2900)) + checkTrue(all(rtime(chrs[[1]]) >= 2700 & rtime(chrs[[1]]) <= 2900)) + checkTrue(all(rtime(chrs[[2]]) >= 2700 & rtime(chrs[[2]]) <= 2900)) + checkTrue(all(rtime(chrs[[3]]) >= 2700 & rtime(chrs[[3]]) <= 2900)) + spctr <- spectra(filterRt(od_x, rt = c(2700, 2900))) + ints <- split(unlist(lapply(spctr, function(z) sum(intensity(z)))), + f = unlist(lapply(spctr, fromFile))) + checkEquals(ints[[1]], intensity(chrs[[1]])) + checkEquals(ints[[2]], intensity(chrs[[2]])) + checkEquals(ints[[3]], intensity(chrs[[3]])) + ## Using adjusted rt: + chrs2 <- extractChromatograms(xod_xgr, rt = c(2700, 2900)) + checkTrue(all(rtime(chrs2[[1]]) 
>= 2700 & rtime(chrs2[[1]]) <= 2900)) + checkTrue(all(rtime(chrs2[[2]]) >= 2700 & rtime(chrs2[[2]]) <= 2900)) + checkTrue(all(rtime(chrs2[[3]]) >= 2700 & rtime(chrs2[[3]]) <= 2900)) + checkTrue(length(chrs[[1]]) != length(chrs2[[1]])) + checkTrue(length(chrs[[2]]) == length(chrs2[[2]])) + checkTrue(length(chrs[[3]]) != length(chrs2[[3]])) + tmp <- filterRt(xod_xgr, rt = c(2700, 2900)) + checkEquals(rtime(chrs2[[1]]), rtime(tmp, bySample = TRUE)[[1]]) + checkEquals(rtime(chrs2[[2]]), rtime(tmp, bySample = TRUE)[[2]]) + checkEquals(rtime(chrs2[[3]]), rtime(tmp, bySample = TRUE)[[3]]) + ## Check the values... + keepSp <- which(adjustedRtime(xod_xgr) >= 2700 & + adjustedRtime(xod_xgr) <= 2900) + tmp <- xod_xgr[keepSp] + ints <- unlist(lapply(spectra(tmp), function(z) sum(intensity(z)))) + intsL <- split(ints, fromFile(tmp)) + checkEquals(intensity(chrs2[[1]]), intsL[[1]]) + checkEquals(intensity(chrs2[[2]]), intsL[[2]]) + checkEquals(intensity(chrs2[[3]]), intsL[[3]]) + + ## Now subsetting for rt and mz: + chrs <- extractChromatograms(od_x, rt = c(2700, 2900), mz = 335) + checkTrue(all(rtime(chrs[[1]]) >= 2700 & rtime(chrs[[1]]) <= 2900)) + checkTrue(all(rtime(chrs[[2]]) >= 2700 & rtime(chrs[[2]]) <= 2900)) + checkTrue(all(rtime(chrs[[3]]) >= 2700 & rtime(chrs[[3]]) <= 2900)) + spctr <- spectra(filterMz(filterRt(od_x, rt = c(2700, 2900)), mz = 335)) + ints <- split(unlist(lapply(spctr, function(z) { + if (z@peaksCount) + return(sum(intensity(z))) + else return(NA) + })), f = unlist(lapply(spctr, fromFile))) + checkEquals(ints[[1]], intensity(chrs[[1]])) + checkEquals(ints[[2]], intensity(chrs[[2]])) + checkEquals(ints[[3]], intensity(chrs[[3]])) + ## Using adjusted rt: LLLL + chrs <- extractChromatograms(xod_xgr, rt = c(2700, 2900), mz = 335) + checkTrue(all(rtime(chrs[[1]]) >= 2700 & rtime(chrs[[1]]) <= 2900)) + checkTrue(all(rtime(chrs[[2]]) >= 2700 & rtime(chrs[[2]]) <= 2900)) + checkTrue(all(rtime(chrs[[3]]) >= 2700 & rtime(chrs[[3]]) <= 2900)) + spctr <- 
spectra(filterMz(filterRt(xod_xgr, rt = c(2700, 2900)), mz = 335)) + ints <- split(unlist(lapply(spctr, function(z) { + if (z@peaksCount) + return(sum(intensity(z))) + else return(NA) + })), f = unlist(lapply(spctr, fromFile))) + checkEquals(ints[[1]], intensity(chrs[[1]])) + checkEquals(ints[[2]], intensity(chrs[[2]])) + checkEquals(ints[[3]], intensity(chrs[[3]])) + ## Check the rtime. + tmp <- filterRt(xod_xgr, rt = c(2700, 2900)) + checkEquals(rtime(chrs[[1]]), rtime(tmp, bySample = TRUE)[[1]]) + checkEquals(rtime(chrs[[2]]), rtime(tmp, bySample = TRUE)[[2]]) + checkEquals(rtime(chrs[[3]]), rtime(tmp, bySample = TRUE)[[3]]) + + ## What if we're completely off? + chrs <- extractChromatograms(od_x, rt = c(5000, 5500)) + checkTrue(length(chrs) == 0) + ## Now rt is within range, but mz is completely off. We expect Chromatograms + ## with same length than there are spectra in the rt range, but all NA + ## values. + chrs <- extractChromatograms(od_x, rt = c(2600, 2700), mz = 12000) + rts <- split(rtime(od_x), f = fromFile(od_x)) + rts <- lapply(rts, function(z) z[z >= 2600 & z <= 2700]) + checkEquals(lengths(chrs), lengths(chrs)) + ## All have to be NA. + checkTrue(all(unlist(lapply(chrs, function(z) is.na(intensity(z)))))) + + ## Multiple ranges. 
+ rtr <- matrix(c(2700, 2900, 2600, 2800), ncol = 2, byrow = TRUE) + mzr <- matrix(c(355, 355, 344, 344), ncol = 2, byrow = TRUE) + chrs <- extractChromatograms(od_x, rt = rtr, mz = mzr) + + checkTrue(all(rtime(chrs[[1]][[1]]) >= 2700 & rtime(chrs[[1]][[1]]) <= 2900)) + checkTrue(all(rtime(chrs[[1]][[2]]) >= 2700 & rtime(chrs[[1]][[2]]) <= 2900)) + checkTrue(all(rtime(chrs[[1]][[3]]) >= 2700 & rtime(chrs[[1]][[3]]) <= 2900)) + checkTrue(all(rtime(chrs[[2]][[1]]) >= 2600 & rtime(chrs[[2]][[1]]) <= 2800)) + checkTrue(all(rtime(chrs[[2]][[2]]) >= 2600 & rtime(chrs[[2]][[2]]) <= 2800)) + checkTrue(all(rtime(chrs[[2]][[3]]) >= 2600 & rtime(chrs[[2]][[3]]) <= 2800)) + spctr <- spectra(filterMz(filterRt(od_x, rt = rtr[1, ]), + mz = mzr[1, ])) + ints <- split(unlist(lapply(spctr, function(z) { + if (z@peaksCount) + return(sum(intensity(z))) + else return(NA) + })), f = unlist(lapply(spctr, fromFile))) + checkEquals(ints[[1]], intensity(chrs[[1]][[1]])) + checkEquals(ints[[2]], intensity(chrs[[1]][[2]])) + checkEquals(ints[[3]], intensity(chrs[[1]][[3]])) + spctr <- spectra(filterMz(filterRt(od_x, rt = rtr[2, ]), + mz = mzr[2, ])) + ints <- split(unlist(lapply(spctr, function(z) { + if (z@peaksCount) + return(sum(intensity(z))) + else return(NA) + })), f = unlist(lapply(spctr, fromFile))) + checkEquals(ints[[1]], intensity(chrs[[2]][[1]])) + checkEquals(ints[[2]], intensity(chrs[[2]][[2]])) + checkEquals(ints[[3]], intensity(chrs[[2]][[3]])) + + ## Multiple ranges with complete off ranges. 
+ rtr <- matrix(c(2700, 2900, 5000, 5500, 2600, 2800), ncol = 2, byrow = TRUE) + mzr <- matrix(c(355, 355, 500, 500, 344, 344), ncol = 2, byrow = TRUE) + chrs <- extractChromatograms(od_x, rt = rtr, mz = mzr) + checkTrue(length(chrs) == 3) + checkTrue(all(lengths(chrs[[2]]) == 0)) + + rtr <- matrix(c(2700, 2900, 2700, 2900, 2600, 2800), ncol = 2, byrow = TRUE) + mzr <- matrix(c(355, 355, 100000, 100000, 344, 344), ncol = 2, byrow = TRUE) + chrs <- extractChromatograms(od_x, rt = rtr, mz = mzr) + checkTrue(length(chrs) == 3) + ## All values in the 2nd Chromosome object have to be NA. + checkTrue(all(unlist(lapply(chrs[[2]], function(z) is.na(intensity(z)))))) +} + +test_clean_chromatogram <- function() { + chr <- Chromatogram( + rtime = 1:12, + intensity = c(0, 0, 20, 0, 0, 0, 123, 124343, 3432, 0, 0, 0)) + chr_clnd <- clean(chr) + checkEquals(rtime(chr_clnd), c(2, 3, 4, 6, 7, 8, 9,10)) + + chr_clnd <- clean(chr, all = TRUE) + checkTrue(length(chr_clnd) == 4) + checkEquals(rtime(chr_clnd), c(3, 7, 8, 9)) + + ## With NA + chr <- Chromatogram( + rtime = 1:12, + intensity = c(0, NA, 20, 0, 0, 0, 123, 124343, 3432, 0, 0, 0)) + chr_clnd <- clean(chr) + checkEquals(rtime(chr_clnd), c(3, 4, 6, 7, 8, 9, 10)) + chr <- Chromatogram( + rtime = 1:12, + intensity = c(NA, NA, 20, NA, NA, NA, 123, 124343, 3432, NA, NA, NA)) + chr_clnd <- clean(chr) + checkEquals(rtime(chr_clnd), c(3, 7, 8, 9)) +} + +dontrun_test_with_MRM <- function() { + ## Test how we could read the data. + ## chromatogramsInfo + library(msdata) + fls <- proteomics(full.names = TRUE) + + library(mzR) + msf <- mzR::openMSfile(fls[2], "pwiz") + chrs <- chromatograms(msf) + chrsI <- chromatogram(msf) + ## The same essentially. 
+ nChrom(msf) + length(chrs) + nrow(chrs[[1]]) + mzR::close(msf) + ## + msf <- mzR::openMSfile(fls[1], "pwiz") + chrs <- chromatograms(msf) + chrs <- chromatograms(msf) + nChrom(msf) + length(chrs) + nrow(chrs[[1]]) + + ## Now, we've got the following info: cvParam + ## accession="MS:1000235" name="total ion current chromatogram" value="" + ## Check http://proteowizard.sourceforge.net/dox/namespacepwiz_1_1msdata.html + ## Potentially interesting: + ## o ChromatogramIdentity nope, no header info. + ## OK, have to look for chromatogram with index="1", then within + ## for cvParam accession="MS:1000827" and its value -> Q1 or precursorMz + ## then within for cvParam accession="MS:1000827" and its value + ## -> Q3. + + ## https://sourceforge.net/p/proteowizard/mailman/message/27571266/ +} diff --git a/inst/unitTests/runit.Param-classes.R b/inst/unitTests/runit.Param-classes.R index 23b957d7a..6264bd505 100644 --- a/inst/unitTests/runit.Param-classes.R +++ b/inst/unitTests/runit.Param-classes.R @@ -479,3 +479,356 @@ test_CentWavePredIsoParam <- function() { L <- xcms:::.param2list(p) checkEquals(L$snthresh, 123) } + +test_PeakDensityParam <- function() { + ## Check getter/setter methods: + p <- new("PeakDensityParam", sampleGroups = c(1, 1, 1, 2, 2, 3, 4)) + checkEquals(sampleGroups(p), c(1, 1, 1, 2, 2, 3, 4)) + sampleGroups(p) <- 1:4 + checkEquals(sampleGroups(p), 1:4) + p <- PeakDensityParam(sampleGroups = c("a", "a", "b")) + checkEquals(sampleGroups(p), c("a", "a", "b")) + + p <- new("PeakDensityParam", bw = 3) + checkEquals(bw(p), 3) + bw(p) <- 20 + checkEquals(bw(p), 20) + p <- PeakDensityParam(bw = 33) + checkEquals(bw(p), 33) + checkException(PeakDensityParam(bw = -4)) + + ## minFraction + p <- new("PeakDensityParam", minFraction = 0.7) + checkEquals(minFraction(p), 0.7) + minFraction(p) <- 0.2 + checkEquals(minFraction(p), 0.2) + p <- PeakDensityParam(minFraction = 0.4) + checkEquals(minFraction(p), 0.4) + checkException(PeakDensityParam(minFraction = -4)) + 
checkException(minFraction(p) <- c(0.3, 0.2, 0.4)) + checkException(minFraction(p) <- 2) + + ## minSamples + p <- new("PeakDensityParam", minSamples = 3) + checkEquals(minSamples(p), 3) + minSamples(p) <- 20 + checkEquals(minSamples(p), 20) + p <- PeakDensityParam(minSamples = 33) + checkEquals(minSamples(p), 33) + checkException(PeakDensityParam(minSamples = -4)) + + ## binSize + p <- new("PeakDensityParam", binSize = 3) + checkEquals(binSize(p), 3) + binSize(p) <- 20 + checkEquals(binSize(p), 20) + p <- PeakDensityParam(binSize = 0.3) + checkEquals(binSize(p), 0.3) + checkException(PeakDensityParam(binSize = -4)) + + ## maxFeatures + p <- new("PeakDensityParam", maxFeatures = 3) + checkEquals(maxFeatures(p), 3) + maxFeatures(p) <- 20 + checkEquals(maxFeatures(p), 20) + p <- PeakDensityParam(maxFeatures = 33) + checkEquals(maxFeatures(p), 33) + checkException(PeakDensityParam(maxFeatures = -4)) +} + +test_MzClustParam <- function() { + ## Check getter/setter methods: + p <- new("MzClustParam", sampleGroups = c(1, 1, 1, 2, 2, 3, 4)) + checkEquals(sampleGroups(p), c(1, 1, 1, 2, 2, 3, 4)) + sampleGroups(p) <- 1:4 + checkEquals(sampleGroups(p), 1:4) + p <- MzClustParam(sampleGroups = c("a", "a", "b")) + checkEquals(sampleGroups(p), c("a", "a", "b")) + + p <- new("MzClustParam", ppm = 3) + checkEquals(ppm(p), 3) + ppm(p) <- 20 + checkEquals(ppm(p), 20) + p <- MzClustParam(ppm = 33) + checkEquals(ppm(p), 33) + checkException(MzClustParam(ppm = -4)) + + p <- new("MzClustParam", absMz = 3) + checkEquals(absMz(p), 3) + absMz(p) <- 20 + checkEquals(absMz(p), 20) + p <- MzClustParam(absMz = 33) + checkEquals(absMz(p), 33) + checkException(MzClustParam(absMz = -4)) + + ## minFraction + p <- new("MzClustParam", minFraction = 0.7) + checkEquals(minFraction(p), 0.7) + minFraction(p) <- 0.2 + checkEquals(minFraction(p), 0.2) + p <- MzClustParam(minFraction = 0.4) + checkEquals(minFraction(p), 0.4) + checkException(MzClustParam(minFraction = -4)) + checkException(minFraction(p) <- 
c(0.3, 0.2, 0.4)) + checkException(minFraction(p) <- 2) + + ## minSamples + p <- new("MzClustParam", minSamples = 3) + checkEquals(minSamples(p), 3) + minSamples(p) <- 20 + checkEquals(minSamples(p), 20) + p <- MzClustParam(minSamples = 33) + checkEquals(minSamples(p), 33) + checkException(MzClustParam(minSamples = -4)) +} + +test_NearestPeaksParam <- function() { + ## Check getter/setter methods: + p <- new("NearestPeaksParam", sampleGroups = c(1, 1, 1, 2, 2, 3, 4)) + checkEquals(sampleGroups(p), c(1, 1, 1, 2, 2, 3, 4)) + sampleGroups(p) <- 1:4 + checkEquals(sampleGroups(p), 1:4) + p <- NearestPeaksParam(sampleGroups = c("a", "a", "b")) + checkEquals(sampleGroups(p), c("a", "a", "b")) + + p <- new("NearestPeaksParam", mzVsRtBalance = 3) + checkEquals(mzVsRtBalance(p), 3) + mzVsRtBalance(p) <- 20 + checkEquals(mzVsRtBalance(p), 20) + p <- NearestPeaksParam(mzVsRtBalance = 33) + checkEquals(mzVsRtBalance(p), 33) + checkException(NearestPeaksParam(mzVsRtBalance = -4)) + checkException(NearestPeaksParam(mzVsRtBalance = 1:4)) + + p <- new("NearestPeaksParam", absMz = 3) + checkEquals(absMz(p), 3) + absMz(p) <- 20 + checkEquals(absMz(p), 20) + p <- NearestPeaksParam(absMz = 33) + checkEquals(absMz(p), 33) + checkException(NearestPeaksParam(absMz = -4)) + checkException(NearestPeaksParam(absMz = 1:3)) + + p <- new("NearestPeaksParam", absRt = 3) + checkEquals(absRt(p), 3) + absRt(p) <- 20 + checkEquals(absRt(p), 20) + p <- NearestPeaksParam(absRt = 33) + checkEquals(absRt(p), 33) + checkException(NearestPeaksParam(absRt = -4)) + checkException(NearestPeaksParam(absRt = 1:3)) + + p <- new("NearestPeaksParam", kNN = 3) + checkEquals(kNN(p), 3) + kNN(p) <- 20 + checkEquals(kNN(p), 20) + p <- NearestPeaksParam(kNN = 33) + checkEquals(kNN(p), 33) + checkException(NearestPeaksParam(kNN = -4)) + checkException(NearestPeaksParam(kNN = 1:3)) +} + +test_PeakGroupsParam <- function() { + ## Check getter/setter methods: + p <- new("PeakGroupsParam", minFraction = 0.8) + 
checkEquals(minFraction(p), 0.8) + minFraction(p) <- 0.3 + checkEquals(minFraction(p), 0.3) + p <- PeakGroupsParam(minFraction = 0.7) + checkEquals(minFraction(p), 0.7) + checkException(minFraction(p) <- c(2, 2)) + checkException(minFraction(p) <- -1) + checkException(minFraction(p) <- 3) + + p <- new("PeakGroupsParam", extraPeaks = 2) + checkEquals(extraPeaks(p), 2) + extraPeaks(p) <- 0.3 + checkEquals(extraPeaks(p), 0.3) + p <- PeakGroupsParam(extraPeaks = 7) + checkEquals(extraPeaks(p), 7) + checkException(extraPeaks(p) <- c(2, 2)) + checkException(extraPeaks(p) <- -1) + + p <- new("PeakGroupsParam", span = 0.5) + checkEquals(span(p), 0.5) + span(p) <- 0.3 + checkEquals(span(p), 0.3) + p <- PeakGroupsParam(span = 7) + checkEquals(span(p), 7) + checkException(span(p) <- c(2, 2)) + checkException(span(p) <- -1) + + p <- new("PeakGroupsParam", smooth = "linear") + checkEquals(smooth(p), "linear") + smooth(p) <- "loess" + checkEquals(smooth(p), "loess") + p <- PeakGroupsParam(smooth = "linear") + checkEquals(smooth(p), "linear") + checkException(smooth(p) <- "other") + checkException(smooth(p) <- c("linear", "loess")) + + p <- new("PeakGroupsParam", family = "symmetric") + checkEquals(family(p), "symmetric") + family(p) <- "gaussian" + checkEquals(family(p), "gaussian") + p <- PeakGroupsParam(family = "symmetric") + checkEquals(family(p), "symmetric") + checkException(family(p) <- "other") + checkException(family(p) <- c("symmetric", "gaussian")) + + mt <- matrix(1:4, 1:4) + p <- new("PeakGroupsParam", peakGroupsMatrix = mt) + checkEquals(peakGroupsMatrix(p), mt) + peakGroupsMatrix(p) <- mt + 2 + checkEquals(peakGroupsMatrix(p), mt + 2) + p <- PeakGroupsParam(peakGroupsMatrix = mt) + checkEquals(peakGroupsMatrix(p), mt) +} + + +test_ObiwarpParam <- function() { + library(xcms) + library(RUnit) + ## Check getter/setter methods: + p <- new("ObiwarpParam", binSize = 0.8) + checkEquals(binSize(p), 0.8) + binSize(p) <- 0.3 + checkEquals(binSize(p), 0.3) + p <- 
ObiwarpParam(binSize = 0.7) + checkEquals(binSize(p), 0.7) + checkException(binSize(p) <- c(2, 2)) + checkException(binSize(p) <- -1) + + p <- new("ObiwarpParam", centerSample = 2L) + checkEquals(centerSample(p), 2L) + centerSample(p) <- 1 + checkEquals(centerSample(p), 1L) + p <- ObiwarpParam(centerSample = 7) + checkEquals(centerSample(p), 7) + checkException(centerSample(p) <- c(2, 2)) + checkException(centerSample(p) <- -1) + + p <- new("ObiwarpParam", response = 3L) + checkEquals(response(p), 3L) + response(p) <- 5 + checkEquals(response(p), 5L) + p <- ObiwarpParam(response = 7) + checkEquals(response(p), 7) + checkException(response(p) <- c(2, 2)) + checkException(response(p) <- -1) + checkException(response(p) <- 200) + + p <- new("ObiwarpParam", distFun = "euc") + checkEquals(distFun(p), "euc") + checkEquals(gapInit(p), 0.9) + checkEquals(gapExtend(p), 1.8) + distFun(p) <- "cor" + checkEquals(distFun(p), "cor") + checkEquals(gapInit(p), 0.3) + checkEquals(gapExtend(p), 2.4) + distFun(p) <- "cov" + checkEquals(distFun(p), "cov") + checkEquals(gapInit(p), 0) + checkEquals(gapExtend(p), 11.7) + distFun(p) <- "prd" + checkEquals(distFun(p), "prd") + checkEquals(gapInit(p), 0) + checkEquals(gapExtend(p), 7.8) + p <- ObiwarpParam(distFun = "cov") + checkEquals(distFun(p), "cov") + checkException(distFun(p) <- c("a", "cov")) + checkException(distFun(p) <- "other") + + p <- new("ObiwarpParam", gapInit = 4.2) + checkEquals(gapInit(p), 4.2) + gapInit(p) <- 5.2 + checkEquals(gapInit(p), 5.2) + p <- ObiwarpParam(gapInit = 3.1) + checkEquals(gapInit(p), 3.1) + checkException(gapInit(p) <- c(2, 2)) + checkException(gapInit(p) <- -1) + + p <- new("ObiwarpParam", gapExtend = 4.2) + checkEquals(gapExtend(p), 4.2) + gapExtend(p) <- 5.2 + checkEquals(gapExtend(p), 5.2) + p <- ObiwarpParam(gapExtend = 3.1) + checkEquals(gapExtend(p), 3.1) + checkException(gapExtend(p) <- c(2, 2)) + checkException(gapExtend(p) <- -1) + + p <- new("ObiwarpParam", factorDiag = 4.2) + 
checkEquals(factorDiag(p), 4.2) + factorDiag(p) <- 1.2 + checkEquals(factorDiag(p), 1.2) + p <- ObiwarpParam(factorDiag = 3.1) + checkEquals(factorDiag(p), 3.1) + checkException(factorDiag(p) <- c(2, 2)) + checkException(factorDiag(p) <- -1) + + p <- new("ObiwarpParam", factorGap = 4.2) + checkEquals(factorGap(p), 4.2) + factorGap(p) <- 4.2 + checkEquals(factorGap(p), 4.2) + p <- ObiwarpParam(factorGap = 3.1) + checkEquals(factorGap(p), 3.1) + checkException(factorGap(p) <- c(2, 2)) + checkException(factorGap(p) <- -1) + + p <- new("ObiwarpParam", localAlignment = TRUE) + checkEquals(localAlignment(p), TRUE) + localAlignment(p) <- FALSE + checkEquals(localAlignment(p), FALSE) + p <- ObiwarpParam(localAlignment = TRUE) + checkEquals(localAlignment(p), TRUE) + checkException(localAlignment(p) <- c(TRUE, FALSE)) + + p <- new("ObiwarpParam", initPenalty = 4.2) + checkEquals(initPenalty(p), 4.2) + initPenalty(p) <- 2.2 + checkEquals(initPenalty(p), 2.2) + p <- ObiwarpParam(initPenalty = 3.1) + checkEquals(initPenalty(p), 3.1) + checkException(factorGap(p) <- c(2, 2)) + checkException(factorGap(p) <- -1) +} + +test_GenericParam <- function() { + prm <- GenericParam(fun = "mean") + checkEquals(prm@fun, "mean") + ## Errors + checkException(GenericParam(args = list(na.rm = TRUE))) + checkException(GenericParam(fun = c("a", "b"))) +} + +test_FillChromPeaksParam <- function() { + library(xcms) + library(RUnit) + ## Check getter/setter methods: + p <- new("FillChromPeaksParam", expandMz = 0.8) + checkEquals(expandMz(p), 0.8) + expandMz(p) <- 0.3 + checkEquals(expandMz(p), 0.3) + p <- FillChromPeaksParam(expandMz = 0.7) + checkEquals(expandMz(p), 0.7) + checkException(expandMz(p) <- c(2, 2)) + checkException(expandMz(p) <- -2) + + p <- new("FillChromPeaksParam", expandRt = 0.8) + checkEquals(expandRt(p), 0.8) + expandRt(p) <- 0.3 + checkEquals(expandRt(p), 0.3) + p <- FillChromPeaksParam(expandRt = 0.7) + checkEquals(expandRt(p), 0.7) + checkException(expandRt(p) <- c(2, 2)) + 
checkException(expandRt(p) <- -2) + + p <- new("FillChromPeaksParam", ppm = 8) + checkEquals(ppm(p), 8) + ppm(p) <- 3 + checkEquals(ppm(p), 3) + p <- FillChromPeaksParam(ppm = 7) + checkEquals(ppm(p), 7) + checkException(ppm(p) <- c(2, 2)) + checkException(ppm(p) <- -2) +} diff --git a/inst/unitTests/runit.XCMSnExp.R b/inst/unitTests/runit.XCMSnExp.R index 063186ab6..2c09bb369 100644 --- a/inst/unitTests/runit.XCMSnExp.R +++ b/inst/unitTests/runit.XCMSnExp.R @@ -1,11 +1,13 @@ ## tests related to the new XCMSnExp object. -library(RUnit) -cwp <- CentWaveParam(noise = 10000, snthresh = 40) -## od <- filterRt(od, rt = c(3000, 4000)) -od_x <- detectFeatures(faahko_od, param = cwp) -xs <- xcmsSet(faahko_3_files, profparam = list(step = 0), method = "centWave", - noise = 10000, snthresh = 40) +## od_x <- faahko_od +## xod_x <- faahko_xod +## xod_xg <- groupChromPeaks(xod_x, param = PeakDensityParam()) +## xod_xgr <- adjustRtime(xod_xg, param = PeakGroupsParam(span = 0.4)) +## xod_xgrg <- groupChromPeaks(xod_xgr, param = PeakDensityParam()) + +xs <- faahko_xs + xs_2 <- group(xs) suppressWarnings( xs_2 <- retcor(xs_2) @@ -18,8 +20,8 @@ od_fa <- faahko_od .checkCreationOfEmptyObject <- function() { x <- new("XCMSnExp") checkTrue(!hasAdjustedRtime(x)) - checkTrue(!hasAlignedFeatures(x)) - checkTrue(!hasDetectedFeatures(x)) + checkTrue(!hasFeatures(x)) + checkTrue(!hasChromPeaks(x)) } test_XCMSnExp_class <- function() { @@ -33,16 +35,16 @@ test_XCMSnExp_class <- function() { xod <- as(od_fa, "XCMSnExp") checkTrue(validObject(xod)) ## MsFeatureData error: environment is locked - checkException(xod@msFeatureData$features <- 3) + checkException(xod@msFeatureData$chromPeaks <- 3) checkException(xod@msFeatureData$bla <- 4) checkTrue(validObject(xod)) .checkCreationOfEmptyObject() - ## xod@msFeatureData$features <- xs_2@peaks + ## xod@msFeatureData$chromPeaks <- xs_2@peaks ## checkTrue(validObject(xod)) - ## xod@msFeatureData$features[1, "sample"] <- 40 + ## 
xod@msFeatureData$chromPeaks[1, "sample"] <- 40 ## checkException(validObject(xod)) - ## xod@msFeatureData$features[1, "sample"] <- 3 + ## xod@msFeatureData$chromPeaks[1, "sample"] <- 3 ## xod@msFeatureData$adjustedRtime <- xs_2@rt$corrected ## checkTrue(validObject(xod)) ## xod@msFeatureData$adjustedRtime[[2]] <- 1:4 @@ -58,13 +60,17 @@ test_XCMSnExp_rtime <- function() { rts_2 <- rtime(od_x) checkEquals(rts, rts_2) ## Test with bySample. - rts_3 <- rtime(od_x, bySample = TRUE) + rts_3 <- rtime(xod_x, bySample = TRUE) checkEquals(rts_3, split(rts, f = fromFile(faahko_od))) ## Check if rtimes are correctly ordered for bySample rts_4 <- rtime(filterFile(faahko_od, file = 2)) checkEquals(rts_4, rts_3[[2]]) rts_4 <- rtime(filterFile(faahko_od, file = 3)) checkEquals(rts_4, rts_3[[3]]) + ## Compare with the values we get from an xcmsSet: + rtx <- faahko_xs@rt$raw + checkEquals(unlist(rtx, use.names = FALSE), + unlist(rtime(faahko_xod, bySample = TRUE), use.names = FALSE)) } test_XCMSnExp_mz <- function() { @@ -72,7 +78,7 @@ test_XCMSnExp_mz <- function() { ## The check below has to work, since we're calling the mz,OnDiskMSnExp. ## mzs_2 <- mz(od_x) ## checkEquals(mzs, mzs_2) - mzs_2 <- mz(od_x, bySample = TRUE) + mzs_2 <- mz(xod_x, bySample = TRUE) tmp <- split(mzs, fromFile(faahko_od)) checkEquals(lapply(tmp, unlist, use.names = FALSE), mzs_2) ## Check if mz are correctly ordered for bySample @@ -85,7 +91,7 @@ test_XCMSnExp_intensity <- function() { ## The check below has to work, since we're calling the intensity,OnDiskMSnExp. 
## ints_2 <- intensity(od_x) ## checkEquals(ints, ints_2) - ints_2 <- intensity(od_x, bySample = TRUE) + ints_2 <- intensity(xod_x, bySample = TRUE) tmp <- split(ints, fromFile(faahko_od)) checkEquals(lapply(tmp, unlist, use.names = FALSE), ints_2) ## Check if mz are correctly ordered for bySample @@ -98,55 +104,139 @@ test_XCMSnExp_spectra <- function() { res <- spectra(xod) res_2 <- spectra(xod, bySample = TRUE) checkEquals(split(res, fromFile(xod)), res_2) + ## xod_x + tmp <- filterRt(xod_x, rt = c(2700, 2900)) + res <- spectra(tmp) + rts <- unlist(lapply(res, rtime)) + checkEquals(rts, rtime(tmp)) + ## Check with adjusted retention times. + tmp2 <- filterRt(xod_xgr, rt = c(2700, 2900)) + res2 <- spectra(tmp2) + rts2 <- unlist(lapply(res2, rtime)) + checkEquals(rts2, rtime(tmp2)) + ## Now do it on one file: + tmp <- filterFile(xod_x, file = 2) + res <- spectra(tmp) + checkEquals(rtime(tmp), unlist(lapply(res, rtime))) + tmp2 <- filterFile(xod_xgr, file = 2, keepAdjustedRtime = TRUE) + res2 <- spectra(tmp2) + checkEquals(rtime(tmp2), unlist(lapply(res2, rtime))) + checkTrue(sum(unlist(lapply(res2, rtime)) == + unlist(lapply(res, rtime))) < length(rtime(tmp)) / 4) + res3 <- spectra(tmp2, adjusted = FALSE) + checkEquals(res, res3) } test_XCMSnExp_class_accessors <- function() { .checkCreationOfEmptyObject() ## Filling with data... 
xod <- as(od_fa, "XCMSnExp") - ## features - checkTrue(!hasDetectedFeatures(xod)) - features(xod) <- xs_2@peaks - checkTrue(hasDetectedFeatures(xod)) - checkEquals(features(xod), xs_2@peaks) - checkException(features(xod) <- 4) - tmp <- features(xod, bySample = TRUE) + ## peaks + checkTrue(!hasChromPeaks(xod)) + chromPeaks(xod) <- xs_2@peaks + checkTrue(hasChromPeaks(xod)) + checkEquals(chromPeaks(xod), xs_2@peaks) + checkException(chromPeaks(xod) <- 4) + tmp <- chromPeaks(xod, bySample = TRUE) checkTrue(length(tmp) == length(fileNames(xod))) tmp <- do.call(rbind, tmp) rownames(tmp) <- NULL - checkEquals(tmp, features(xod)) + checkEquals(tmp, chromPeaks(xod)) + ## chromPeaks with rt + all_pks <- chromPeaks(xod_x) + pks <- chromPeaks(xod_x, rt = c(2000, 2600), type = "within") + checkTrue(nrow(pks) < nrow(all_pks)) + checkTrue(all(pks[, "rtmin"] >= 2000 & pks[, "rtmax"] <= 2600)) + pks <- chromPeaks(xod_x, rt = c(2000, 2600), bySample = TRUE, + type = "within") + checkTrue(nrow(pks[[2]]) == 0) + pks <- chromPeaks(xod_x, rt = c(2000, 2600), type = "any") + checkTrue(all(pks[, "rtmax"] >= 2000 & pks[, "rtmin"] <= 2600)) + pks <- chromPeaks(xod_x, rt = c(2000, 2200)) + checkTrue(nrow(pks) == 0) + pks <- chromPeaks(xod_x, rt = c(2000, 2200), bySample = TRUE) + checkTrue(all(lengths(pks) == 0)) + ## chromPeaks with mz + pks <- chromPeaks(xod_x, mz = c(280, 281), type = "within") + checkTrue(all(pks[, "mzmin"] >= 280 & pks[, "mzmax"] <= 281)) + pks <- chromPeaks(xod_x, mz = c(280, 281), bySample = TRUE, type = "within") + checkTrue(nrow(pks[[1]]) == 0) + checkTrue(nrow(pks[[3]]) == 0) + checkTrue(nrow(pks[[2]]) == 1) + pks <- chromPeaks(xod_x, mz = c(280, 300), bySample = FALSE, type = "within") + checkTrue(all(pks[, "mzmin"] >= 280 & pks[, "mzmax"] <= 300)) + pks <- chromPeaks(xod_x, mz = c(280, 300), bySample = FALSE, type = "any") + checkTrue(all(pks[, "mzmax"] >= 280 & pks[, "mzmin"] <= 300)) + pks <- chromPeaks(xod_x, mz = c(200, 210), bySample = FALSE) + 
checkTrue(nrow(pks) == 0) + pks <- chromPeaks(xod_x, mz = c(200, 210), bySample = TRUE) + checkTrue(all(lengths(pks) == 0)) + ## chromPeaks with both + pks <- chromPeaks(xod_x, mz = c(280, 300), rt = c(3000, 3300), + type = "within") + checkTrue(all(pks[, "mzmin"] >= 280 & pks[, "mzmax"] <= 300)) + checkTrue(all(pks[, "rtmin"] >= 3000 & pks[, "rtmax"] <= 3300)) + pks <- chromPeaks(xod_x, mz = c(280, 300), rt = c(3000, 3300), + type = "any") + checkTrue(all(pks[, "mzmax"] >= 280 & pks[, "mzmin"] <= 300)) + checkTrue(all(pks[, "rtmax"] >= 3000 & pks[, "rtmin"] <= 3300)) ## Wrong assignments. pks <- xs_2@peaks pks[1, "sample"] <- 40 - checkException(features(xod) <- pks) - ## featureGroups - checkTrue(!hasAlignedFeatures(xod)) + checkException(chromPeaks(xod) <- pks) + ## featureDefinitions + checkTrue(!hasFeatures(xod)) library(S4Vectors) fd <- DataFrame(xs_2@groups) - fd$featureidx <- xs_2@groupidx - featureGroups(xod) <- fd - checkTrue(hasDetectedFeatures(xod)) - checkTrue(hasAlignedFeatures(xod)) - checkEquals(featureGroups(xod), fd) + fd$peakidx <- xs_2@groupidx + featureDefinitions(xod) <- fd + checkTrue(hasChromPeaks(xod)) + checkTrue(hasFeatures(xod)) + checkEquals(featureDefinitions(xod), fd) ## adjustedRtime checkTrue(!hasAdjustedRtime(xod)) - adjustedRtime(xod) <- xs_2@rt$corrected - checkTrue(hasAdjustedRtime(xod)) - checkTrue(hasDetectedFeatures(xod)) - checkTrue(hasAlignedFeatures(xod)) - checkEquals(adjustedRtime(xod, bySample = TRUE), xs_2@rt$corrected) + xod2 <- xod + adjustedRtime(xod2) <- xs_2@rt$corrected + checkTrue(hasAdjustedRtime(xod2)) + checkTrue(hasChromPeaks(xod2)) + checkTrue(hasFeatures(xod2)) + checkEquals(adjustedRtime(xod2, bySample = TRUE), xs_2@rt$corrected) + ## The chromatographic peaks should be different to the unadjusted ones. 
+ tmp <- chromPeaks(xod)[, "rt"] == chromPeaks(xod2)[, "rt"] + ## Most of the rts should be different + checkTrue(sum(tmp) < length(tmp)/4) + tmp <- chromPeaks(xod)[, "rtmin"] == chromPeaks(xod2)[, "rtmin"] + checkTrue(sum(tmp) < length(tmp)/4) + tmp <- chromPeaks(xod)[, "rtmax"] == chromPeaks(xod2)[, "rtmax"] + checkTrue(sum(tmp) < length(tmp)/4) + ## rtime should now also return adjusted retention times + checkEquals(rtime(xod2), adjustedRtime(xod2)) + checkEquals(rtime(xod2, adjusted = FALSE), rtime(as(xod2, "OnDiskMSnExp"))) + checkEquals(rtime(xod2, adjusted = FALSE), rtime(xod)) + checkEquals(rtime(xod2, adjusted = TRUE), adjustedRtime(xod2)) ## Indirect test that the ordering of the adjusted retention times matches ## ordering of rtime. - tmp <- unlist(adjustedRtime(xod, bySample = TRUE)) - tmp_diff <- tmp - rtime(xod) - tmp_diff_2 <- adjustedRtime(xod, bySample = FALSE) - rtime(xod) + tmp <- unlist(adjustedRtime(xod2, bySample = TRUE)) + tmp_diff <- tmp - rtime(xod2) + tmp_diff_2 <- adjustedRtime(xod2, bySample = FALSE) - rtime(xod2) checkTrue(max(tmp_diff) > max(tmp_diff_2)) - checkEquals(names(adjustedRtime(xod)), names(rtime(xod))) + checkEquals(names(adjustedRtime(xod2)), names(rtime(xod2))) ## Wrong assignments. - checkException(adjustedRtime(xod) <- xs_2@rt$corrected[1:2]) + checkException(adjustedRtime(xod2) <- xs_2@rt$corrected[1:2]) + ## bracket subset + tmp <- xod2[1] + checkTrue(length(tmp[[1]]) == 1) + checkTrue(length(xod2[[1]]) == 1) .checkCreationOfEmptyObject() } +test_XCMSnExp_findChromPeaks <- function() { + ## Call findChromPeaks on an XCMSnExp + tmp <- findChromPeaks(xod_x, param = CentWaveParam(noise = 10000, + snthresh = 40)) + checkEquals(chromPeaks(tmp), chromPeaks(xod_x)) +} + test_XCMSnExp_processHistory <- function() { ph <- xcms:::ProcessHistory(fileIndex. = 2, info. 
= "For file 2") @@ -167,141 +257,145 @@ test_XCMSnExp_processHistory <- function() { } test_XCMSnExp_droppers <- function() { - ##checkTrue(FALSE) + ## How are the drop functions expected to work? .checkCreationOfEmptyObject() - - res <- dropFeatures(od_x) - checkTrue(!hasDetectedFeatures(res)) - - ## Add grouping. - od_2 <- od_x - od_2 <- xcms:::addProcessHistory(od_2, - xcms:::ProcessHistory(fileIndex. = 1:3, - type = xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - library(S4Vectors) - fd <- DataFrame(xs_2@groups) - fd$featureidx <- xs_2@groupidx - featureGroups(od_2) <- fd - ## Add retention time adjustment. - od_3 <- od_2 - od_3 <- xcms:::addProcessHistory(od_3, - xcms:::ProcessHistory(fileIndex. = 1:3, - type = xcms:::.PROCSTEP.RTIME.CORRECTION)) - adjustedRtime(od_3) <- xs_2@rt$corrected - ## and grouping - od_4 <- od_3 - od_4 <- xcms:::addProcessHistory(od_4, - xcms:::ProcessHistory(fileIndex. = 1:3, - type = xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - ## Tests - ## drop features: - res <- dropFeatures(od_2) - checkTrue(!hasDetectedFeatures(res)) - checkTrue(!hasAlignedFeatures(res)) - checkTrue(!hasAdjustedRtime(res)) - types <- unlist(lapply(processHistory(res), processType)) - checkTrue(!any(types == xcms:::.PROCSTEP.FEATURE.DETECTION)) - checkTrue(!any(types == xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - checkTrue(!any(types == xcms:::.PROCSTEP.RTIME.CORRECTION)) - - res <- dropFeatures(od_3) - checkTrue(!hasDetectedFeatures(res)) - checkTrue(!hasAlignedFeatures(res)) - checkTrue(!hasAdjustedRtime(res)) - types <- unlist(lapply(processHistory(res), processType)) - checkTrue(!any(types == xcms:::.PROCSTEP.FEATURE.DETECTION)) - checkTrue(!any(types == xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - checkTrue(!any(types == xcms:::.PROCSTEP.RTIME.CORRECTION)) - - res <- dropFeatures(od_4) - checkTrue(!hasDetectedFeatures(res)) - checkTrue(!hasAlignedFeatures(res)) - checkTrue(!hasAdjustedRtime(res)) - types <- unlist(lapply(processHistory(res), processType)) - checkTrue(!any(types == 
xcms:::.PROCSTEP.FEATURE.DETECTION)) - checkTrue(!any(types == xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - checkTrue(!any(types == xcms:::.PROCSTEP.RTIME.CORRECTION)) - - ## Drop feature groups - ## adjusted retention times are always dropped. - res <- dropFeatureGroups(od_x) - checkTrue(!hasAlignedFeatures(res)) + type_feat_det <- xcms:::.PROCSTEP.PEAK.DETECTION + type_feat_algn <- xcms:::.PROCSTEP.PEAK.GROUPING + type_rt_adj <- xcms:::.PROCSTEP.RTIME.CORRECTION + ## Perform alignment. + ## xod_xg <- groupChromPeaks(xod_x, param = PeakDensityParam()) + checkTrue(hasFeatures(xod_xg)) + checkTrue(hasChromPeaks(xod_x)) + checkTrue(hasChromPeaks(xod_xg)) + checkTrue(!hasAdjustedRtime(xod_xg)) + checkTrue(length(processHistory(xod_xg, type = type_feat_algn)) == 1) + ## Retention time adjustment. + ## xod_xgr <- adjustRtime(xod_xg, param = PeakGroupsParam(span = 1)) + checkTrue(hasChromPeaks(xod_xgr)) + checkTrue(length(processHistory(xod_xgr, type = type_feat_det)) == 1) + checkTrue(!hasFeatures(xod_xgr)) ## These should have been removed + checkTrue(length(processHistory(xod_xgr, type = type_feat_algn)) == 1) + checkTrue(hasAdjustedRtime(xod_xgr)) + checkTrue(length(processHistory(xod_xgr, type = type_rt_adj)) == 1) + ## Most of the retention times are different + checkTrue(sum(chromPeaks(xod_xgr)[, "rt"] != chromPeaks(xod_x)[, "rt"]) > + nrow(chromPeaks(xod_x)) / 2) + checkTrue(sum(rtime(xod_xgr) == rtime(xod_xg)) < length(rtime(xod_xg) / 2)) + ## Alignment after retention time adjustment. 
+ ## xod_xgrg <- groupChromPeaks(xod_xgr, param = PeakDensityParam()) + checkTrue(hasChromPeaks(xod_xgrg)) + checkEquals(chromPeaks(xod_xgrg), chromPeaks(xod_xgr)) + checkTrue(hasAdjustedRtime(xod_xgrg)) + checkEquals(rtime(xod_xgrg), rtime(xod_xgr)) + checkEquals(rtime(xod_xgrg, adjusted = FALSE), rtime(od_x)) + checkTrue(length(processHistory(xod_xgr, type = type_feat_algn)) == 1) + checkTrue(hasFeatures(xod_xgrg)) + checkTrue(length(processHistory(xod_xgrg, type = type_feat_algn)) == 2) + + ## 1) dropDetectedFeatures: delete all process history steps and all data. + res <- dropChromPeaks(xod_x) + checkTrue(!hasChromPeaks(res)) + checkTrue(length(processHistory(res, type = type_feat_det)) == 0) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res, type = type_feat_algn)) == 0) checkTrue(!hasAdjustedRtime(res)) - checkTrue(hasDetectedFeatures(res)) - types <- unlist(lapply(processHistory(res), processType)) - checkTrue(any(types == xcms:::.PROCSTEP.FEATURE.DETECTION)) - checkTrue(!any(types == xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - checkTrue(!any(types == xcms:::.PROCSTEP.RTIME.CORRECTION)) - - res <- dropFeatureGroups(od_2) - checkTrue(!hasAlignedFeatures(res)) - checkTrue(!hasAdjustedRtime(res)) - checkTrue(hasDetectedFeatures(res)) - types <- unlist(lapply(processHistory(res), processType)) - checkTrue(any(types == xcms:::.PROCSTEP.FEATURE.DETECTION)) - checkTrue(!any(types == xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - checkTrue(!any(types == xcms:::.PROCSTEP.RTIME.CORRECTION)) - - res <- dropFeatureGroups(od_3) - checkTrue(!hasAlignedFeatures(res)) + checkTrue(length(processHistory(res, type = type_rt_adj)) == 0) + checkEquals(rtime(res), rtime(od_x)) + ## + res <- dropChromPeaks(xod_xg) + checkTrue(!hasChromPeaks(res)) + checkTrue(length(processHistory(res, type = type_feat_det)) == 0) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res, type = type_feat_algn)) == 0) checkTrue(!hasAdjustedRtime(res)) - 
checkTrue(hasDetectedFeatures(res)) - types <- unlist(lapply(processHistory(res), processType)) - checkTrue(any(types == xcms:::.PROCSTEP.FEATURE.DETECTION)) - checkTrue(!any(types == xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - checkTrue(!any(types == xcms:::.PROCSTEP.RTIME.CORRECTION)) - - res <- dropFeatureGroups(od_4) - checkTrue(!hasAlignedFeatures(res)) + checkTrue(length(processHistory(res, type = type_rt_adj)) == 0) + checkEquals(rtime(res), rtime(od_x)) + ## + res <- dropChromPeaks(xod_xgr) + checkTrue(!hasChromPeaks(res)) + checkTrue(length(processHistory(res, type = type_feat_det)) == 0) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res, type = type_feat_algn)) == 0) checkTrue(!hasAdjustedRtime(res)) - checkTrue(hasDetectedFeatures(res)) - types <- unlist(lapply(processHistory(res), processType)) - checkTrue(any(types == xcms:::.PROCSTEP.FEATURE.DETECTION)) - checkTrue(!any(types == xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - checkTrue(!any(types == xcms:::.PROCSTEP.RTIME.CORRECTION)) - - ## Drop aligned rtime - ## Feature alignments are only dropped if they were performed after the - ## retention time adjustments. 
- res <- dropAdjustedRtime(od_x) - checkTrue(!hasAlignedFeatures(res)) + checkTrue(length(processHistory(res, type = type_rt_adj)) == 0) + checkEquals(rtime(res), rtime(od_x)) + ## + res <- dropChromPeaks(xod_xgrg) + checkTrue(!hasChromPeaks(res)) + checkTrue(length(processHistory(res, type = type_feat_det)) == 0) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res, type = type_feat_algn)) == 0) checkTrue(!hasAdjustedRtime(res)) - checkTrue(hasDetectedFeatures(res)) - types <- unlist(lapply(processHistory(res), processType)) - checkTrue(any(types == xcms:::.PROCSTEP.FEATURE.DETECTION)) - checkTrue(!any(types == xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - checkTrue(!any(types == xcms:::.PROCSTEP.RTIME.CORRECTION)) - - res <- dropAdjustedRtime(od_2) - checkTrue(hasAlignedFeatures(res)) + checkTrue(length(processHistory(res, type = type_rt_adj)) == 0) + checkEquals(rtime(res), rtime(od_x)) + + ## 2) dropFeatureDefinitions: + ## a) drop the feature groups and the latest related process history + ## b) if retention time correction was performed AFTER the latest feature + ## grouping, drop also the retention time correction and all related + ## process histories. + res <- dropFeatureDefinitions(xod_xg) + checkEquals(res, xod_x) + checkTrue(hasChromPeaks(res)) + checkTrue(length(processHistory(res, type = type_feat_det)) == 1) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res, type = type_feat_algn)) == 0) checkTrue(!hasAdjustedRtime(res)) - checkTrue(hasDetectedFeatures(res)) - types <- unlist(lapply(processHistory(res), processType)) - checkTrue(any(types == xcms:::.PROCSTEP.FEATURE.DETECTION)) - checkTrue(any(types == xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - checkTrue(!any(types == xcms:::.PROCSTEP.RTIME.CORRECTION)) - - ## Now dropping the adjusted retention time, but not the feature alignment. 
- res <- dropAdjustedRtime(od_3) - checkTrue(hasAlignedFeatures(res)) + checkTrue(length(processHistory(res, type = type_rt_adj)) == 0) + checkEquals(rtime(res), rtime(od_x)) + ## No feature groups - so there is nothing that this function does here. + res <- dropFeatureDefinitions(xod_xgr) + checkEquals(res, xod_xgr) + checkTrue(hasChromPeaks(res)) + checkTrue(length(processHistory(res, type = type_feat_det)) == 1) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res, type = type_feat_algn)) == 1) + checkTrue(hasAdjustedRtime(res)) + checkTrue(length(processHistory(res, type = type_rt_adj)) == 1) + ## Remove the latest ones. + res <- dropFeatureDefinitions(xod_xgrg) + checkEquals(res, xod_xgr) + checkTrue(hasChromPeaks(res)) + checkTrue(length(processHistory(res, type = type_feat_det)) == 1) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res, type = type_feat_algn)) == 1) + checkTrue(hasAdjustedRtime(res)) + checkTrue(length(processHistory(res, type = type_rt_adj)) == 1) + checkEquals(rtime(res, adjusted = FALSE), rtime(od_x)) + checkEquals(rtime(res, adjusted = TRUE), rtime(xod_xgr)) + + ## 3) dropAdjustedRtime: + ## a) drop the retention time adjustment and related process histories + ## b) if grouping has been performed AFTER retention time correction, + ## drop the feature alignment and all related process histories. + ## c) if grouping has been performed BEFORE retention time correction, + ## do nothing. + res <- dropAdjustedRtime(xod_xg) + checkEquals(res, xod_xg) + ## This drops also the process history for alignment. 
+ res <- dropAdjustedRtime(xod_xgr) + checkTrue(hasChromPeaks(res)) + checkTrue(length(processHistory(res, type = type_feat_det)) == 1) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res, type = type_feat_algn)) == 0) checkTrue(!hasAdjustedRtime(res)) - checkTrue(hasDetectedFeatures(res)) - checkTrue(hasAdjustedRtime(od_3)) - types <- unlist(lapply(processHistory(res), processType)) - checkTrue(any(types == xcms:::.PROCSTEP.FEATURE.DETECTION)) - checkTrue(any(types == xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - checkTrue(!any(types == xcms:::.PROCSTEP.RTIME.CORRECTION)) - - ## Drop adjusted retention time AND feature alignment. - res <- dropAdjustedRtime(od_4) - checkTrue(!hasAlignedFeatures(res)) + checkTrue(length(processHistory(res, type = type_rt_adj)) == 0) + checkEquals(chromPeaks(res), chromPeaks(xod_x)) + checkEquals(res, xod_x) + checkEquals(rtime(res), rtime(xod_x)) + checkEquals(rtime(res), rtime(xod_xgr, adjusted = FALSE)) + ## This drops also the feature alignment performed later. 
+ res <- dropAdjustedRtime(xod_xgrg) + checkTrue(hasChromPeaks(res)) + checkTrue(length(processHistory(res, type = type_feat_det)) == 1) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res, type = type_feat_algn)) == 0) checkTrue(!hasAdjustedRtime(res)) - checkTrue(hasDetectedFeatures(res)) - checkTrue(hasAdjustedRtime(od_4)) - types <- unlist(lapply(processHistory(res), processType)) - checkTrue(any(types == xcms:::.PROCSTEP.FEATURE.DETECTION)) - checkTrue(!any(types == xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - checkTrue(!any(types == xcms:::.PROCSTEP.RTIME.CORRECTION)) + checkTrue(length(processHistory(res, type = type_rt_adj)) == 0) + checkEquals(chromPeaks(res), chromPeaks(xod_x)) + checkEquals(res, xod_x) + checkEquals(rtime(res), rtime(xod_xgrg, adjusted = FALSE)) + .checkCreationOfEmptyObject() } @@ -314,90 +408,91 @@ test_XCMSnExp_inherited_methods <- function() { ## [ tmp_1 <- od_fa[1:10] suppressWarnings( - tmp_2 <- od_x[1:10] + tmp_2 <- xod_x[1:10] ) checkTrue(length(processHistory(tmp_2)) == 0) - checkTrue(!hasDetectedFeatures(tmp_2)) + checkTrue(!hasChromPeaks(tmp_2)) tmp_1@processingData <- new("MSnProcess") tmp_2@processingData <- new("MSnProcess") checkEquals(tmp_1, as(tmp_2, "OnDiskMSnExp")) ## bin tmp_1 <- bin(od_fa) suppressWarnings( - tmp_2 <- bin(od_x) + tmp_2 <- bin(xod_x) ) checkTrue(length(processHistory(tmp_2)) == 0) - checkTrue(!hasDetectedFeatures(tmp_2)) + checkTrue(!hasChromPeaks(tmp_2)) tmp_1@processingData <- new("MSnProcess") tmp_2@processingData <- new("MSnProcess") checkEquals(tmp_1, as(tmp_2, "OnDiskMSnExp")) ## clean tmp_1 <- clean(od_fa) suppressWarnings( - tmp_2 <- clean(od_x) + tmp_2 <- clean(xod_x) ) checkTrue(length(processHistory(tmp_2)) == 0) - checkTrue(!hasDetectedFeatures(tmp_2)) + checkTrue(!hasChromPeaks(tmp_2)) tmp_1@processingData <- new("MSnProcess") tmp_2@processingData <- new("MSnProcess") checkEquals(tmp_1, as(tmp_2, "OnDiskMSnExp")) ## filterAcquisitionNum tmp_1 <- filterAcquisitionNum(od_fa) 
suppressWarnings( - tmp_2 <- filterAcquisitionNum(od_x) + tmp_2 <- filterAcquisitionNum(xod_x) ) + checkTrue(length(tmp_2[[1]]) > 0) checkTrue(length(processHistory(tmp_2)) == 0) - checkTrue(!hasDetectedFeatures(tmp_2)) + checkTrue(!hasChromPeaks(tmp_2)) tmp_1@processingData <- new("MSnProcess") tmp_2@processingData <- new("MSnProcess") checkEquals(tmp_1, as(tmp_2, "OnDiskMSnExp")) ## filterMsLevel tmp_1 <- filterMsLevel(od_fa) suppressWarnings( - tmp_2 <- filterMsLevel(od_x) + tmp_2 <- filterMsLevel(xod_x) ) checkTrue(length(processHistory(tmp_2)) == 0) - checkTrue(!hasDetectedFeatures(tmp_2)) + checkTrue(!hasChromPeaks(tmp_2)) tmp_1@processingData <- new("MSnProcess") tmp_2@processingData <- new("MSnProcess") checkEquals(tmp_1, as(tmp_2, "OnDiskMSnExp")) ## normalize tmp_1 <- normalize(od_fa) suppressWarnings( - tmp_2 <- normalize(od_x) + tmp_2 <- normalize(xod_x) ) checkTrue(length(processHistory(tmp_2)) == 0) - checkTrue(!hasDetectedFeatures(tmp_2)) + checkTrue(!hasChromPeaks(tmp_2)) tmp_1@processingData <- new("MSnProcess") tmp_2@processingData <- new("MSnProcess") checkEquals(tmp_1, as(tmp_2, "OnDiskMSnExp")) ## pickPeaks tmp_1 <- pickPeaks(od_fa) suppressWarnings( - tmp_2 <- pickPeaks(od_x) + tmp_2 <- pickPeaks(xod_x) ) checkTrue(length(processHistory(tmp_2)) == 0) - checkTrue(!hasDetectedFeatures(tmp_2)) + checkTrue(!hasChromPeaks(tmp_2)) tmp_1@processingData <- new("MSnProcess") tmp_2@processingData <- new("MSnProcess") checkEquals(tmp_1, as(tmp_2, "OnDiskMSnExp")) ## removePeaks tmp_1 <- removePeaks(od_fa) suppressWarnings( - tmp_2 <- removePeaks(od_x) + tmp_2 <- removePeaks(xod_x) ) checkTrue(length(processHistory(tmp_2)) == 0) - checkTrue(!hasDetectedFeatures(tmp_2)) + checkTrue(!hasChromPeaks(tmp_2)) tmp_1@processingData <- new("MSnProcess") tmp_2@processingData <- new("MSnProcess") checkEquals(tmp_1, as(tmp_2, "OnDiskMSnExp")) ## smooth tmp_1 <- smooth(od_fa) suppressWarnings( - tmp_2 <- smooth(od_x) + tmp_2 <- smooth(xod_x) ) 
checkTrue(length(processHistory(tmp_2)) == 0) - checkTrue(!hasDetectedFeatures(tmp_2)) + checkTrue(!hasChromPeaks(tmp_2)) tmp_1@processingData <- new("MSnProcess") tmp_2@processingData <- new("MSnProcess") checkEquals(tmp_1, as(tmp_2, "OnDiskMSnExp")) @@ -407,35 +502,46 @@ test_XCMSnExp_inherited_methods <- function() { ## Test XCMSnExp filter methods. test_XCMSnExp_filterFile <- function() { ## filterFile - tmp <- filterFile(od_x, file = 2) + tmp <- filterFile(xod_x, file = 2) checkException(tmp@msFeatureData$bla <- 3) checkTrue(!hasAdjustedRtime(tmp)) - checkTrue(!hasAlignedFeatures(tmp)) - checkTrue(all(features(tmp)[, "sample"] == 1)) - checkEquals(features(tmp)[, -ncol(features(tmp))], - features(od_x)[features(od_x)[, "sample"] == 2, - -ncol(features(od_x))]) + checkTrue(!hasFeatures(tmp)) + checkTrue(all(chromPeaks(tmp)[, "sample"] == 1)) + checkEquals(chromPeaks(tmp)[, -(ncol(chromPeaks(tmp)) - 1)], + chromPeaks(xod_x)[chromPeaks(xod_x)[, "sample"] == 2, + -(ncol(chromPeaks(xod_x)) - 1)]) checkEquals(fileIndex(processHistory(tmp)[[1]]), 1) ## check with other index. - tmp <- filterFile(od_x, file = c(1, 3)) + tmp <- filterFile(xod_x, file = c(1, 3)) + checkTrue(length(tmp[[1]]) == 1) checkTrue(!hasAdjustedRtime(tmp)) - checkTrue(!hasAlignedFeatures(tmp)) - checkTrue(all(features(tmp)[, "sample"] %in% c(1, 2))) - a <- features(tmp) - b <- features(od_x) - checkEquals(a[, -ncol(a)], b[b[, "sample"] %in% c(1, 3), -ncol(b)]) + checkTrue(!hasFeatures(tmp)) + checkTrue(all(chromPeaks(tmp)[, "sample"] %in% c(1, 2))) + a <- chromPeaks(tmp) + b <- chromPeaks(xod_x) + checkEquals(a[, -(ncol(a) - 1)], + b[b[, "sample"] %in% c(1, 3), -(ncol(b) - 1)]) checkEquals(fileIndex(processHistory(tmp)[[1]]), c(1, 2)) ## Errors - checkException(filterFile(od_x, file = 5)) - checkException(filterFile(od_x, file = 1:5)) + checkException(filterFile(xod_x, file = 5)) + checkException(filterFile(xod_x, file = 1:5)) ## Little mockup to check correctness of Process history. 
- od_2 <- od_x - od_2 <- xcms:::addProcessHistory(od_2, - xcms:::ProcessHistory(type = xcms:::.PROCSTEP.RTIME.CORRECTION)) - od_2 <- xcms:::addProcessHistory(od_2, xcms:::ProcessHistory(type = xcms:::.PROCSTEP.UNKNOWN, fileIndex = 2, info. = "I should be here")) - od_2 <- xcms:::addProcessHistory(od_2, xcms:::ProcessHistory(type = xcms:::.PROCSTEP.UNKNOWN, fileIndex = 1, info. = "EEEEEE")) + od_2 <- xod_x + od_2 <- xcms:::addProcessHistory( + od_2, + xcms:::ProcessHistory( + type = xcms:::.PROCSTEP.RTIME.CORRECTION)) + od_2 <- xcms:::addProcessHistory( + od_2, + xcms:::ProcessHistory(type = xcms:::.PROCSTEP.UNKNOWN, + fileIndex = 2, + info. = "I should be here")) + od_2 <- xcms:::addProcessHistory( + od_2, + xcms:::ProcessHistory(type = xcms:::.PROCSTEP.UNKNOWN, + fileIndex = 1, info. = "EEEEEE")) tmp <- filterFile(od_2, file = 2) ph <- processHistory(tmp) @@ -449,175 +555,344 @@ test_XCMSnExp_filterFile <- function() { processInfo(z) == "EEEEEE" })) checkTrue(!any(b)) + ## Do filterFile on xod_xg + res <- filterFile(xod_xg, file = 2) + checkTrue(hasChromPeaks(res)) + checkTrue(!hasAdjustedRtime(res)) + checkTrue(!hasFeatures(res)) + tmp <- chromPeaks(xod_xg) + checkEquals(chromPeaks(res)[, -(ncol(tmp) - 1)], + tmp[tmp[, "sample"] == 2, -(ncol(tmp) - 1)]) + checkEquals(rtime(res), rtime(xod_xg, bySample = TRUE)[[2]]) + ## Do filterFile on xod_xgr + ## Should remove adjusted rts and revert the original peak rts. + res <- filterFile(xod_xgr, file = 2) + checkTrue(hasChromPeaks(res)) + tmp <- chromPeaks(xod_xg) + checkEquals(chromPeaks(res)[, -(ncol(tmp) - 1)], + tmp[tmp[, "sample"] == 2, -(ncol(tmp) - 1)]) + checkEquals(rtime(res), rtime(xod_xg, bySample = TRUE)[[2]]) + checkTrue(!hasAdjustedRtime(res)) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res)) == 1) + checkEquals(processType(processHistory(res)[[1]]), "Peak detection") + ## The same but keep the adjusted retention times. 
+ res <- filterFile(xod_xgr, file = 2, keepAdjustedRtime = TRUE) + checkTrue(hasChromPeaks(res)) + tmp <- chromPeaks(xod_xgr) + checkEquals(chromPeaks(res)[, -(ncol(tmp) - 1)], + tmp[tmp[, "sample"] == 2, -(ncol(tmp) - 1)]) + ## has to be different from the ones in xod_x + tmp <- chromPeaks(xod_x) + checkTrue(sum(chromPeaks(res)[, "rt"] == tmp[tmp[, "sample"] == 2, "rt"]) < + nrow(tmp) / 4) + checkEquals(rtime(res), rtime(xod_xgr, bySample = TRUE)[[2]]) + checkEquals(adjustedRtime(res), adjustedRtime(xod_xgr, bySample = TRUE)[[2]]) + checkTrue(hasAdjustedRtime(res)) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res)) == 2) + checkEquals(processType(processHistory(res)[[1]]), "Peak detection") + checkEquals(processType(processHistory(res)[[2]]), "Retention time correction") + ## Do filterFile on xod_xgrg + res <- filterFile(xod_xgrg, file = c(1, 3)) + checkTrue(hasChromPeaks(res)) + tmp <- chromPeaks(xod_x) + checkEquals(chromPeaks(res)[, -(ncol(tmp) - 1)], + tmp[tmp[, "sample"] %in% c(1, 3), -(ncol(tmp) - 1)]) + checkEquals(unname(rtime(res, bySample = TRUE)), + unname(rtime(xod_xg, bySample = TRUE)[c(1, 3)])) + checkTrue(!hasAdjustedRtime(res)) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res)) == 1) + checkEquals(processType(processHistory(res)[[1]]), "Peak detection") + ## keep adjusted rtime + res <- filterFile(xod_xgrg, file = c(1, 3), keepAdjustedRtime = TRUE) + checkTrue(hasChromPeaks(res)) + tmp <- chromPeaks(xod_xgr) + checkEquals(chromPeaks(res)[, -(ncol(tmp) - 1)], + tmp[tmp[, "sample"] %in% c(1, 3), -(ncol(tmp) - 1)]) + ## has to be different from the ones in xod_x + tmp <- chromPeaks(xod_x) + checkTrue(sum(chromPeaks(res)[, "rt"] == tmp[tmp[, "sample"] %in% c(1, 3), "rt"]) < + nrow(tmp) / 4) + checkEquals(rtime(res, bySample = TRUE), + rtime(xod_xgr, bySample = TRUE)[c(1, 3)]) + checkEquals(adjustedRtime(res, bySample = TRUE), + adjustedRtime(xod_xgr, bySample = TRUE)[c(1, 3)]) + checkTrue(hasAdjustedRtime(res)) + 
checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res)) == 2) + checkEquals(processType(processHistory(res)[[1]]), "Peak detection") + checkEquals(processType(processHistory(res)[[2]]), "Retention time correction") } test_XCMSnExp_filterMz <- function() { - od_x2 <- od_x - new_e <- xcms:::.copy_env(od_x2@msFeatureData) - xcms:::adjustedRtime(od_x2) <- xs_2@rt$corrected - library(S4Vectors) - fd <- DataFrame(xs_2@groups) - fd$featureidx <- xs_2@groupidx - featureGroups(od_x2) <- fd + ## od_x2 <- od_x + ## new_e <- xcms:::.copy_env(od_x2@msFeatureData) + ## xcms:::adjustedRtime(od_x2) <- xs_2@rt$corrected + ## library(S4Vectors) + ## fd <- DataFrame(xs_2@groups) + ## fd$peakidx <- xs_2@groupidx + ## featureDefinitions(od_x2) <- fd - ## Subset - tmp <- filterMz(od_x2, mz = c(300, 400)) - checkException(tmp@msFeatureData$bla <- 3) - checkTrue(length(tmp@spectraProcessingQueue) == 1) - checkTrue(all(features(tmp)[, "mz"] >= 300 & features(tmp)[, "mz"] <= 400)) - checkTrue(validObject(tmp@msFeatureData)) - checkTrue(all(featureGroups(tmp)$mzmed >= 300 & - featureGroups(tmp)$mzmed <= 400)) - checkEquals(adjustedRtime(tmp), adjustedRtime(od_x2)) - checkTrue(nrow(features(tmp)) < nrow(features(od_x2))) - checkTrue(hasDetectedFeatures(tmp)) - checkTrue(hasAlignedFeatures(tmp)) - ## second - tmp <- filterMz(od_x, mz = c(300, 400)) - checkTrue(hasDetectedFeatures(tmp)) - checkTrue(!hasAlignedFeatures(tmp)) - checkTrue(all(features(tmp)[, "mz"] >= 300 & features(tmp)[, "mz"] <= 400)) - checkTrue(validObject(tmp@msFeatureData)) + ## subset on xod_x + res <- filterMz(xod_x, mz = c(300, 400)) + suppressWarnings( + checkTrue(length(res[[1]]) == 1) + ) + checkTrue(length(res@spectraProcessingQueue) == 1) + checkTrue(hasChromPeaks(res)) + checkTrue(all(chromPeaks(res)[, "mz"] >= 300 & chromPeaks(res)[, "mz"] <= 400)) + checkTrue(nrow(chromPeaks(res)) < nrow(chromPeaks(xod_x))) + idx <- which(chromPeaks(xod_x)[, "mzmin"] >= 300 & + chromPeaks(xod_x)[, "mzmax"] <= 400) + 
checkEquals(chromPeaks(res), chromPeaks(xod_x)[idx, ]) + checkTrue(!hasAdjustedRtime(res)) + checkTrue(!hasFeatures(res)) + ## subset on xod_xg + res <- filterMz(xod_xg, mz = c(300, 400)) + checkTrue(validObject(res)) + suppressWarnings( + checkTrue(length(res[[1]]) == 1) + ) + checkTrue(length(res@spectraProcessingQueue) == 1) + checkTrue(hasChromPeaks(res)) + checkTrue(all(chromPeaks(res)[, "mz"] >= 300 & chromPeaks(res)[, "mz"] <= 400)) + checkTrue(nrow(chromPeaks(res)) < nrow(chromPeaks(xod_x))) + idx <- which(chromPeaks(xod_xg)[, "mzmin"] >= 300 & + chromPeaks(xod_xg)[, "mzmax"] <= 400) + checkEquals(chromPeaks(res), chromPeaks(xod_xg)[idx, ]) + checkTrue(!hasAdjustedRtime(res)) + checkTrue(hasFeatures(res)) + checkTrue(nrow(featureDefinitions(res)) < nrow(featureDefinitions(xod_xg))) + checkTrue(all(featureDefinitions(res)[, "mzmed"] >= 300 & + featureDefinitions(res)[, "mzmed"] <= 400)) + checkTrue(all(featureDefinitions(res)[, "mzmin"] >= 300 & + featureDefinitions(res)[, "mzmin"] <= 400)) + checkTrue(all(featureDefinitions(res)[, "mzmax"] >= 300 & + featureDefinitions(res)[, "mzmax"] <= 400)) + ## subset on xod_xgr + ## o keep chromPeaks + ## o keep adjusted rtime + res <- filterMz(xod_xgr, mz = c(300, 400)) + checkTrue(validObject(res)) + suppressWarnings( + checkTrue(length(res[[1]]) == 1) + ) + checkTrue(length(res@spectraProcessingQueue) == 1) + checkTrue(hasChromPeaks(res)) + checkTrue(all(chromPeaks(res)[, "mz"] >= 300 & chromPeaks(res)[, "mz"] <= 400)) + checkTrue(nrow(chromPeaks(res)) < nrow(chromPeaks(xod_x))) + idx <- which(chromPeaks(xod_xgr)[, "mzmin"] >= 300 & + chromPeaks(xod_xgr)[, "mzmax"] <= 400) + checkEquals(chromPeaks(res), chromPeaks(xod_xgr)[idx, ]) + checkTrue(hasAdjustedRtime(res)) + checkEquals(adjustedRtime(res), adjustedRtime(xod_xgr)) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res)) == 3) + ## subset xod_xgrg + res <- filterMz(xod_xgrg, mz = c(300, 400)) + checkTrue(validObject(res)) + suppressWarnings( + 
checkTrue(length(res[[1]]) == 1) + ) + checkTrue(length(res@spectraProcessingQueue) == 1) + checkTrue(hasChromPeaks(res)) + checkTrue(all(chromPeaks(res)[, "mz"] >= 300 & chromPeaks(res)[, "mz"] <= 400)) + checkTrue(nrow(chromPeaks(res)) < nrow(chromPeaks(xod_xgrg))) + idx <- which(chromPeaks(xod_xgrg)[, "mzmin"] >= 300 & + chromPeaks(xod_xgrg)[, "mzmax"] <= 400) + checkEquals(chromPeaks(res), chromPeaks(xod_xgrg)[idx, ]) + checkTrue(hasAdjustedRtime(res)) + checkEquals(adjustedRtime(res), adjustedRtime(xod_xgrg)) + checkTrue(hasFeatures(res)) + checkTrue(nrow(featureDefinitions(res)) < nrow(featureDefinitions(xod_xgrg))) + checkTrue(all(featureDefinitions(res)[, "mzmed"] >= 300 & + featureDefinitions(res)[, "mzmed"] <= 400)) + checkTrue(all(featureDefinitions(res)[, "mzmin"] >= 300 & + featureDefinitions(res)[, "mzmin"] <= 400)) + checkTrue(all(featureDefinitions(res)[, "mzmax"] >= 300 & + featureDefinitions(res)[, "mzmax"] <= 400)) + ## With groups - no groups within this range + mzr <- c(120, 130) + res <- filterMz(xod_xg, mz = mzr) + checkTrue(!hasFeatures(res)) + checkTrue(hasChromPeaks(res)) + checkTrue(all(chromPeaks(res)[, "mz"] >= 120 & chromPeaks(res)[, "mz"] <= 130)) + res <- filterMz(xod_xgrg, mz = mzr) + checkTrue(!hasFeatures(res)) + checkTrue(hasChromPeaks(res)) + checkTrue(all(chromPeaks(res)[, "mz"] >= 120 & chromPeaks(res)[, "mz"] <= 130)) } test_XCMSnExp_filterRt <- function() { - od_x2 <- od_x - ## Testing with only feature data present: - res <- filterRt(od_x2, rt = c(2700, 2900)) + ## xod_x + res <- filterRt(xod_x, rt = c(2700, 2900)) + ## Check if the object is OK: + checkEquals(pData(res), pData(xod_x)) + spct <- spectra(res) + checkTrue(length(spct) > 0) ## MsFeatureData has to be locked! checkException(res@msFeatureData$bla <- 3) ## Retention time has to be within the range. checkTrue(all(rtime(res) >= 2700 & rtime(res) <= 2900)) - ## features have to be within the range. 
- checkTrue(all(features(res)[, "rtmin"] >= 2700 & - features(res)[, "rtmax"] <= 2900)) - ## features have to match the subsetted ones. - are_within <- features(od_x2)[, "rtmin"] >= 2700 & - features(od_x2)[, "rtmax"] <= 2900 - checkEquals(features(res), features(od_x2)[are_within,]) + rtm <- unlist(lapply(spct, rtime)) + checkTrue(all(rtm >= 2700 & rtm <= 2900)) + ## peaks have to be within the range. + checkTrue(all(chromPeaks(res)[, "rt"] >= 2700 & + chromPeaks(res)[, "rt"] <= 2900)) + are_within <- chromPeaks(xod_x)[, "rt"] >= 2700 & + chromPeaks(xod_x)[, "rt"] <= 2900 + checkEquals(chromPeaks(res), chromPeaks(xod_x)[are_within,]) ## Have a feature detection process history. + checkEquals(length(processHistory(res)), 1) checkEquals(processType(processHistory(res)[[1]]), - xcms:::.PROCSTEP.FEATURE.DETECTION) - ## filter such that we keep some spectra but no features: - res <- filterRt(od_x2, rt = c(4200, 4400)) + xcms:::.PROCSTEP.PEAK.DETECTION) + ## filter such that we keep some spectra but no chromPeaks: + res <- filterRt(xod_x, rt = c(4200, 4400)) checkTrue(all(rtime(res) >= 4200 & rtime(res) <= 4400)) - checkTrue(!hasDetectedFeatures(res)) + checkTrue(!hasChromPeaks(res)) checkTrue(length(processHistory(res)) == 0) ## No rt - res <- filterRt(od_x2, rt = c(10, 20)) + res <- filterRt(xod_x, rt = c(10, 20)) checkTrue(length(res) == 0) - ## With adjusted retention times. - ## new_e <- xcms:::.copy_env(od_x2@msFeatureData) - od_x2 <- od_x - xcms:::adjustedRtime(od_x2) <- xs_2@rt$corrected - od_x2 <- xcms:::addProcessHistory(od_x2, - xcms:::ProcessHistory( - type. = xcms:::.PROCSTEP.RTIME.CORRECTION, - date. = date(), - fileIndex. = 1:length(fileNames(od_x2)))) - ## Filtering for rt with no features drops also the adjusted rt. - res <- filterRt(od_x2, rt = c(4200, 4400)) - checkTrue(!hasDetectedFeatures(res)) + ## xod_xg + ## o keep also the feature groups that are within the window. 
+ res <- filterRt(xod_xg, rt = c(2700, 2900)) + checkTrue(all(rtime(res) >= 2700 & rtime(res) <= 2900)) + checkEquals(hasChromPeaks(res), hasChromPeaks(xod_xg)) + checkTrue(all(chromPeaks(res)[, "rt"] >= 2700 & + chromPeaks(res)[, "rt"] <= 2900)) + are_within <- chromPeaks(xod_x)[, "rt"] >= 2700 & + chromPeaks(xod_x)[, "rt"] <= 2900 + checkEquals(chromPeaks(res), chromPeaks(xod_xg)[are_within,]) checkTrue(!hasAdjustedRtime(res)) + checkTrue(hasFeatures(res)) + checkTrue(all(featureDefinitions(res)$rtmed >= 2700 & + featureDefinitions(res)$rtmed <= 2900)) + checkTrue(nrow(featureDefinitions(res)) < nrow(featureDefinitions(xod_xg))) + checkTrue(length(processHistory(res)) == 2) + checkTrue(length(processHistory(res, type = "Peak detection")) == 1) + checkTrue(length(processHistory(res, type = "Peak grouping")) == 1) + ## All feature idx have to match. + checkTrue(all(unlist(featureDefinitions(res)$peakidx) %in% + 1:nrow(chromPeaks(res)))) + ## Filter such that we don't have any chromPeaks. 
+ res <- filterRt(xod_xg, rt = c(4200, 4400)) + checkTrue(all(rtime(res) >= 4200 & rtime(res) <= 4400)) + checkTrue(!hasChromPeaks(res)) + checkTrue(!hasFeatures(res)) checkTrue(length(processHistory(res)) == 0) - ## Correct filtering: - res <- filterRt(od_x2, rt = c(2700, 2900)) - checkEquals(processType(processHistory(res)[[2]]), - xcms:::.PROCSTEP.RTIME.CORRECTION) - checkTrue(hasDetectedFeatures(res)) + ## No rt + res <- filterRt(xod_xg, rt = c(10, 20)) + checkTrue(length(res) == 0) + + ## xod_xgr + res <- filterRt(xod_xgr, rt = c(2700, 2900)) + checkTrue(all(rtime(res) >= 2700 & rtime(res) <= 2900)) + checkEquals(hasChromPeaks(res), hasChromPeaks(xod_xg)) + checkTrue(all(chromPeaks(res)[, "rt"] >= 2700 & + chromPeaks(res)[, "rt"] <= 2900)) + are_within <- chromPeaks(xod_xgr)[, "rt"] >= 2700 & + chromPeaks(xod_xgr)[, "rt"] <= 2900 + checkEquals(chromPeaks(res), chromPeaks(xod_xgr)[are_within,]) checkTrue(hasAdjustedRtime(res)) - keep_em <- rtime(od_x2) >= 2700 & rtime(od_x2) <= 2900 - checkEquals(rtime(res), rtime(od_x2)[keep_em]) - checkEquals(adjustedRtime(res), adjustedRtime(od_x2)[keep_em]) - ## Filter using adjusted retention times. - res_2 <- filterRt(od_x2, rt = c(2700, 2900), adjusted = TRUE) - checkEquals(processType(processHistory(res)[[2]]), - xcms:::.PROCSTEP.RTIME.CORRECTION) - checkTrue(hasDetectedFeatures(res)) + checkTrue(all(adjustedRtime(res) >= 2700 & adjustedRtime(res) <= 2900)) + checkTrue(!all(rtime(res, adjusted = FALSE) >= 2700 & + rtime(res, adjusted = FALSE) <= 2900)) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res, type = "Peak detection")) == 1) + checkTrue(length(processHistory(res, type = "Peak grouping")) == 1) + checkTrue(length(processHistory(res, type = "Retention time correction")) == 1) + ## Filter such that we don't have any chromPeaks. + res <- filterRt(xod_xgr, rt = c(4200, 4400), adjusted = TRUE) checkTrue(hasAdjustedRtime(res)) - ## That might not be true anymore. 
- checkTrue(!all(rtime(res_2) >= 2700 & rtime(res_2) <= 2900)) - checkTrue(all(adjustedRtime(res_2) >= 2700 & adjustedRtime(res_2) <= 2900)) - keep_em <- adjustedRtime(od_x2) >= 2700 & adjustedRtime(od_x2) <= 2900 - checkEquals(rtime(res_2), rtime(od_x2)[keep_em]) - checkEquals(adjustedRtime(res_2), adjustedRtime(od_x2)[keep_em]) - - ## Grouping with adjusted retention time. - library(S4Vectors) - fd <- DataFrame(xs_2@groups) - fd$featureidx <- xs_2@groupidx - featureGroups(od_x2) <- fd - od_x2 <- xcms:::addProcessHistory(od_x2, - xcms:::ProcessHistory( - type. = xcms:::.PROCSTEP.FEATURE.ALIGNMENT, - date. = date(), - fileIndex. = 1:length(fileNames(od_x2)))) - checkTrue(hasDetectedFeatures(od_x2)) - checkTrue(hasAdjustedRtime(od_x2)) - checkTrue(hasAlignedFeatures(od_x2)) - ## empty - res <- filterRt(od_x2, rt = c(4200, 4400)) - checkTrue(!hasDetectedFeatures(res)) - checkTrue(!hasAdjustedRtime(res)) - checkTrue(!hasAlignedFeatures(res)) - checkTrue(length(processHistory(res)) == 0) - ## - res <- filterRt(od_x2, rt = c(2700, 2900)) - checkEquals(processType(processHistory(res)[[3]]), - xcms:::.PROCSTEP.FEATURE.ALIGNMENT) - checkTrue(hasDetectedFeatures(res)) - checkTrue(all(features(res)[, "rtmin"] >= 2700 & - features(res)[, "rtmax"] <= 2900)) + checkTrue(all(adjustedRtime(res) >= 4200 & adjustedRtime(res) <= 4400)) + checkTrue(all(rtime(res) >= 4200 & rtime(res) <= 4400)) + checkTrue(!all(rtime(res, adjusted = FALSE) >= 4200 & + rtime(res, adjusted = FALSE) <= 4400)) + checkTrue(!hasChromPeaks(res)) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res)) == 1) + checkTrue(length(processHistory(res, type = "Retention time correction")) == 1) + ## No rt + res <- filterRt(xod_xgr, rt = c(10, 20)) + checkTrue(length(res) == 0) + ## filter using raw rt + res <- filterRt(xod_xgr, rt = c(2700, 2900), adjusted = FALSE) + checkTrue(!all(rtime(res) >= 2700 & rtime(res) <= 2900)) + checkEquals(hasChromPeaks(res), hasChromPeaks(xod_xg)) + 
checkTrue(all(chromPeaks(res)[, "rt"] >= 2700 & + chromPeaks(res)[, "rt"] <= 2900)) + are_within <- chromPeaks(xod_xgr)[, "rt"] >= 2700 & + chromPeaks(xod_xgr)[, "rt"] <= 2900 + checkEquals(chromPeaks(res), chromPeaks(xod_xgr)[are_within,]) checkTrue(hasAdjustedRtime(res)) - keep_em <- rtime(od_x2) >= 2700 & rtime(od_x2) <= 2900 - checkEquals(rtime(res), rtime(od_x2)[keep_em]) - checkEquals(adjustedRtime(res), adjustedRtime(od_x2)[keep_em]) - checkTrue(hasAlignedFeatures(res)) - checkTrue(all(featureGroups(res)[, "rtmin"] >= 2700 & - featureGroups(res)[, "rtmax"] <= 2900)) - validObject(res) - - ## Grouping without adjusted retention time. - od_x2 <- od_x - fd <- DataFrame(xs_2@groups) - fd$featureidx <- xs_2@groupidx - featureGroups(od_x2) <- fd - od_x2 <- xcms:::addProcessHistory(od_x2, - xcms:::ProcessHistory( - type. = xcms:::.PROCSTEP.FEATURE.ALIGNMENT, - date. = date(), - fileIndex. = 1:length(fileNames(od_x2)))) - checkTrue(hasDetectedFeatures(od_x2)) - checkTrue(!hasAdjustedRtime(od_x2)) - checkTrue(hasAlignedFeatures(od_x2)) - ## empty - res <- filterRt(od_x2, rt = c(4200, 4400)) - checkTrue(!hasDetectedFeatures(res)) - checkTrue(!hasAdjustedRtime(res)) - checkTrue(!hasAlignedFeatures(res)) - checkTrue(length(processHistory(res)) == 0) - ## - res <- filterRt(od_x2, rt = c(2700, 2900)) - checkEquals(processType(processHistory(res)[[2]]), - xcms:::.PROCSTEP.FEATURE.ALIGNMENT) - checkTrue(hasDetectedFeatures(res)) - checkTrue(all(features(res)[, "rtmin"] >= 2700 & - features(res)[, "rtmax"] <= 2900)) - checkTrue(!hasAdjustedRtime(res)) - checkTrue(hasAlignedFeatures(res)) - checkTrue(all(featureGroups(res)[, "rtmin"] >= 2700 & - featureGroups(res)[, "rtmax"] <= 2900)) - validObject(res) + checkTrue(!all(adjustedRtime(res) >= 2700 & adjustedRtime(res) <= 2900)) + checkTrue(all(rtime(res, adjusted = FALSE) >= 2700 & + rtime(res, adjusted = FALSE) <= 2900)) + checkTrue(!hasFeatures(res)) + + ## xod_xgrg + res <- filterRt(xod_xgrg, rt = c(2700, 2900)) + 
checkTrue(all(rtime(res) >= 2700 & rtime(res) <= 2900)) + checkEquals(hasChromPeaks(res), hasChromPeaks(xod_xg)) + checkTrue(all(chromPeaks(res)[, "rt"] >= 2700 & + chromPeaks(res)[, "rt"] <= 2900)) + are_within <- chromPeaks(xod_xgrg)[, "rt"] >= 2700 & + chromPeaks(xod_xgr)[, "rt"] <= 2900 + checkEquals(chromPeaks(res), chromPeaks(xod_xgrg)[are_within,]) + checkTrue(hasAdjustedRtime(res)) + checkTrue(all(adjustedRtime(res) >= 2700 & adjustedRtime(res) <= 2900)) + checkTrue(!all(rtime(res, adjusted = FALSE) >= 2700 & + rtime(res, adjusted = FALSE) <= 2900)) + checkTrue(length(processHistory(res, type = "Peak detection")) == 1) + checkTrue(length(processHistory(res, type = "Peak grouping")) == 2) + checkTrue(length(processHistory(res, type = "Retention time correction")) == 1) + checkTrue(hasFeatures(res)) + checkTrue(all(featureDefinitions(res)$rtmed >= 2700 & + featureDefinitions(res)$rtmed <= 2900)) + ## Filter such that we don't have any chromPeaks. + res <- filterRt(xod_xgrg, rt = c(4200, 4400), adjusted = TRUE) + checkTrue(hasAdjustedRtime(res)) + checkTrue(all(adjustedRtime(res) >= 4200 & adjustedRtime(res) <= 4400)) + checkTrue(all(rtime(res) >= 4200 & rtime(res) <= 4400)) + checkTrue(!all(rtime(res, adjusted = FALSE) >= 4200 & + rtime(res, adjusted = FALSE) <= 4400)) + checkTrue(!hasChromPeaks(res)) + checkTrue(!hasFeatures(res)) + checkTrue(length(processHistory(res)) == 1) + checkTrue(length(processHistory(res, type = "Retention time correction")) == 1) + ## No rt + res <- filterRt(xod_xgrg, rt = c(10, 20)) + checkTrue(length(res) == 0) + ## filter using raw rt + res <- filterRt(xod_xgrg, rt = c(2700, 2900), adjusted = FALSE) + checkTrue(!all(rtime(res) >= 2700 & rtime(res) <= 2900)) + checkEquals(hasChromPeaks(res), hasChromPeaks(xod_xg)) + checkTrue(all(chromPeaks(res)[, "rt"] >= 2700 & + chromPeaks(res)[, "rt"] <= 2900)) + are_within <- chromPeaks(xod_xgrg)[, "rt"] >= 2700 & + chromPeaks(xod_xgrg)[, "rt"] <= 2900 + checkEquals(chromPeaks(res), 
chromPeaks(xod_xgrg)[are_within,]) + checkTrue(hasAdjustedRtime(res)) + checkTrue(!all(adjustedRtime(res) >= 2700 & adjustedRtime(res) <= 2900)) + checkTrue(all(rtime(res, adjusted = FALSE) >= 2700 & + rtime(res, adjusted = FALSE) <= 2900)) + checkTrue(hasFeatures(res)) + checkTrue(all(featureDefinitions(res)$rtmed >= 2700 & + featureDefinitions(res)$rtmed <= 2900)) } ## Test the coercion method. test_as_XCMSnExp_xcmsSet <- function() { + od_x <- faahko_xod res <- xcms:::.XCMSnExp2xcmsSet(od_x) res <- as(od_x, "xcmsSet") ## Results should be the same as in xs. - checkEquals(res@peaks, features(od_x)) + checkEquals(res@peaks, chromPeaks(od_x)) checkEquals(res@.processHistory, processHistory(od_x)) checkEquals(phenoData(res), pData(od_x)) checkEquals(filepaths(res), fileNames(od_x)) @@ -627,35 +902,32 @@ test_as_XCMSnExp_xcmsSet <- function() { checkEquals(profStep(res), 0.1) ## Can we further process this? sampclass(res) <- rep("K", 3) - res <- group(res) - res <- fillPeaks(res) + res <- group.density(res, minfrac = 0.5) + ## res <- fillPeaks(res) ## Add groups. - od_2 <- od_x - od_2 <- xcms:::addProcessHistory(od_2, - xcms:::ProcessHistory(fileIndex. = 1:3, - type = xcms:::.PROCSTEP.FEATURE.ALIGNMENT)) - library(S4Vectors) - fd <- DataFrame(xs_2@groups) - fd$featureidx <- xs_2@groupidx - featureGroups(od_2) <- fd - ## Add retention time adjustment. - od_3 <- od_2 - od_3 <- xcms:::addProcessHistory(od_3, - xcms:::ProcessHistory(fileIndex. = 1:3, - type = xcms:::.PROCSTEP.RTIME.CORRECTION)) - adjustedRtime(od_3) <- xs_2@rt$corrected + od_2 <- groupChromPeaks(od_x, param = PeakDensityParam()) + checkEquals(unname(featureDefinitions(od_2)$peakidx), groupidx(res)) + ## rt correction + od_3 <- adjustRtime(od_2, param = PeakGroupsParam(minFraction = 1, + span = 0.4)) ## With groups. 
res <- as(od_2, "xcmsSet") - checkEquals(res@groups, xs_2@groups) - checkEquals(res@groupidx, xs_2@groupidx) + ftDef <- featureDefinitions(od_2)[, -ncol(featureDefinitions(od_2))] + ftDef <- S4Vectors::as.matrix(ftDef) + rownames(ftDef) <- NULL + checkEquals(res@groups, ftDef) + checkEquals(res@groupidx, unname(featureDefinitions(od_2)$peakidx)) ## With adjusted retention time. + res_2 <- retcor.peakgroups(res, missing = 0, span = 0.4) res <- as(od_3, "xcmsSet") checkTrue(any(unlist(res@rt$raw) != unlist(res@rt$corrected))) - checkEquals(res@rt$corrected, xs_2@rt$corrected) - + checkEquals(res@rt$corrected, res_2@rt$corrected) + checkEquals(chromPeaks(od_3), peaks(res)) + checkEquals(peaks(res_2), peaks(res)) + ## Test with different binning methods: ## o binlin mfp <- MatchedFilterParam(impute = "lin", binSize = 3) @@ -681,76 +953,282 @@ test_MsFeatureData_class_validation <- function() { fd$a <- 5 checkTrue(!is.logical(xcms:::validateMsFeatureData(fd))) rm("a", envir = fd) - ## Check features - fd$features <- 4 + ## Check chromPeaks + fd$chromPeaks <- 4 checkTrue(!is.logical(xcms:::validateMsFeatureData(fd))) fdm <- matrix(ncol = 3, nrow = 5) colnames(fdm) <- c("a", "b", "sample") - fd$features <- fdm + fd$chromPeaks <- fdm checkTrue(!is.logical(xcms:::validateMsFeatureData(fd))) - rm("features", envir = fd) - ## featureGroups - fd$features <- xs_2@peaks - fd$featureGroups <- 4 + rm("chromPeaks", envir = fd) + ## featureDefinitions + fd$chromPeaks <- xs_2@peaks + fd$featureDefinitions <- 4 checkTrue(!is.logical(xcms:::validateMsFeatureData(fd))) fg <- DataFrame(fdm) - fd$featureGroups <- fg + fd$featureDefinitions <- fg checkTrue(!is.logical(xcms:::validateMsFeatureData(fd))) fg <- DataFrame(xs_2@groups) - fg$featureidx <- xs_2@groupidx + fg$peakidx <- xs_2@groupidx fg_2 <- fg fg_2$mzmin <- "a" - fd$featureGroups <- fg_2 + fd$featureDefinitions <- fg_2 checkTrue(!is.logical(xcms:::validateMsFeatureData(fd))) fg_2 <- fg - fg_2$featureidx[[1]] <- c(50000, 3) - 
fd$featureGroups <- fg_2 + fg_2$peakidx[[1]] <- c(50000, 3) + fd$featureDefinitions <- fg_2 checkTrue(!is.logical(xcms:::validateMsFeatureData(fd))) ## adjustedRtime - fd$featureGroups <- fg + fd$featureDefinitions <- fg fd$adjustedRtime <- 4 checkTrue(!is.logical(xcms:::validateMsFeatureData(fd))) fd$adjustedRtime <- list(1:5, "b") checkTrue(!is.logical(xcms:::validateMsFeatureData(fd))) ## Now check that we pass if we put all correct data into the object: fd <- new("MsFeatureData") - fd$features <- xs_2@peaks - checkTrue(xcms:::validateMsFeatureData(fd)) + fd$chromPeaks <- xs_2@peaks + checkTrue(length(xcms:::validateMsFeatureData(fd)) == 0) fd$adjustedRtime <- xs_2@rt$corrected - checkTrue(xcms:::validateMsFeatureData(fd)) + checkTrue(length(xcms:::validateMsFeatureData(fd)) == 0) fg <- DataFrame(xs_2@groups) - fg$featureidx <- xs_2@groupidx - checkTrue(xcms:::validateMsFeatureData(fd)) + fg$peakidx <- xs_2@groupidx + checkTrue(length(xcms:::validateMsFeatureData(fd)) == 0) } test_MsFeatureData_class_accessors <- function() { fd <- new("MsFeatureData") library(S4Vectors) - checkTrue(!hasDetectedFeatures(fd)) + checkTrue(!hasChromPeaks(fd)) checkTrue(!hasAdjustedRtime(fd)) - checkTrue(!hasAlignedFeatures(fd)) - suppressWarnings(checkEquals(features(fd), NULL)) - suppressWarnings(checkEquals(featureGroups(fd), NULL)) + checkTrue(!hasFeatures(fd)) + suppressWarnings(checkEquals(chromPeaks(fd), NULL)) + suppressWarnings(checkEquals(featureDefinitions(fd), NULL)) suppressWarnings(checkEquals(adjustedRtime(fd), NULL)) - ## features - features(fd) <- xs_2@peaks - checkTrue(hasDetectedFeatures(fd)) - checkEquals(features(fd), xs_2@peaks) - ## featureGroups + ## chromPeaks + chromPeaks(fd) <- xs_2@peaks + checkTrue(hasChromPeaks(fd)) + checkEquals(chromPeaks(fd), xs_2@peaks) + ## featureDefinitions fg <- DataFrame(xs_2@groups) - fg$featureidx <- xs_2@groupidx - featureGroups(fd) <- fg - checkTrue(hasAlignedFeatures(fd)) - checkEquals(featureGroups(fd), fg) + fg$peakidx 
<- xs_2@groupidx + featureDefinitions(fd) <- fg + checkTrue(hasFeatures(fd)) + checkEquals(featureDefinitions(fd), fg) ## adjustedRtime adjustedRtime(fd) <- xs_2@rt$corrected checkTrue(hasAdjustedRtime(fd)) checkEquals(adjustedRtime(fd), xs_2@rt$corrected) } + +## Test extraction of chromatograms. +test_extractChromatograms <- function() { + ## Have: od_x: OnDiskMSNnExp + ## xod_x: XCMSnExp, with detected chromPeaks. + ## xod_xg: with feature groups. + ## xod_xgr: with adjusted retention times (no feature groups) + ## xod_xgrg: adjusted rt and feature groups. + + ## XCMSnExp: TIC - can NOT compare with the reported TIC, as that is + ## different! Eventually some background adjustment performed? + ## BPC - CDF don't habe a BPC. + rtr <- c(2600, 2700) + res <- xcms:::extractChromatograms(xod_x, aggregationFun = "max", rt = rtr) + checkTrue(all(rtime(res[[1]]) >= rtr[1])) + checkTrue(all(rtime(res[[1]]) <= rtr[2])) + checkTrue(all(rtime(res[[2]]) >= rtr[1])) + checkTrue(all(rtime(res[[2]]) <= rtr[2])) + checkTrue(all(rtime(res[[3]]) >= rtr[1])) + checkTrue(all(rtime(res[[3]]) <= rtr[2])) + tmp <- filterRt(filterFile(xod_x, file = 2), rt = rtr) + checkEquals(rtime(tmp), rtime(res[[2]])) + ints <- spectrapply(tmp, function(z) return(max(intensity(z)))) + checkEquals(unlist(ints), intensity(res[[2]])) + ## Check names + checkEquals(names(rtime(res[[1]])), names(intensity(res[[1]]))) + ## Assure we get the same with an OnDiskMSnExp and grouped XCMSnExp + res_2 <- xcms:::extractChromatograms(od_x, aggregationFun = "max", rt = rtr) + checkEquals(res, res_2) + res_3 <- xcms:::extractChromatograms(xod_xg, aggregationFun = "max", rt = rtr) + checkEquals(res, res_3) + + ## XCMSnExp: with mzrange and rtrange: + mzr <- c(120, 130) + tmp <- filterMz(xod_xg, mz = mzr) + featureDefinitions(tmp) + tmp <- filterRt(xod_xg, rt = rtr) + featureDefinitions(tmp) + res_2 <- xcms:::extractChromatograms(xod_xg, rt = rtr, mz = mzr) + ## + + ## XCMSnExp with adjusted rtime +} + 
+test_signal_integration <- function() { + ## Testing the signal integration of peaks. + ## For centWave + tmp <- xod_xgrg + rtr <- chromPeaks(tmp)[1, c("rtmin", "rtmax")] + mzr <- chromPeaks(tmp)[1, c("mzmin", "mzmax")] + chr <- extractChromatograms(tmp, rt = rtr, mz = mzr) + pkInt <- sum(intensity(chr[[1]]) * + ((rtr[2] - rtr[1]) / (length(chr[[1]]) - 1))) + checkEquals(pkInt, unname(chromPeaks(tmp)[1, "into"])) + + tmp <- filterFile(xod_xgrg, file = 2) + idxs <- sample(1:nrow(chromPeaks(tmp)), 5) + ## Now, for i = 20, for 6 rt I got an NA. Should I remove these measurements? + ## idxs <- 1:nrow(chromPeaks(tmp)) + for (i in idxs) { + rtr <- chromPeaks(tmp)[i, c("rtmin", "rtmax")] + mzr <- chromPeaks(tmp)[i, c("mzmin", "mzmax")] + chr <- extractChromatograms(tmp, rt = rtr, mz = mzr)[[1]] + ints <- intensity(chr) + pkI <- sum(ints, na.rm = TRUE) * ((rtr[2] - rtr[1]) / (length(ints) - 1)) + ## cat(" ", chromPeaks(tmp)[i, "into"], " - ", pkI, "\n") + checkEquals(unname(pkI), unname(chromPeaks(tmp)[i, "into"])) + } + ## pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[idxs, , drop = FALSE]) + ## checkEquals(unname(pkI2), unname(chromPeaks(tmp)[idxs, "into"])) + + ## Now for matchedfilter. 
+ tmp <- findChromPeaks(filterFile(od_x, 2), param = MatchedFilterParam()) + rtr <- chromPeaks(tmp)[1, c("rtmin", "rtmax")] + mzr <- chromPeaks(tmp)[1, c("mzmin", "mzmax")] + chr <- extractChromatograms(tmp, rt = rtr, mz = mzr) + pkInt <- sum(intensity(chr[[1]]) * + ((rtr[2] - rtr[1]) / (length(chr[[1]]) - 1))) + chromPeaks(tmp)[1, "into"] + checkEquals(pkInt, unname(chromPeaks(tmp)[1, "into"])) + idxs <- sample(1:nrow(chromPeaks(tmp)), 5) + ## idxs <- 1:nrow(chromPeaks(tmp)) + for (i in idxs) { + rtr <- chromPeaks(tmp)[i, c("rtmin", "rtmax")] + mzr <- chromPeaks(tmp)[i, c("mzmin", "mzmax")] + chr <- extractChromatograms(tmp, rt = rtr, mz = mzr)[[1]] + ints <- intensity(chr) + pkI <- sum(ints, na.rm = TRUE) * ((rtr[2] - rtr[1]) / (length(ints) - 1)) + ## cat(" ", chromPeaks(tmp)[i, "into"], " - ", pkI, "\n") + checkEquals(unname(pkI), unname(chromPeaks(tmp)[i, "into"])) + } + ## pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[idxs, , drop = FALSE]) + ## checkEquals(unname(pkI2), unname(chromPeaks(tmp)[idxs, "into"])) + + ## ## matchedFilter with wide mz bins. + ## ## For matchedFilter I will have to do this on the profile matrix! + ## tmp <- findChromPeaks(filterFile(od_x, 2), + ## param = MatchedFilterParam(binSize = 2)) + ## idxs <- 1:nrow(chromPeaks(tmp)) + ## pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[idxs, , drop = FALSE]) + ## checkEquals(unname(pkI2), unname(chromPeaks(tmp)[idxs, "into"])) +} + +## Test the featureValues method. +test_featureValues <- function() { + od_x <- faahko_xod + xs <- faahko_xs + + fdp <- PeakDensityParam(sampleGroups = xs$class) + od_x <- groupChromPeaks(od_x, param = fdp) + xs <- group(xs, method = "density") + + fvs <- featureValues(od_x, value = "into") + checkEquals(rownames(fvs), rownames(featureDefinitions(od_x))) + rownames(fvs) <- NULL + colnames(fvs) <- NULL + gvs <- groupval(xs, value = "into") + rownames(gvs) <- NULL + colnames(gvs) <- NULL + checkEquals(fvs, gvs) +} + +## Test internal helpers. 
+test_peakIndex <- function() { + pkI <- xcms:::.peakIndex(xod_xg) + checkEquals(names(pkI), rownames(featureDefinitions(xod_xg))) + checkEquals(unname(pkI), featureDefinitions(xod_xg)$peakidx) +} + +test_adjustRtimePeakGroups <- function() { + pkGrp <- xcms:::adjustRtimePeakGroups(xod_xg, + param = PeakGroupsParam(minFraction = 1)) + checkEquals(colnames(pkGrp), basename(fileNames(xod_xg))) + ## No NAs allowed across samples: + isNa <- apply(pkGrp, MARGIN = 1, function(z) sum(is.na(z))) + checkTrue(all(isNa == 0)) + pkGrp <- xcms:::adjustRtimePeakGroups(xod_xg, + param = PeakGroupsParam(minFraction = 0.5)) + isNa <- apply(pkGrp, MARGIN = 1, function(z) sum(is.na(z))) + checkTrue(max(isNa) == 1) +} + +test_extractMsData <- function() { + ## All the data + ## all <- extractMsData(od_x) + ## checkEquals(length(all), length(fileNames(od_x))) + ## rts <- split(rtime(od_x), f = fromFile(od_x)) + ## checkEquals(lengths(rts), unlist(lapply(all, nrow))) + ## On an OnDiskMSnExp with only mz + mzr <- c(300, 302) + res <- extractMsData(od_x, mz = mzr) + checkEquals(length(res), length(fileNames(od_x))) + checkTrue(all(res[[1]][, "mz"] >= mzr[1] & res[[1]][, "mz"] <= mzr[2])) + checkTrue(all(res[[2]][, "mz"] >= mzr[1] & res[[2]][, "mz"] <= mzr[2])) + checkTrue(all(res[[3]][, "mz"] >= mzr[1] & res[[3]][, "mz"] <= mzr[2])) + ## On an OnDiskMSnExp with only rt + rtr <- c(2500, 2800) + res <- extractMsData(od_x, rt = rtr) + checkTrue(all(res[[1]][, "rt"] >= rtr[1] & res[[1]][, "rt"] <= rtr[2])) + checkTrue(all(res[[2]][, "rt"] >= rtr[1] & res[[2]][, "rt"] <= rtr[2])) + checkTrue(all(res[[3]][, "rt"] >= rtr[1] & res[[3]][, "rt"] <= rtr[2])) + ## LLLLL TODO Continue here, and then add example to the extractMsData + ## help page. 
+ ## On an OnDiskMSnExp with mz and rt + res <- extractMsData(od_x, rt = rtr, mz = mzr) + checkTrue(all(res[[1]][, "rt"] >= rtr[1] & res[[1]][, "rt"] <= rtr[2])) + checkTrue(all(res[[2]][, "rt"] >= rtr[1] & res[[2]][, "rt"] <= rtr[2])) + checkTrue(all(res[[3]][, "rt"] >= rtr[1] & res[[3]][, "rt"] <= rtr[2])) + checkTrue(all(res[[1]][, "mz"] >= mzr[1] & res[[1]][, "mz"] <= mzr[2])) + checkTrue(all(res[[2]][, "mz"] >= mzr[1] & res[[2]][, "mz"] <= mzr[2])) + checkTrue(all(res[[3]][, "mz"] >= mzr[1] & res[[3]][, "mz"] <= mzr[2])) + + ## XCMSnExp, xod_xgr + ## with adjusted retention times + res <- extractMsData(xod_xgr, rt = rtr, mz = mzr) + checkTrue(all(res[[1]][, "rt"] >= rtr[1] & res[[1]][, "rt"] <= rtr[2])) + checkTrue(all(res[[2]][, "rt"] >= rtr[1] & res[[2]][, "rt"] <= rtr[2])) + checkTrue(all(res[[3]][, "rt"] >= rtr[1] & res[[3]][, "rt"] <= rtr[2])) + checkTrue(all(res[[1]][, "mz"] >= mzr[1] & res[[1]][, "mz"] <= mzr[2])) + checkTrue(all(res[[2]][, "mz"] >= mzr[1] & res[[2]][, "mz"] <= mzr[2])) + checkTrue(all(res[[3]][, "mz"] >= mzr[1] & res[[3]][, "mz"] <= mzr[2])) + ## without adjusted retention times + res_2 <- extractMsData(xod_xgr, adjustedRtime = FALSE, rt = rtr, mz = mzr) + checkTrue(all(res_2[[1]][, "rt"] >= rtr[1] & res_2[[1]][, "rt"] <= rtr[2])) + checkTrue(all(res_2[[2]][, "rt"] >= rtr[1] & res_2[[2]][, "rt"] <= rtr[2])) + checkTrue(all(res_2[[3]][, "rt"] >= rtr[1] & res_2[[3]][, "rt"] <= rtr[2])) + checkTrue(all(res_2[[1]][, "mz"] >= mzr[1] & res_2[[1]][, "mz"] <= mzr[2])) + checkTrue(all(res_2[[2]][, "mz"] >= mzr[1] & res_2[[2]][, "mz"] <= mzr[2])) + checkTrue(all(res_2[[3]][, "mz"] >= mzr[1] & res_2[[3]][, "mz"] <= mzr[2])) + checkTrue(nrow(res[[1]]) != nrow(res_2[[1]])) + checkTrue(nrow(res[[2]]) != nrow(res_2[[2]])) + checkTrue(nrow(res[[3]]) != nrow(res_2[[3]])) + + ## rt and mzr out of range. 
+ res <- extractMsData(od_x, rt = c(6000, 6300), mz = c(0, 3)) + checkEquals(length(res), 3) + checkTrue(all(unlist(lapply(res, FUN = nrow)) == 0)) + res <- extractMsData(od_x, rt = c(6000, 6300)) + checkEquals(length(res), 3) + checkTrue(all(unlist(lapply(res, FUN = nrow)) == 0)) + res <- extractMsData(od_x, mz = c(0, 3)) + checkEquals(length(res), 3) + checkTrue(all(unlist(lapply(res, FUN = nrow)) == 0)) +} + ############################################################ ## Test getEIC alternatives. dontrun_getEIC_alternatives <- function() { + library(RUnit) library(xcms) fls <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), @@ -758,7 +1236,49 @@ dontrun_getEIC_alternatives <- function() { system.file('cdf/KO/ko18.CDF', package = "faahKO")) od <- readMSData2(fls) cwp <- CentWaveParam(noise = 10000, snthresh = 40) - od_x <- detectFeatures(od, param = cwp) + od_x <- findChromPeaks(od, param = cwp) + + ## ## with this one we get 3 spectras back, one in each file. + ## rtr <- c(2787, 2788) + ## res <- filterRt(od_x, rt = rtr) + + ## ## ----------- + ## ## That's to test .extractChromatogram + ## mzr <- c(279, 279) + ## chrs <- extractChromatograms(od_x, mzrange = mzr) + ## ## input parameter + ## x <- od_x + ## rm(rtrange) + ## rm(mzrange) + ## mzrange <- mzr + ## aggregationFun <- "sum" + ## ## function call + ## ## ----------- + + ## od_xg <- groupChromPeaks(od_x, param = PeakDensityParam()) + ## od_xgr <- adjustRtime(od_xg, param = PeakGroupsParam(span = 0.4)) + + ## rtr <- as.matrix(featureDefinitions(od_xg)[1:5, c("rtmin", "rtmax")]) + ## mzr <- as.matrix(featureDefinitions(od_xg)[1:5, c("mzmin", "mzmax")]) + + ## system.time( + ## res1 <- xcms:::.extractMsData(od, rtrange = rtr[1, ], mzrange = mzr[1, ]) + ## ) + ## system.time( + ## res2 <- xcms:::.extractMsData(od_xgr, rtrange = rtr[1, ], mzrange = mzr[1, ]) + ## ) + ## system.time( + ## res1 <- xcms:::.sliceApply(od, rtrange = rtr[1, ], mzrange = mzr[1, ]) + ## ) + ## system.time( + ## res1 <- 
xcms:::.sliceApply(od_xgr, rtrange = rtr[1, ], mzrange = mzr[1, ]) + ## ) + + ## library(profvis) + ## profvis(res <- xcms:::.extractMsData(od, rtrange = rtr[1, ], mzrange = mzr[1, ])) + + + ## Compare with getEIC xs <- as(od_x, "xcmsSet") sampclass(xs) <- rep("KO", 3) xs_2 <- group(xs) @@ -770,12 +1290,47 @@ dontrun_getEIC_alternatives <- function() { rtr <- groups(xs_2)[1:5, c("rtmin", "rtmax")] mzr <- groups(xs_2)[1:5, c("mzmin", "mzmax")] + ## + + register(SerialParam()) + od <- as(od_x, "OnDiskMSnExp") + ## Get all of em. + chrs <- xcms:::.extractMultipleChromatograms(od, rt = rtr, mz = mzr) + for (i in 1:nrow(rtr)) { + chrs1 <- extractChromatograms(od_x, rt = rtr[i, ], mz = mzr[i, ]) + checkEquals(unname(chrs1), unname(chrs[[i]])) + } + + library(microbenchmark) + microbenchmark(xcms:::.extractChromatogram(od, rt = rtr[1, ], mz = mzr[1, ]), + xcms:::.extractMultipleChromatograms(od, rt = rtr[1, , drop = FALSE], + mz = mzr[1, , drop = FALSE]), + times = 10) + + library(profvis) + profvis(xcms:::.extractMultipleChromatograms(od, rt = rtr[1, , drop = FALSE], + mz = mzr[1, , drop = FALSE])) + ## Extract the EIC: system.time( eic <- getEIC(xs_2, rtrange = rtr, mzrange = mzr, rt = "raw") - ) ## 3.7sec + ) ## 5.5 sec + system.time( + eic2 <- xcms:::.extractMultipleChromatograms(od, rt = rtr, mz = mzr) + ) ## 0.13 sec + + + ## Now try to do the same using MSnbase stuff. + system.time( + res <- xcms:::.extractMsData(od, rtrange = rtr[1, ], mzrange = mzr[1, ]) + ) ## 0.7 sec. + system.time( + res <- xcms:::.extractMsData(od_x, rtrange = rtr[1, ], mzrange = mzr[1, ]) + ) ## 0.74 sec. + + rts <- rtime(od_x) idx <- apply(rtr, MARGIN = 1, function(z) { which(rts >= z[1] & rts <= z[2]) @@ -786,43 +1341,6 @@ dontrun_getEIC_alternatives <- function() { scts <- spectra(od_ss) ) ## 0.7 secs. - ## This is somewhat similar to the getEIC, just that it extracts for each - ## mz/rt range pair a data.frame with rt, mz, intensity per sample. 
- ## This version works on a single rt/mz range pair at a time. - ## CHECK: - ## 1) mz range outside. - ## 2) rt range outside. - extractMsData <- function(x, rtrange, mzrange) { - ## Subset the OnDiskMSnExp - fns <- fileNames(x) - tmp <- filterMz(filterRt(x, rt = rtrange), mz = mzrange) - fromF <- match(fileNames(tmp), fns) - ## Now extract mz-intensity pairs from each spectrum. - ## system.time( - ## suppressWarnings( - ## dfs <- spectrapply(tmp, as.data.frame) - ## ) - ## ) ## 0.73sec - ## system.time( - suppressWarnings( - dfs <- spectrapply(tmp, function(z) { - if (peaksCount(z)) - return(data.frame(rt = rep_len(rtime(z), length(z@mz)), - as.data.frame(z))) - else - return(data.frame(rt = numeric(), mz = numeric(), - i = integer())) - }) - ) - ## ) ## 0.701 - ## dfs[] <- mapply(FUN = function(y, z) { - ## return(cbind(rt = rep.int(z, nrow(y)), y)) - ## }, y = dfs, z = rtime(tmp), SIMPLIFY = FALSE, USE.NAMES = FALSE) - res <- vector(mode = "list", length = length(fns)) - res[fromF] <- split(dfs, f = fromFile(tmp)) - return(lapply(res, do.call, what = rbind)) - } - ## Tests. rtrange <- rtr[3, ] mzrange <- mzr[3, ] @@ -841,9 +1359,21 @@ dontrun_getEIC_alternatives <- function() { ############################################################ ## Alternative: do it by file. + ## IF it's an XCMSnExp: coerce to OnDiskMSnExp by replacing the rtime with + ## the adjusted rtime. ## 1) Subset the od selecting all spectra that fall into the rt ranges. - ## 2) Work on that subsetted od: load into memory. - ## 3) loop over the rtrange and mzrange. + ## keep_logical <- have_rt >= rt[1] & have_rt <= rt[2] + ## tmp <- as(object, "OnDiskMSnExp")[base::which(keep_logical)] + ## 2) Call a spectrapply, passing the matrix of rts and mzs. + ## (Load the spectra (without any filtering now).) + ## 3) spectrapply function loops over the rtrange and mzrange: + ## - select all spectra that are within the range. + ## - lapply on those, apply filterMz with the current mz range. 
+ ## - return a list of Chromatogram classes. + ## 4) We get a list of list of Chromatogram objects. [[files]][[ranges]]. + ## Rearrange the lists: [[ranges]][[files]]. + ## Could also put that into a DataFrame... [ranges, files] + ## For a single one: @@ -853,6 +1383,7 @@ dontrun_getEIC_alternatives <- function() { dfs <- spectrapply(tmp, as.data.frame) ) + ## mz outside: mzrange <- c(600, 601) tmp <- filterMz(filterRt(od, rt = rtrange), mz = mzrange) diff --git a/inst/unitTests/runit.binning.R b/inst/unitTests/runit.binning.R index 91d7d3350..051a86d52 100644 --- a/inst/unitTests/runit.binning.R +++ b/inst/unitTests/runit.binning.R @@ -607,44 +607,51 @@ test_breaks <- function() { ## Test generation of breaks for binning. ## o nBins res <- breaks_on_nBins(1, 10, 4) - checkIdentical(res, seq(1, 10, length.out = 5)) + checkEquals(res, seq(1, 10, length.out = 5)) res <- breaks_on_nBins(2, 8, 20) - checkIdentical(res, seq(2, 8, length.out = 21)) + checkEquals(res, seq(2, 8, length.out = 21)) ## brksR <- seq(200, 600, length.out = 2002) brks <- breaks_on_nBins(200, 600, nBins = 2001) - checkIdentical(brks, brksR) + checkEquals(brks, brksR) ## Simulate shift by half bin size brksR <- seq((200 - 0.1), (600 + 0.1), length.out = 2002) brks <- breaks_on_nBins(200, 600, nBins = 2001, shiftByHalfBinSize = TRUE) - checkIdentical(brks, brksR) + checkEquals(brks, brksR) ## o binSize res <- breaks_on_binSize(1, 10, 0.13) resR <- seq(1, 10, by = 0.13) - checkIdentical(res[-length(res)], resR[-length(resR)]) - checkIdentical(res[length(res)], 10) + checkEquals(res[-length(res)], resR[-length(resR)]) + checkEquals(res[length(res)], 10) ## Will create one bin more. 
res <- breaks_on_binSize(1, 10, 0.51) resR <- seq(1, 10, by = 0.51) - checkIdentical(res[-length(res)], resR) - checkIdentical(res[length(res)], 10) + checkEquals(res[-length(res)], resR) + checkEquals(res[length(res)], 10) ## brksR <- seq(200, 600, by = 0.2) brks <- breaks_on_binSize(200, 600, binSize = 0.2) - checkIdentical(brks, brksR) + checkEquals(brks, brksR) ## Simulate shift by half bin size brksR <- seq((200 - 0.1), (600 + 0.1), by = 0.2) brks <- breaks_on_binSize((200 - 0.1), (600 + 0.1), binSize = 0.2) - checkIdentical(brks, brksR) + checkEquals(brks, brksR) ## + ## Ultimate fix for issue #118 brksR <- seq((200 - 0.1), (600), by = 0.2) brks <- breaks_on_binSize((200 - 0.1), (600), binSize = 0.2) - checkTrue(length(brks) > length(brksR)) - checkIdentical(brks[-length(brks)], brksR) - checkIdentical(brks[length(brks)], 600) + ## Compare them up to the last value, since in R that will be 600-01, while + ## breaks_on_binSize will ensure that the upper limit (600) is still within + ## the breaks + cmn <- 1:(length(brksR) - 1) + checkEquals(brks[cmn], brksR[cmn]) + ## Now, that below breaks on a windows build machine (issue #127) + ## checkTrue(length(brks) > length(brksR)) + ## checkEquals(brks[-length(brks)], brksR) + checkEquals(brks[length(brks)], 600) } ############################################################ diff --git a/inst/unitTests/runit.createProfileMatrix.R b/inst/unitTests/runit.createProfileMatrix.R index 6e1e84333..c5bb027dc 100644 --- a/inst/unitTests/runit.createProfileMatrix.R +++ b/inst/unitTests/runit.createProfileMatrix.R @@ -4,6 +4,7 @@ ## o profMat method for xcmsRaw. ## o profStep<- method for xcmsRaw. ## o profMethod<- method for xcmsRaw. 
+## o profMethod for XCMSnExp and OnDiskMSnExp ## library(faahKO) fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") @@ -72,6 +73,24 @@ test_profMat <- function() { checkEquals(xr_3@env$profile, profMat(xr_3)) } +test_profMat_OnDiskMSnExp <- function() { + ## Get it from all 3 files in one go. + res <- profMat(faahko_od, step = 2) + res_2 <- profMat(xcmsRaw(faahko_3_files[2], profstep = 0), step = 2) + checkEquals(res_2, res[[2]]) + res_2 <- profMat(xcmsRaw(faahko_3_files[3], profstep = 0), step = 2) + checkEquals(res_2, res[[3]]) + + res_2 <- profMat(faahko_xod, step = 2) + checkEquals(res, res_2) + + res <- profMat(faahko_od, step = 2, method = "binlin", fileIndex = 2) + res_2 <- profMat(xcmsRaw(faahko_3_files[2], profstep = 0), step = 2, + method = "binlin") + checkEquals(res_2, res[[1]]) +} + + ## profStep<- test_profStepReplace <- function() { ## Profile matrix will be generated/replaced if the step parameter is > 0 diff --git a/inst/unitTests/runit.do_adjustRtime.R b/inst/unitTests/runit.do_adjustRtime.R new file mode 100644 index 000000000..19757fe86 --- /dev/null +++ b/inst/unitTests/runit.do_adjustRtime.R @@ -0,0 +1,526 @@ +## retention time correction methods and functionality related to adjusted +## retention times. 
+ +test_adjustRtime_PeakGroups <- function() { + xod <- faahko_xod + xs <- faahko_xs + + ## Group these + xsg <- group(xs) + xodg <- groupChromPeaks(xod, + param = PeakDensityParam(sampleGroups = xs$class)) + checkEquals(peaks(xsg), chromPeaks(xodg)[, colnames(peaks(xsg))]) + checkEquals(xsg@groupidx, featureDefinitions(xodg)$peakidx) + checkTrue(length(processHistory(xodg, + type = xcms:::.PROCSTEP.PEAK.DETECTION)) == 1) + checkTrue(length(processHistory(xodg, + type = xcms:::.PROCSTEP.PEAK.GROUPING)) == 1) + ## Now do the retention time correction + xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 0.3) + ## minFr <- (length(fileNames(xod)) - 1) / length(fileNames(xod)) + p <- PeakGroupsParam(minFraction = 1, span = 0.3) + xodr <- adjustRtime(xodg, param = p) + ## Check that we've got process histories. + checkTrue(validObject(xodr)) + checkTrue(hasChromPeaks(xodr)) + checkTrue(!hasFeatures(xodr)) + ## But we would like to keep the related process history step: + checkTrue(hasAdjustedRtime(xodr)) + checkTrue(hasFeatures(xodg)) + ## We want to keep the process history step of the feature alignment! + checkTrue(length(processHistory(xodr, + type = xcms:::.PROCSTEP.PEAK.GROUPING)) == 1) + checkTrue(length(processHistory(xodr, + type = xcms:::.PROCSTEP.RTIME.CORRECTION)) == 1) + ## Different from original: + checkTrue(sum(chromPeaks(xod)[, "rt"] != chromPeaks(xodr)[, "rt"]) > 200) + checkTrue(sum(chromPeaks(xod)[, "rtmin"] != chromPeaks(xodr)[, "rtmin"]) > 200) + checkTrue(sum(chromPeaks(xod)[, "rtmax"] != chromPeaks(xodr)[, "rtmax"]) > 200) + ## between xcmsSet and XCMSnExp + checkEquals(chromPeaks(xodr)[, colnames(peaks(xsr))], peaks(xsr)) + ## To compare the adjusted retention time we have to extract it by sample! + ## Otherwise the ordering will not be the same, as rtime is ordered by + ## retention time, but @rt$raw by sample. 
+ checkEquals(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), + unlist(xsr@rt$corrected, use.names = FALSE)) + ## Just to ensure - are the raw rt the same? + checkEquals(unlist(rtime(xod, bySample = TRUE), use.names = FALSE), + unlist(xs@rt$raw, use.names = FALSE)) + ## Check that we get the same by supplying the peakGroupsMatrix. + pgm <- adjustRtimePeakGroups(xodg, param = p) + p_2 <- p + minFraction(p_2) <- 0.5 + extraPeaks(p_2) <- 20 + peakGroupsMatrix(p_2) <- pgm + xodr_2 <- adjustRtime(xodg, param = p_2) + checkEquals(adjustedRtime(xodr), adjustedRtime(xodr_2)) + checkEquals(chromPeaks(xodr), chromPeaks(xodr_2)) + p_got <- processParam( + processHistory(xodr, type = xcms:::.PROCSTEP.RTIME.CORRECTION)[[1]]) + peakGroupsMatrix(p_got) <- matrix(ncol = 0, nrow = 0) + checkEquals(p_got, p) + checkEquals(processParam( + processHistory(xodr_2, type = xcms:::.PROCSTEP.RTIME.CORRECTION)[[1]]), + p_2) + ## Doing an additional grouping + xodrg <- groupChromPeaks(xodr, param = PeakDensityParam(sampleGroups = + xs$class)) + checkTrue(length(processHistory(xodrg, + type = xcms:::.PROCSTEP.PEAK.GROUPING)) == 2) + checkTrue(hasAdjustedRtime(xodrg)) + checkTrue(hasFeatures(xodrg)) + xsrg <- group(xsr) + checkEquals(xsrg@groupidx, featureDefinitions(xodrg)$peakidx) + + ## Mod settings: + xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 1) + xodr <- adjustRtime(xodg, param = PeakGroupsParam(minFraction = 1, + span = 1)) + checkEquals(chromPeaks(xodr)[, colnames(peaks(xsr))], peaks(xsr)) + checkEquals(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), + unlist(xsr@rt$corrected, use.names = FALSE)) + + xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 1, + smooth = "linear") + xodr <- adjustRtime(xodg, param = PeakGroupsParam(minFraction = 1, + span = 1, + smooth = "linear")) + checkEquals(chromPeaks(xodr)[, colnames(peaks(xsr))], peaks(xsr)) + checkEquals(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), + 
unlist(xsr@rt$corrected, use.names = FALSE)) + + xsr <- retcor(xsg, method = "peakgroups", missing = 0, span = 1, + family = "symmetric") + xodr <- adjustRtime(xodg, param = PeakGroupsParam(minFraction = 1, + span = 1, + family = "symmetric")) + checkEquals(chromPeaks(xodr)[, colnames(peaks(xsr))], peaks(xsr)) + checkEquals(unlist(adjustedRtime(xodr, bySample = TRUE), use.names = FALSE), + unlist(xsr@rt$corrected, use.names = FALSE)) + ## Dropping results. + tmp <- dropAdjustedRtime(xodr) + checkEquals(tmp, xod) +} + +test_getPeakGroupsRtMatrix <- function() { + param <- PeakGroupsParam() + nSamples <- length(fileNames(xod_xg)) + pkGrp <- xcms:::.getPeakGroupsRtMatrix( + peaks = chromPeaks(xod_xg), + peakIndex = xcms:::.peakIndex(xod_xg), + nSamples = nSamples, + missingSample = nSamples - (nSamples * minFraction(param)), + extraPeaks = extraPeaks(param) + ) + ## checkEquals(colnames(pkGrp), colnames(chromPeaks(xod_xg))) + fts <- featureDefinitions(xod_xg)[rownames(pkGrp), ] + checkTrue(all(pkGrp[, 1] >= fts$rtmin & pkGrp[, 1] <= fts$rtmax)) + checkTrue(all(pkGrp[, 2] >= fts$rtmin & pkGrp[, 2] <= fts$rtmax)) + checkTrue(all(pkGrp[, 3] >= fts$rtmin & pkGrp[, 3] <= fts$rtmax)) +} + +test_plotAdjustedRtime <- function() { + plotAdjustedRtime(xod_xgr) + plotAdjustedRtime(xod_xgrg) + plotAdjustedRtime(xod_x) + plotAdjustedRtime(xod_xg) +} + +dontrun_issue146 <- function() { + ## For some files it can happen that the adjusted retention times are no + ## longer ordered increasingly. + + ## Using my data that caused the problems + library(xcms) + library(RUnit) + load("/Users/jo/R-workspaces/2017/2017-03-Mitra-untargeted/data/RData/mitra-extraction/mitra.RData") + mzWid <- 0.02 + bw_1 <- 1.5 + bw_2 <- 2 + + ## Retention time adjustment using "PeakGroups" + ## First grouping of samples. 
Setting minFraction + pdp <- PeakDensityParam(sampleGroups = pData(mitra)$extraction_name, + bw = bw_1, binSize = mzWid, minFraction = 0.5, + maxFeatures = 200) + mitra_pg <- groupChromPeaks(mitra, param = pdp) + + ## These are if we want to jump into the do_adjustRtime_peakGroups function. + peaks <- chromPeaks(mitra_pg) + peakIndex <- featureDefinitions(mitra_pg)$peakidx + rtime <- rtime(mitra_pg, adjusted = FALSE, bySample = TRUE) + minFraction <- 0.85 + extraPeaks <- 1 + span <- 0.2 + family <- "gaussian" + + ## Running the original code. + res_o <- xcms:::do_adjustRtime_peakGroups_orig(peaks, peakIndex, rtime = rtime, + minFraction = minFraction, + extraPeaks = extraPeaks) + sum(unlist(lapply(res_o, is.unsorted))) + ## Alternative 1 - uh, does not finish??? + res_2 <- do_adjustRtime_peakGroups(peaks, peakIndex, rtime = rtime, + minFraction = minFraction, + extraPeaks = extraPeaks) + sum(unlist(lapply(res_2, is.unsorted))) + + res <- adjustRtime(mitra_pg, + param = PeakGroupsParam(minFraction = minFraction, + span = 1)) + tmp <- dropAdjustedRtime(res) + checkEquals(chromPeaks(tmp), chromPeaks(mitra)) +} + +## This is to ensure that the original code works with the new one using the +## do_ function +dontrun_test_retcor.peakgroups <- function() { + xs <- faahko + xsg <- group(xs) + + res_1 <- retcor.peakgroups(xsg) + res_2 <- xcms:::.retcor.peakgroups_orig(xsg) + checkEquals(unlist(res_1@rt$corrected, use.names = FALSE), + unlist(res_2@rt$corrected, use.names = FALSE)) + checkEquals(res_1, res_2) + + res_1 <- retcor.peakgroups(xsg, missing = 2) + res_2 <- xcms:::.retcor.peakgroups_orig(xsg, missing = 2) + checkEquals(unlist(res_1@rt$corrected, use.names = FALSE), + unlist(res_2@rt$corrected, use.names = FALSE)) + checkEquals(res_1, res_2) + + res_1 <- retcor.peakgroups(xsg, extra = 3) + res_2 <- xcms:::.retcor.peakgroups_orig(xsg, extra = 3) + checkEquals(unlist(res_1@rt$corrected, use.names = FALSE), + unlist(res_2@rt$corrected, use.names = FALSE)) + 
checkEquals(res_1, res_2) + + res_1 <- retcor.peakgroups(xsg, smooth = "linear") + res_2 <- xcms:::.retcor.peakgroups_orig(xsg, smooth = "linear") + checkEquals(unlist(res_1@rt$corrected, use.names = FALSE), + unlist(res_2@rt$corrected, use.names = FALSE)) + checkEquals(res_1, res_2) + + res_1 <- retcor.peakgroups(xsg, span = 1) + res_2 <- xcms:::.retcor.peakgroups_orig(xsg, span = 1) + checkEquals(unlist(res_1@rt$corrected, use.names = FALSE), + unlist(res_2@rt$corrected, use.names = FALSE)) + checkEquals(res_1, res_2) + + res_1 <- retcor.peakgroups(xsg, family = "symmetric") + res_2 <- xcms:::.retcor.peakgroups_orig(xsg, family = "symmetric") + checkEquals(unlist(res_1@rt$corrected, use.names = FALSE), + unlist(res_2@rt$corrected, use.names = FALSE)) + checkEquals(res_1, res_2) + + res_1 <- retcor.peakgroups(xsg, plottype = "deviation") + res_2 <- xcms:::.retcor.peakgroups_orig(xsg, plottype = "deviation") + checkEquals(unlist(res_1@rt$corrected, use.names = FALSE), + unlist(res_2@rt$corrected, use.names = FALSE)) + checkEquals(res_1, res_2) + + res_1 <- retcor.peakgroups(xsg, plottype = "mdevden") + res_2 <- xcms:::.retcor.peakgroups_orig(xsg, plottype = "mdevden") + checkEquals(unlist(res_1@rt$corrected, use.names = FALSE), + unlist(res_2@rt$corrected, use.names = FALSE)) + checkEquals(res_1, res_2) +} + +## That's to evaluate the do_ function with the original code. Once the +## retcor.peakgroups calls the do_function we rename it to dontrun. +test_do_adjustRtime_peakGroups_implementation <- function() { + xs <- faahko + xsg <- group(xs) + + misSamp <- 1 + xsa <- retcor(xsg, method = "peakgroups", missing = misSamp) + + minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) + res <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr) + checkEquals(xsa@rt$corrected, res) + + ## Change settings. 
+ misSamp <- 3 + xsa <- retcor(xsg, method = "peakgroups", missing = misSamp) + + minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) + res <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr) + checkEquals(xsa@rt$corrected, res) + + misSamp <- 2 + xtr <- 2 + xsa <- retcor(xsg, method = "peakgroups", missing = misSamp, extra = xtr) + + minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) + res <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr, extraPeaks = xtr) + checkEquals(xsa@rt$corrected, res) + + xsa <- retcor(xsg, method = "peakgroups", missing = misSamp, extra = xtr, + smooth = "linear") + minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) + res <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr, extraPeaks = xtr, + smooth = "linear") + checkEquals(xsa@rt$corrected, res) + + xsa <- retcor(xsg, method = "peakgroups", missing = misSamp, extra = xtr, + family = "symmetric") + minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) + res <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr, extraPeaks = xtr, + family = "symmetric") + checkEquals(xsa@rt$corrected, res) + + xsa <- retcor(xsg, method = "peakgroups", missing = misSamp, extra = xtr, + span = 1) + minFr <- (length(sampnames(xs)) - misSamp) / length(sampnames(xs)) + res <- do_adjustRtime_peakGroups(peaks = peaks(xs), + peakIndex = xsg@groupidx, + rtime = xsg@rt$raw, + minFraction = minFr, extraPeaks = xtr, + span = 1) + checkEquals(xsa@rt$corrected, res) +} + +dontrun_do_adjustRtime_peakgroups_implementation <- function() { + library(xcms) + library(RUnit) + faahko_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), + system.file('cdf/KO/ko16.CDF', package = 
"faahKO"), + system.file('cdf/KO/ko18.CDF', package = "faahKO"), + system.file('cdf/WT/wt15.CDF', package = "faahKO"), + system.file('cdf/WT/wt16.CDF', package = "faahKO"), + system.file('cdf/WT/wt18.CDF', package = "faahKO")) + + od <- readMSData2(faahko_files) + + xod <- findChromPeaks(od, param = CentWaveParam(noise = 100, + snthresh = 20)) + xs <- xcmsSet(faahko_files, profparam = list(step = 0), + method = "centWave", noise = 100, snthresh = 20) + + checkEquals(chromPeaks(xod), peaks(xs)) + ## feature grouping + p <- PeakDensityParam(sampleGroups = rep(c("KO", "WT"), each = 3)) + xod <- groupChromPeaks(xod, param = p) + xs <- group(xs, method = "density") + + ## Feature alignment on those: + xs <- retcor(xs, method = "peakgroups") + + ## + minFr <- 5/6 + res <- xcms:::do_adjustRtime_peakGroups(peaks = chromPeaks(xod), + featureDefinitions(xod)$peakidx, + rtime = rtime(xod, bySample = TRUE), + minFraction = minFr) + a <- unname(unlist(res, use.names = FALSE)) + b <- unlist(xs@rt$corrected, use.names = FALSE) + ## Now, they are slightly different now, because we order by median rt of the + ## actual peaks, and they by the median rt of the whole peak group. + checkEquals(a, b) + checkEquals(res, unname(xs@rt$corrected)) + + ## Manually correcting the guys: + rtr <- rtime(xod, bySample = TRUE)[[1]] + rtc <- res[[1]] + ## rtdevsmo <- rtr - rtc + + ## That's strange! + ## cfun <- stepfun(rtr[-1] - diff(rtr) / 2, rtr - rtdevsmo) + cfun <- stepfun(rtr[-1] - diff(rtr) / 2, rtc) + corFeat <- chromPeaks(xod) + whichSamp <- which(corFeat[, "sample"] == 1) + corFeat[whichSamp, c("rt", "rtmin", "rtmax")] <- + cfun(corFeat[whichSamp, c("rt", "rtmin", "rtmax")]) + + checkEquals(corFeat[whichSamp, ], peaks(xs)[whichSamp, ]) + checkTrue(any(peaks(xs) != chromPeaks(xod))) + + ## Do the backwards correction. 
+ adjFun <- stepfun(rtc[-1] - diff(rtc) / 2, rtr) + origFeats <- corFeat + origFeats[whichSamp, c("rt", "rtmin", "rtmax")] <- + adjFun(corFeat[whichSamp, c("rt", "rtmin", "rtmax")]) + checkEquals(chromPeaks(xod)[whichSamp, ], origFeats[whichSamp, ]) + ## OK. +} + +## Testing the internal .applyRtAdjustment function. +test_applyRtAdjustment <- function() { + xs <- faahko + ## group em. + xsg <- group(xs) + ## align em. + xsa <- retcor(xsg, method = "peakgroups") + + pksAdj <- xcms:::.applyRtAdjToChromPeaks(peaks(xsg), + rtraw = xsa@rt$raw, + rtadj = xsa@rt$corrected) + checkEquals(pksAdj, peaks(xsa)) + ## Reset em. + pksRaw <- xcms:::.applyRtAdjToChromPeaks(pksAdj, + rtraw = xsa@rt$corrected, + rtadj = xsa@rt$raw) + checkEquals(pksRaw, peaks(xsg)) +} + +## Obiwarp: +test_obiwarp <- function() { + + xs <- faahko_xs + od <- faahko_od + xod <- faahko_xod + ## Feature alignment on those: + ## object <- findChromPeaks(faahko_od, param = CentWaveParam(noise = 10000, + ## snthresh = 40)) + prm <- ObiwarpParam(binSize = 1) + xs_2 <- retcor.obiwarp(xs, profStep = binSize(prm)) + checkEquals(xs_2@rt$raw[[2]], xs_2@rt$corrected[[2]]) + checkTrue(sum(xs_2@rt$raw[[1]] != xs_2@rt$corrected[[1]]) > 500) + checkTrue(sum(xs_2@rt$raw[[3]] != xs_2@rt$corrected[[3]]) > 500) + + ## And the OnDiskMSnExp implementation: + res <- xcms:::.obiwarp(od, param = prm) + checkEquals(xs_2@rt$corrected, res) + res_2 <- adjustRtime(od, param = prm) + res_3 <- adjustRtime(xod, param = prm) + checkEquals(adjustedRtime(res_3), res_2) + checkEquals(adjustedRtime(res_3, bySample = TRUE), res) + checkEquals(adjustedRtime(res_3, bySample = TRUE), + unname(split(unname(res_2), fromFile(od)))) + ## Check if peaks were corrected correctly + checkTrue(sum(chromPeaks(res_3)[, "rt"] == chromPeaks(xod)) < + nrow(chromPeaks(res_3)) / 2) + ## Dropping the adjusted rtime on these + hasAdjustedRtime(res_3) + tmp <- dropAdjustedRtime(res_3) + checkEquals(chromPeaks(tmp), chromPeaks(xod)) + + ## File issue on that! 
retcor.obiwarp does use round for the adjustment of + ## the peak! + ## -> issue #122 + ## checkEquals(chromPeaks(res_3), peaks(xs_2)) + + ## Manually specify center Sample + centerSample(prm) <- 3 + xs_2 <- retcor.obiwarp(xs, profStep = binSize(prm), center = centerSample(prm)) + checkEquals(xs_2@rt$raw[[centerSample(prm)]], + xs_2@rt$corrected[[centerSample(prm)]]) + res <- xcms:::.obiwarp(od, param = prm) + checkEquals(xs_2@rt$corrected, res) + ## change some settings + gapInit(prm) <- 3.1 + gapExtend(prm) <- 0.9 + xs_2 <- retcor.obiwarp(xs, profStep = binSize(prm), gapInit = gapInit(prm), + center = centerSample(prm), gapExtend = gapExtend(prm)) + checkEquals(xs_2@rt$raw[[centerSample(prm)]], + xs_2@rt$corrected[[centerSample(prm)]]) + res <- xcms:::.obiwarp(od, param = prm) + checkEquals(xs_2@rt$corrected, res) +} + +## Run this test manually to perform an exhaustive test to validate obiwarp +## Results. +exhaustive_test <- function() { + ## Load test files... + faahko_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), + system.file('cdf/KO/ko16.CDF', package = "faahKO"), + system.file('cdf/KO/ko18.CDF', package = "faahKO"), + system.file('cdf/WT/wt15.CDF', package = "faahKO"), + system.file('cdf/WT/wt16.CDF', package = "faahKO"), + system.file('cdf/WT/wt18.CDF', package = "faahKO")) + library(RUnit) + library(xcms) + ob <- readMSData2(faahko_files) + xs <- xcmsSet(faahko_files, profparam = list(step = 0), method = "centWave", + noise = 10000, snthresh = 40) + prm <- ObiwarpParam(binSize = 1, centerSample = 2) + xs_r <- retcor.obiwarp(xs, profStep = binSize(prm)) + checkEquals(xs_r@rt$raw[[2]], xs_r@rt$corrected[[2]]) + res <- xcms:::.obiwarp(ob, param = prm) + checkEquals(res, xs_r@rt$corrected) + ## binSize + binSize(prm) <- 0.2 + xs_r <- retcor.obiwarp(xs, profStep = binSize(prm)) + checkEquals(xs_r@rt$raw[[2]], xs_r@rt$corrected[[2]]) + res <- xcms:::.obiwarp(ob, param = prm) + checkEquals(res, xs_r@rt$corrected) + ## centersampe + binSize(prm) 
<- 2 + centerSample(prm) <- 4 + xs_r <- retcor.obiwarp(xs, profStep = binSize(prm), + center = centerSample(prm)) + checkEquals(xs_r@rt$raw[[centerSample(prm)]], + xs_r@rt$corrected[[centerSample(prm)]]) + res <- xcms:::.obiwarp(ob, param = prm) + checkEquals(res, xs_r@rt$corrected) + ## distFun + distFun(prm) <- "euc" + xs_r <- retcor.obiwarp(xs, profStep = binSize(prm), + center = centerSample(prm), distFunc = distFun(prm)) + checkEquals(xs_r@rt$raw[[centerSample(prm)]], + xs_r@rt$corrected[[centerSample(prm)]]) + res <- xcms:::.obiwarp(ob, param = prm) + checkEquals(res, xs_r@rt$corrected) + ## localAlignment + localAlignment(prm) <- TRUE + distFun(prm) <- "cor" + ## Uh huh! GET AN C ALLOCATION ERROR with local, stepsize 2 and euc + xs_r <- retcor.obiwarp(xs, profStep = binSize(prm), localAlignment = 1, + center = centerSample(prm), distFunc = distFun(prm)) + checkEquals(xs_r@rt$raw[[centerSample(prm)]], + xs_r@rt$corrected[[centerSample(prm)]]) + res <- xcms:::.obiwarp(ob, param = prm) + checkEquals(res, xs_r@rt$corrected) + ## factorDiag + factorDiag(prm) <- 2.7 + localAlignment(prm) <- FALSE + xs_r <- retcor.obiwarp(xs, profStep = binSize(prm), factorDiag = factorDiag(prm), + center = centerSample(prm), distFunc = distFun(prm)) + checkEquals(xs_r@rt$raw[[centerSample(prm)]], + xs_r@rt$corrected[[centerSample(prm)]]) + res <- xcms:::.obiwarp(ob, param = prm) + checkEquals(res, xs_r@rt$corrected) + + ## And all again using some of my own files. + fls <- dir("/Users/jo/data/2016/2016-11/NoSN/", pattern = "mzML", + full.names = TRUE) + if (length(fls)) { + fls <- fls[1:20] + xs <- xcmsSet(fls, profparam = list(step = 0), method = "centWave", + noise = 10000, snthresh = 40) + ## Compare also the timings! + ## HM, why, with binSize 1.2 I get a "Dimension of profile matrices do + ## not match"! + prm <- ObiwarpParam(centerSample = 11, binSize = 1) + system.time( + xs_2 <- retcor.obiwarp(xs, profStep = binSize(prm), center = 11) + ) + od <- readMSData2(fls) + ## ???? 
dimension of profile matrix does not match??? + ## z <- filterFile(od, file = 1) + ## cntr <- filterFile(od, file = centerSample(prm)) + ## cntrPr <- profMat(cntr, step = binSize(prm), returnBreaks = TRUE)[[1]] + ## parms <- prm + ## + system.time( + res <- xcms:::.obiwarp(od, param = prm) + ) + checkEquals(res, xs_2@rt$corrected) + } +} diff --git a/inst/unitTests/runit.do_detectFeatures_centWave.R b/inst/unitTests/runit.do_detectFeatures_centWave.R deleted file mode 100644 index 56b0ec923..000000000 --- a/inst/unitTests/runit.do_detectFeatures_centWave.R +++ /dev/null @@ -1,321 +0,0 @@ -## Test detectFeatures centWave - -## library(faahKO) -fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), - system.file('cdf/KO/ko16.CDF', package = "faahKO"), - system.file('cdf/KO/ko18.CDF', package = "faahKO"), - system.file('cdf/KO/ko19.CDF', package = "faahKO")) - -## library(msdata) -## mzf <- c(system.file("microtofq/MM14.mzML", package = "msdata"), -## system.file("microtofq/MM8.mzML", package = "msdata")) -## f <- msdata::proteomics(full.names = TRUE, pattern = "TMT_Erwinia") -xr <- deepCopy(faahko_xr_1) -onDisk <- filterFile(faahko_od, file = 1) - -test_do_detectFeatures_centWave <- function() { - ## xr <- xcmsRaw(fs[1], profstep = 0) - ## We expect that changing a parameter has an influence on the result. - mzVals <- xr@env$mz - intVals <- xr@env$intensity - ## Define the values per spectrum: - valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) - res1 <- do_detectFeatures_centWave(mz = mzVals, - int = intVals, - scantime = xr@scantime, - valsPerSpect, - snthresh = 200, - noise = 4000) - res2 <- do_detectFeatures_centWave(mz = mzVals, - int = intVals, - scantime = xr@scantime, - valsPerSpect, - snthresh = 500, - noise = 4000) - checkTrue(nrow(res1) > nrow(res2)) - - ## Check scanrange on findPeaks.centWave. 
- res_1 <- findPeaks.centWave(xr, scanrange = c(90, 345), noise = 2000) - xr <- xr[90:345] - mzVals <- xr@env$mz - intVals <- xr@env$intensity - ## Define the values per spectrum: - valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) - res_2 <- do_detectFeatures_centWave(mz = mzVals, int = intVals, - scantime = xr@scantime, valsPerSpect, - noise = 2000) - checkEquals(res_1@.Data, res_2) -} - -## Evaluate the featureDetection method using the centWave method on -## OnDiskMSnExp and on MSnExp objects. -test_featureDetection_centWave <- function() { - ## Control - library(MSnbase) - ## xr <- xcmsRaw(fs[1], profstep = 0) - ppm <- 40 - snthresh <- 40 - res_x <- findPeaks.centWave(xr, ppm = ppm, snthresh = snthresh, - noise = 100000)@.Data - ## Bypass xcmsRaw - xs <- xcmsSet(fs[1], profparam = list(profstep = 0), ppm = ppm, - snthresh = snthresh, method = "centWave", - noise = 100000) - checkEquals(xs@peaks[, colnames(res_x)], res_x) - ## OnDiskMSnExp - ## onDisk <- readMSData2(fs[1], msLevel. = 1) - cwp <- CentWaveParam(ppm = ppm, snthresh = snthresh, noise = 100000) - res <- detectFeatures(onDisk, param = cwp, return.type = "list") - checkEquals(res[[1]], peaks(xs)@.Data) - - ## ## MSnExp - ## inMem <- readMSData(f[1], msLevel. 
= 1) - ## suppressWarnings( - ## res_2 <- detectFeatures(inMem, param = cwp, return.type = "list") - ## ) - ## checkEquals(res_2[[1]], peaks(xs)@.Data) - - ## returning an xcmsSet - res <- detectFeatures(onDisk, param = cwp, return.type = "xcmsSet") - checkEquals(peaks(res), peaks(xs)) - ## suppressWarnings( - ## res <- detectFeatures(inMem, param = cwp, return.type = "xcmsSet") - ## ) - ## checkEquals(peaks(res), peaks(xs)) - - ## Return type XCMSnExp - res <- detectFeatures(onDisk, param = cwp) - checkTrue(hasDetectedFeatures(res)) - checkTrue(!hasAdjustedRtime(res)) - checkTrue(!hasAlignedFeatures(res)) - checkEquals(peaks(xs)@.Data, features(res)) -} - -dontrun_test_benchmark_centWaves <- function() { - library(msdata) - f <- msdata::proteomics(full.names = TRUE, pattern = "TMT_Erwinia") - library(microbenchmark) - library(MSnbase) - library(xcms) - ## - ## xr <- xcmsRaw(f[1], profstep = 0) - ppm <- 40 - snthresh <- 40 - - cwp <- CentWaveParam(ppm = ppm, snthresh = snthresh) - ## onDisk <- readMSData2(f[1], msLevel. = 1) - register(SerialParam()) - system.time( - tmp <- detectFeatures(onDisk, param = cwp) - ) ## 9.7sec - system.time( - tmp <- detectFeatures(onDisk, param = cwp, return.type = "xcmsSet") - ) ## 12sec - system.time( - tmp <- xcmsSet(f[1], profparam = list(profstep = 0), ppm = ppm, - snthresh = snthresh, method = "centWave") - ) ## 11.99sec - - inMem <- readMSData(f[1], msLevel. = 1) - register(SerialParam()) - - ## detectFeatures,MSnExp and findPeaks.centWave should be about similar. - microbenchmark(findPeaks.centWave(xr, ppm = ppm, snthresh = snthresh), - detectFeatures(inMem, param = cwp), times = 3) - ## findPeaks.centWave is about 1 second faster. - - ## detectFeatures,OnDiskMSnExp and xcmsSet should be about similar. 
- microbenchmark(xcmsSet(f[1], profparam = list(profstep = 0), ppm = ppm, - snthresh = snthresh, method = "centWave"), - detectFeatures(onDisk, param = cwp), - detectFeatures(inMem, param = cwp), - times = 3) -} - -dontrun_test_benchmark_centWaves <- function() { - library(msdata) - f <- msdata::proteomics(full.names = TRUE, pattern = "TMT_Erwinia") - library(microbenchmark) - library(MSnbase) - library(xcms) - ## - ## xr <- xcmsRaw(f[1], profstep = 0) - ppm <- 40 - snthresh <- 40 - - cwp <- CentWaveParam(ppm = ppm, snthresh = snthresh) - ## onDisk <- readMSData2(f[1], msLevel. = 1) - register(SerialParam()) - system.time( - tmp <- detectFeatures(onDisk, param = cwp) - ) ## 9.7sec - system.time( - tmp <- detectFeatures(onDisk, param = cwp, return.type = "xcmsSet") - ) ## 12sec - system.time( - tmp <- xcmsSet(f[1], profparam = list(profstep = 0), ppm = ppm, - snthresh = snthresh, method = "centWave") - ) ## 11.99sec - - inMem <- readMSData(f[1], msLevel. = 1) - register(SerialParam()) - - ## detectFeatures,MSnExp and findPeaks.centWave should be about similar. - microbenchmark(findPeaks.centWave(xr, ppm = ppm, snthresh = snthresh), - detectFeatures(inMem, param = cwp), times = 3) - ## findPeaks.centWave is about 1 second faster. - - ## detectFeatures,OnDiskMSnExp and xcmsSet should be about similar. - microbenchmark(xcmsSet(f[1], profparam = list(profstep = 0), ppm = ppm, - snthresh = snthresh, method = "centWave"), - detectFeatures(onDisk, param = cwp), - detectFeatures(inMem, param = cwp), - times = 3) -} - - - - -############################################################ -## This is only relevant during development of the do_ function -## to evaluate that results are identical. 
-dontrun_test_do_detectFeatures_centWave_impl <- function() { - - for (i in 1:length(fs)) { - ppm = 25 - peakwidth = c(20, 50) - snthresh = 10 - prefilter = c(3, 100) - mzCenterFun = "wMean" - integrate = 1 - mzdiff = -0.001 - fitgauss = FALSE - noise = 0 - verboseColumns = FALSE - - xr <- xcmsRaw(fs[i]) - - ## Default settings - .runAndCompare(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, - integrate, mzdiff, fitgauss, noise, verboseColumns) - ## xcms: 14.6 sec - ## do_ : 13 sec - - ppm <- 10 - .runAndCompare(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, - integrate, mzdiff, fitgauss, noise, verboseColumns) - ## xcms: 15 sec - ## do_ : 13.3 sec - - peakwidth <- c(3, 30) - .runAndCompare(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, - integrate, mzdiff, fitgauss, noise, verboseColumns) - ## xcms: 11.4 sec - ## do_ : 9.5 sec - - snthresh <- 15 - .runAndCompare(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, - integrate, mzdiff, fitgauss, noise, verboseColumns) - ## xcms: 10.6 sec - ## do_ : 8.8 sec - - fitgauss <- TRUE - .runAndCompare(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, - integrate, mzdiff, fitgauss, noise, verboseColumns) - ## xcms: 12.5 sec - ## do_ : 10.7 sec - - verboseColumns <- TRUE - .runAndCompare(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, - integrate, mzdiff, fitgauss, noise, verboseColumns) - ## xcms: 12.2 sec - ## do_ : 10.6 sec - } -} - -## That's to compare the functions in version 1.49.7. -.runAndCompare <- function(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, - integrate, mzdiff, fitgauss, noise, verboseColumns) { - require(RUnit) - mz <- xr@env$mz - int <- xr@env$intensity - scantime <- xr@scantime - scanindex <- xr@scanindex - a <- system.time( - ## That's the method called inside do_... 
- xrDo <- xcms:::.centWave_orig(mz = mz, int = int, scantime = scantime, - valsPerSpect = diff(c(scanindex, length(mz))), - ppm = ppm, peakwidth = peakwidth, - snthresh = snthresh, - prefilter = prefilter, - mzCenterFun = mzCenterFun, - integrate = integrate, - mzdiff = mzdiff, - fitgauss = fitgauss, - noise = noise, - verboseColumns = verboseColumns) - ) ## 12.7 - ## Run the original centWave code on xcmsRaw: - b <- system.time( - xrPeaks <- xcms:::.findPeaks.centWave_orig(xr, - ppm = ppm, - peakwidth = peakwidth, - snthresh = snthresh, - prefilter = prefilter, - mzCenterFun = mzCenterFun, - integrate = integrate, - mzdiff = mzdiff, - fitgauss = fitgauss, - noise = noise, - verbose.columns = verboseColumns) - ) ## 15.4 - ## Compare. - cat("DO: ", a, "\n") - cat("XCMS: ", b, "\n") - if (!checkEquals(new("xcmsPeaks", xrDo), xrPeaks)) - stop("do_ and xcms yield different results!") -} - - -## Some speed tests. -.otherTest <- function() { - Testv <- c(2, 4.2, 34.1, 34.5, 6.4, 6.3, 1.2) - RforM <- matrix(nrow = 0, ncol = length(Testv)) - system.time( - for(i in 1:5000){ - RforM <- rbind(RforM, Testv) - } - ) ## 1.27 - ## with append to list. - RforL <- vector("list", 0) - system.time( - for(i in 1:5000){ - RforL <- c(RforL, Testv) - } - ) ## 1.12 - system.time( - RapplyL <- lapply(1:5000, function(z) {return(Testv)}) - ) ## 0.003 - RM <- matrix(nrow=5000, ncol = length(Testv)) - system.time( - for (i in 1:5000) { - RM[i, ] <- Testv - } - ) ## 0.006 - - ## Compare adding to list instead of adding to existing. [[]] - RexL <- vector("list", 5000) - system.time( - for (i in 1:5000){ - RexL[[i]] <- Testv - } - ) ## 0.005 - ## Dynamically... 
- RexL <- list() - system.time( - for (i in 1:5000){ - RexL[[i]] <- Testv - } - ) ## 0.005 - -} diff --git a/inst/unitTests/runit.do_detectFeatures_MSW.R b/inst/unitTests/runit.do_findChromPeaks_MSW.R similarity index 50% rename from inst/unitTests/runit.do_detectFeatures_MSW.R rename to inst/unitTests/runit.do_findChromPeaks_MSW.R index 1723c6bab..0b3e259c9 100644 --- a/inst/unitTests/runit.do_detectFeatures_MSW.R +++ b/inst/unitTests/runit.do_findChromPeaks_MSW.R @@ -1,60 +1,69 @@ ############################################################ -## do_detectFeatures_MSW tests +## do_findPeaks_MSW tests -xraw <- deepCopy(microtofq_xr) +## xraw <- deepCopy(microtofq_xr) -test_do_detectFeatures_MSW <- function() { - feats1 <- xcms:::do_detectFeatures_MSW(xraw@env$intensity, - xraw@env$mz, - snthresh = 100) - feats2 <- xcms:::do_detectFeatures_MSW(xraw@env$intensity, - xraw@env$mz, - snthresh = 50) +test_do_findPeaks_MSW <- function() { + first_file <- filterFile(fticr, file = 1) + spctr <- spectra(first_file) + checkTrue(length(spctr) == 1) + mzs <- unname(mz(spctr[[1]])) + ints <- unname(intensity(spctr[[1]])) + feats1 <- do_findPeaks_MSW(mz = mzs[10000:20000], + int = ints[10000:20000], + snthresh = 100) + feats2 <- do_findPeaks_MSW(mz = mzs[10000:20000], + int = ints[10000:20000], + snthresh = 50) checkTrue(nrow(feats2) > nrow(feats1)) } -test_detectFeatures_MSW <- function() { +test_findChromPeaks_MSW <- function() { ## library(MSnbase) ## od <- readMSData2(mzf) od <- microtofq_od ## Restrict to first spectrum od1 <- od[1] sp1 <- od[[1]] - res_1 <- do_detectFeatures_MSW(mz = mz(sp1), int = intensity(sp1)) + res_1 <- do_findPeaks_MSW(mz = mz(sp1), int = intensity(sp1)) mp <- MSWParam() - res_2 <- detectFeatures(od1, param = mp) - checkEquals(res_1, features(res_2)[, colnames(res_1), drop = FALSE]) + res_2 <- findChromPeaks(od1, param = mp) + checkEquals(res_1, chromPeaks(res_2)[, colnames(res_1), drop = FALSE]) ## Changing settings. 
snthresh(mp) <- 1 nearbyPeak(mp) <- FALSE - res_1 <- do_detectFeatures_MSW(mz = mz(sp1), int = intensity(sp1), - snthresh = 1, nearbyPeak = FALSE) - res_2 <- detectFeatures(od1, param = mp, return.type = "list") + res_1 <- do_findPeaks_MSW(mz = mz(sp1), int = intensity(sp1), + snthresh = 1, nearbyPeak = FALSE) + res_2 <- findChromPeaks(od1, param = mp, return.type = "list") checkEquals(res_1, res_2[[1]][, colnames(res_1)]) peakThr(mp) <- 200 - res_1 <- do_detectFeatures_MSW(mz = mz(sp1), int = intensity(sp1), - snthresh = 1, nearbyPeak = FALSE, - peakThr = 200) - res_2 <- detectFeatures(od1, param = mp, return.type = "list") + res_1 <- do_findPeaks_MSW(mz = mz(sp1), int = intensity(sp1), + snthresh = 1, nearbyPeak = FALSE, + peakThr = 200) + res_2 <- findChromPeaks(od1, param = mp, return.type = "list") checkEquals(res_1, res_2[[1]][, colnames(res_1)]) addParams(mp) <- list(forder = 2) - res_3 <- do_detectFeatures_MSW(mz = mz(sp1), int = intensity(sp1), - snthresh = 1, nearbyPeak = FALSE, - peakThr = 200, forder = 2) - res_4 <- detectFeatures(od1, param = mp, return.type = "list") + res_3 <- do_findPeaks_MSW(mz = mz(sp1), int = intensity(sp1), + snthresh = 1, nearbyPeak = FALSE, + peakThr = 200, forder = 2) + res_4 <- findChromPeaks(od1, param = mp, return.type = "list") checkEquals(res_3, res_4[[1]][, colnames(res_3)]) addParams(mp) <- list(forder = 2, dorder = 1) - res_3 <- do_detectFeatures_MSW(mz = mz(sp1), int = intensity(sp1), - snthresh = 1, nearbyPeak = FALSE, - peakThr = 200, forder = 2, dorder = 1) - res_4 <- detectFeatures(od1, param = mp, return.type = "list") + res_3 <- do_findPeaks_MSW(mz = mz(sp1), int = intensity(sp1), + snthresh = 1, nearbyPeak = FALSE, + peakThr = 200, forder = 2, dorder = 1) + res_4 <- findChromPeaks(od1, param = mp, return.type = "list") checkEquals(res_3, res_4[[1]][, colnames(res_3)]) + + ## Compare old vs new: + checkEquals(chromPeaks(fticr_xod)[, -ncol(chromPeaks(fticr_xod))], + peaks(fticr_xs)) } 
############################################################ ## Test the implementation of the "do" function -dontrun_test_do_detectFeatures_MSW_impl <- function() { +dontrun_test_do_findPeaks_MSW_impl <- function() { library(xcms) library(RUnit) diff --git a/inst/unitTests/runit.do_findChromPeaks_centWave.R b/inst/unitTests/runit.do_findChromPeaks_centWave.R new file mode 100644 index 000000000..41724742f --- /dev/null +++ b/inst/unitTests/runit.do_findChromPeaks_centWave.R @@ -0,0 +1,776 @@ +## Test findChromPeaks centWave + +## library(faahKO) +fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), + system.file('cdf/KO/ko16.CDF', package = "faahKO"), + system.file('cdf/KO/ko18.CDF', package = "faahKO"), + system.file('cdf/KO/ko19.CDF', package = "faahKO")) + +## library(msdata) +## mzf <- c(system.file("microtofq/MM14.mzML", package = "msdata"), +## system.file("microtofq/MM8.mzML", package = "msdata")) +## f <- msdata::proteomics(full.names = TRUE, pattern = "TMT_Erwinia") +xr <- deepCopy(faahko_xr_1) +onDisk <- filterFile(faahko_od, file = 1) + +## Ensure that the reported peak integrated signal corresponds to the correct +## data. This is to ensure that issue #135 was fixed correctly. +test_findChromPeaks_centWave_peakIntensity <- function() { + ## Reproduce with msdata files: + fl <- system.file("microtofq/MM14.mzML", package = "msdata") + raw <- readMSData2(fl) + options(originalCentWave = TRUE) + tmp <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(2, 10))) + ## ## Use the getPeakInt2 which uses the rawMat function. + ## pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + ## ## Use the getPeakInt3 which uses the getEIC C function. + ## pkI3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + ## ## These fail for the original centWave code. 
+ ## checkTrue(sum(pkI2 != chromPeaks(tmp)[, "into"]) > length(pkI2) / 2) + ## ## checkEquals(unname(pkI2), unname(chromPeaks(tmp)[, "into"])) + ## ## checkEquals(unname(pkI3), unname(chromPeaks(tmp)[, "into"])) + ## checkEquals(pkI2, pkI3) + ## Try with new implementation. + options(originalCentWave = FALSE) + tmp2 <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(2, 10))) + ## Find different number of peaks: + checkTrue(nrow(chromPeaks(tmp2)) != nrow(chromPeaks(tmp))) + ## Are the peaks similar? + id_1 <- paste(chromPeaks(tmp)[, "mz"], chromPeaks(tmp)[, "rt"]) + id_2 <- paste(chromPeaks(tmp2)[, "mz"], chromPeaks(tmp2)[, "rt"]) + ## But all of the ones from the old are ALSO in the new one. + checkTrue(all(id_1 %in% id_2)) + ## Are the peaks the same? + cp2 <- chromPeaks(tmp2)[id_2 %in% id_1, ] + cn <- colnames(cp2) + cn <- cn[!(cn %in% c("intb", "into", "rtmin", "rtmax"))] + checkEquals(cp2[, cn], chromPeaks(tmp)[, cn]) + ## Are the values related? + plot(cp2[, "into"], chromPeaks(tmp)[, "into"]) ## Very similar + plot(cp2[, "intb"], chromPeaks(tmp)[, "intb"]) ## Very similar + plot(cp2[, "rtmin"], chromPeaks(tmp)[, "rtmin"]) ## Very similar + plot(cp2[, "rtmax"], chromPeaks(tmp)[, "rtmax"]) ## Very similar + ## Use the getPeakInt3 which uses the getEIC C function. + ## pkI2_2 <- xcms:::.getPeakInt2(tmp2, chromPeaks(tmp2)) + ## pkI3_2 <- xcms:::.getPeakInt3(tmp2, chromPeaks(tmp2)) + ## ## These fail for the original centWave code. + ## checkEquals(unname(pkI2_2), unname(chromPeaks(tmp2)[, "into"])) + ## checkEquals(unname(pkI3_2), unname(chromPeaks(tmp2)[, "into"])) + ## checkEquals(pkI2_2, pkI3_2) + + + ## The same for one of the test files; this works even with the original + ## centWave code + options(originalCentWave = TRUE) + tmp <- filterFile(xod_xgrg, file = 3) + ## ## Use the getPeakInt2 which uses the rawMat function. + ## pkI2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + ## ## Use the getPeakInt3 which uses the getEIC C function. 
+ ## pkI3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + ## checkEquals(pkI2, pkI3) + ## checkEquals(unname(pkI2), unname(chromPeaks(tmp)[, "into"])) + ## checkEquals(unname(pkI3), unname(chromPeaks(tmp)[, "into"])) + ## New modified centWave. + options(originalCentWave = FALSE) + tmp2 <- findChromPeaks(filterFile(faahko_od, file = 3), + CentWaveParam(noise = 10000, snthresh = 40)) + ## Even the identified peaks are identical! + checkEquals(chromPeaks(tmp), chromPeaks(tmp2)) + ## Use the getPeakInt2 which uses the rawMat function. + ## pkI2 <- xcms:::.getPeakInt2(tmp2, chromPeaks(tmp2)) + ## ## Use the getPeakInt3 which uses the getEIC C function. + ## pkI3 <- xcms:::.getPeakInt3(tmp2, chromPeaks(tmp2)) + ## checkEquals(pkI2, pkI3) + ## checkEquals(unname(pkI2), unname(chromPeaks(tmp2)[, "into"])) + ## checkEquals(unname(pkI3), unname(chromPeaks(tmp2)[, "into"])) + options(originalCentWave = TRUE) +} + +## Exhaustive comparison of the original and the modified centWave. We don't +## expect everything to be the same, but the results should be ideally comparable +dontrun_exhaustive_original_new_centWave_comparison <- function() { + ## faahKO test files. + fl <- system.file("cdf/ko15.CDF", package = "msdata") + raw <- readMSData2(fl) + ## Default settings + cwp <- CentWaveParam() + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? + checkTrue(nrow(chromPeaks(orig)) == nrow(chromPeaks(modi))) ## YES + ## Peaks the same? + checkEquals(chromPeaks(orig), chromPeaks(modi)) ## YES + ## modified settings: + cwp <- CentWaveParam(ppm = 40, peakwidth = c(4, 40)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? + checkTrue(nrow(chromPeaks(orig)) == nrow(chromPeaks(modi))) ## YES + ## Peaks the same? 
+ checkEquals(chromPeaks(orig), chromPeaks(modi)) ## YES + ## Another round + cwp <- CentWaveParam(ppm = 10, peakwidth = c(1, 10)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? + checkTrue(nrow(chromPeaks(orig)) == nrow(chromPeaks(modi))) ## YES + ## Peaks the same? + checkEquals(chromPeaks(orig), chromPeaks(modi)) ## YES + + ## msdata test files. + fl <- system.file("microtofq/MM14.mzML", package = "msdata") + raw <- readMSData2(fl) + cwp <- CentWaveParam(ppm = 10, peakwidth = c(1, 10)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? + checkTrue(nrow(chromPeaks(orig)) == nrow(chromPeaks(modi))) ## YES + ## Peaks the same? NO + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + checkTrue(all(orig_id %in% modi_id)) + ## Check peaks and colnames. + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[, cn]) ## YES + ## Check those that are different. + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[, i]) > 0.99) + } + ## Different settings. + cwp <- CentWaveParam(ppm = 40, peakwidth = c(1, 20)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? NO + checkTrue(nrow(chromPeaks(orig)) < nrow(chromPeaks(modi))) ## YES + ## Peaks the same? 
NO + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + ## Are all from orig also in modi? + checkTrue(all(orig_id %in% modi_id)) ## YES + modi_pks <- chromPeaks(modi)[match(orig_id, modi_id), ] + ## Compare the peaks. + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], modi_pks[, cn]) ## YES + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], modi_pks[, i]) + checkTrue(cor(chromPeaks(orig)[, i], modi_pks[, i]) > 0.99) + } + ## Different settings. + cwp <- CentWaveParam(ppm = 40, peakwidth = c(1, 10)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? + checkTrue(nrow(chromPeaks(orig)) == nrow(chromPeaks(modi))) ## YES + ## Peaks the same? + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + ## Are all from orig also in modi? + checkTrue(all(orig_id %in% modi_id)) ## YES + ## Compare the peaks. + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[, cn]) ## YES + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[, i]) > 0.99) + } + + ## Other file + fl <- system.file("microtofq/MM8.mzML", package = "msdata") + raw <- readMSData2(fl) + cwp <- CentWaveParam(ppm = 30, peakwidth = c(1, 10)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? + checkTrue(nrow(chromPeaks(orig)) == nrow(chromPeaks(modi))) ## YES + ## Peaks the same? 
NO, but all in orig are in modi + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + checkTrue(all(orig_id %in% modi_id)) + ## Check peaks and colnames. + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[, cn]) ## YES + ## Check those that are different. + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[, i]) > 0.99) + } + ## Different settings. + cwp <- CentWaveParam(ppm = 40, peakwidth = c(3, 30)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? NO + checkTrue(nrow(chromPeaks(orig)) < nrow(chromPeaks(modi))) + ## Peaks the same? NO, but all in orig are in modi + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + checkTrue(all(orig_id %in% modi_id)) + ## Check peaks and colnames. + cmn <- match(orig_id, modi_id) + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[cmn, cn]) ## YES + ## Check those that are different. + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) > 0.99) + } + + ## own files. + fl <- "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_12.mzML" + raw <- readMSData2(fl) + ## Default settings + cwp <- CentWaveParam() + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? 
NO + checkTrue(nrow(chromPeaks(orig)) < nrow(chromPeaks(modi))) + ## Peaks the same? NO, but all in orig are in modi + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + checkTrue(all(orig_id %in% modi_id)) + ## Check peaks and colnames. + cmn <- match(orig_id, modi_id) + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax", "maxo") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[cmn, cn]) ## YES + ## Check those that are different. + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) > 0.99) + } + ## Different settings + cwp <- CentWaveParam(ppm = 30, peakwidth = c(1, 10)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? NO, more with modified versions + checkTrue(nrow(chromPeaks(orig)) < nrow(chromPeaks(modi))) + ## Peaks the same? NO, but all in orig are in modi + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + checkTrue(all(orig_id %in% modi_id)) + ## Check peaks and colnames. + cmn <- match(orig_id, modi_id) + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax", "maxo") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[cmn, cn]) ## YES + ## Check those that are different. 
+ for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) > 0.98) + } + ## Different settings + cwp <- CentWaveParam(ppm = 40, peakwidth = c(1, 60)) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + options(originalCentWave = FALSE) + modi <- findChromPeaks(raw, param = cwp) + ## Same number of peaks? NO, more with modified versions + checkTrue(nrow(chromPeaks(orig)) < nrow(chromPeaks(modi))) + ## Peaks the same? NO, but all in orig are in modi + orig_id <- paste(chromPeaks(orig)[, "rt"], chromPeaks(orig)[, "mz"]) + modi_id <- paste(chromPeaks(modi)[, "rt"], chromPeaks(modi)[, "mz"]) + checkTrue(all(orig_id %in% modi_id)) + ## Check peaks and colnames. + cmn <- match(orig_id, modi_id) + cn <- colnames(chromPeaks(orig)) + cn_diff <- c("into", "intb", "rtmin", "rtmax", "maxo") + cn <- cn[!(cn %in% cn_diff)] + checkEquals(chromPeaks(orig)[, cn], chromPeaks(modi)[cmn, cn]) ## YES + ## Check those that are different. + for (i in cn_diff) { + plot(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) + checkTrue(cor(chromPeaks(orig)[, i], chromPeaks(modi)[cmn, i]) > 0.98) + } + + ## Is there something common to the peaks found only by the modified version? + common_pks <- chromPeaks(modi)[cmn, ] + unique_pks <- chromPeaks(modi)[-cmn, ] + cn <- colnames(common_pks) + ## mz + boxplot(list(common = common_pks[, "mz"], unique = unique_pks[, "mz"]), + varwidth = TRUE, main = "mz") + ## OK, average mz of unique peaks is smaller. + + ## mzrange + boxplot(list(common = common_pks[, "mzmax"] - common_pks[, "mzmin"], + unique = unique_pks[, "mzmax"] - unique_pks[, "mzmin"]), + varwidth = TRUE, main = "mz range") + ## Seems to be smaller too. 
+ + ## rt + boxplot(list(common = common_pks[, "rt"], unique = unique_pks[, "rt"]), + varwidth = TRUE, main = "rt") + ## hm, rt larger in unique + + ## rtrange + boxplot(list(common = log2(common_pks[, "rtmax"] - common_pks[, "rtmin"]), + unique = log2(unique_pks[, "rtmax"] - unique_pks[, "rtmin"])), + varwidth = TRUE, main = "rt range") + ## rtrange is same. + + ## into + boxplot(list(common = log2(common_pks[, "into"]), + unique = log2(unique_pks[, "into"])), + varwidth = TRUE, main = "into") + ## same. + + ## sn + boxplot(list(common = log2(common_pks[, "sn"]), + unique = log2(unique_pks[, "sn"])), + varwidth = TRUE, main = "sn") + ## sn is higher in unique + + ## Check chromatogram for some: + chrPlot <- function(i, raw) { + rtr <- common_pks[i, c("rtmin", "rtmax")] + rtr[1] <- rtr[1] - 2 + rtr[2] <- rtr[2] + 2 + chr_cmn <- extractChromatograms(raw, rt = rtr, + mz = common_pks[i, c("mzmin", "mzmax")]) + rtr <- unique_pks[i, c("rtmin", "rtmax")] + rtr[1] <- rtr[1] - 2 + rtr[2] <- rtr[2] + 2 + chr_unq <- extractChromatograms(raw, rt = rtr, + mz = unique_pks[i, c("mzmin", "mzmax")]) + par(mfrow = c(1, 2)) + plot(rtime(chr_cmn[[1]]), intensity(chr_cmn[[1]]), main = "common peak", + type = "l") + abline(v = common_pks[i, c("rtmin", "rtmax")], col = "grey") + plot(rtime(chr_unq[[1]]), intensity(chr_unq[[1]]), main = "unique peak", + type = "l") + abline(v = unique_pks[i, c("rtmin", "rtmax")], col = "grey") + } + chrPlot(1, raw = raw) + + i <- sample(1:nrow(unique_pks), 10) + chrPlot(i[1], raw = raw) + chrPlot(i[2], raw = raw) + chrPlot(i[3], raw = raw) + chrPlot(i[4], raw = raw) + chrPlot(i[5], raw = raw) + chrPlot(i[6], raw = raw) + chrPlot(i[7], raw = raw) + chrPlot(i[8], raw = raw) + chrPlot(i[9], raw = raw) + chrPlot(i[10], raw = raw) + + ## Summary: + ## Same peaks are identified by both methods, but the modified centWave finds + ## eventually more peaks. 
+ ## The into, intb, rtmin and rtmax differ for some peaks found by both + ## methods but values are highly correlated (R > 0.99). +} + + +## Compare what's the difference between the original centWave and the new one +## fixing the peak integration problem (issue #135) +dontrun_compare_orig_new_centWave <- function() { + mzVals <- xr@env$mz + intVals <- xr@env$intensity + ## Define the values per spectrum: + valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) + res_orig <- xcms:::.centWave_orig(mz = mzVals, + int = intVals, + scantime = xr@scantime, + valsPerSpect, + noise = 4000, verboseColumns = TRUE) + res_new <- xcms:::.centWave_new(mz = mzVals, + int = intVals, + scantime = xr@scantime, + valsPerSpect, + noise = 4000, verboseColumns = TRUE) + checkEquals(res_orig, res_new) ## That should always work. + + ## Use the previously defined ones as ROIs - this should simulate the + ## addIsoROIs + ## Slightly increase the mz like in addIsoROIs. + rL <- as.data.frame(res_orig) + ## Extend the mzmin and mzmax if needed. 
+ tittle <- rL[, "mz"] * (25 / 2) / 1E6 + expand_mz <- (rL[, "mzmax"] - rL[, "mzmin"]) < (tittle * 2) + if (any(expand_mz)) { + rL[expand_mz, "mzmin"] <- rL[expand_mz, "mz"] - + tittle[expand_mz] + rL[expand_mz, "mzmax"] <- rL[expand_mz, "mz"] + tittle[expand_mz] + } + rL <- split(rL, f = 1:nrow(rL)) + res_orig2 <- xcms:::.centWave_orig(mz = mzVals, + int = intVals, + scantime = xr@scantime, + valsPerSpect, + noise = 4000, verboseColumns = TRUE, + roiList = rL, + firstBaselineCheck = FALSE) + res_new2 <- xcms:::.centWave_new(mz = mzVals, + int = intVals, + scantime = xr@scantime, + valsPerSpect, + noise = 4000, verboseColumns = TRUE, + roiList = rL, + firstBaselineCheck = FALSE) + ## I get more peaks with the modified version: + checkTrue(nrow(res_orig2) < nrow(res_new2)) + ## Sort them by rt and mz + idx <- order(res_orig2[, "rt"], res_orig2[, "mz"]) + res_orig2 <- res_orig2[idx, ] + idx <- order(res_new2[, "rt"], res_new2[, "mz"]) + res_new2 <- res_new2[idx, ] + ## Are identified peaks similar? + id_1 <- paste(res_orig2[, "rt"], res_orig2[, "mz"]) + id_2 <- paste(res_new2[, "rt"], res_new2[, "mz"]) + ## Are all from res_orig2 in res_new2? + checkTrue(length(id_1) == sum(id_1 %in% id_2)) ## YES + ## Are the values for these the same? + same_new2 <- res_new2[match(id_1, id_2), ] + ## check columns: + cn <- colnames(same_new2) + cn <- cn[!(cn %in% c("mzmin"))] + for (i in cn) { + checkEquals(res_orig2[, i], same_new2[, i]) + } + ## So, everything is identical, EXCEPT mzmin: + idx <- which(same_new2[, "mzmin"] != res_orig2[, "mzmin"]) + res_orig2[idx, "mzmin"] + same_new2[idx, "mzmin"] + ## Are the different ones 0? + checkTrue(all(res_orig2[idx, "mzmin"] == 0)) +} + +test_do_findChromPeaks_centWave <- function() { + ## xr <- xcmsRaw(fs[1], profstep = 0) + ## We expect that changing a parameter has an influence on the result. 
+ mzVals <- xr@env$mz + intVals <- xr@env$intensity + ## Define the values per spectrum: + valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) + res1 <- do_findChromPeaks_centWave(mz = mzVals, + int = intVals, + scantime = xr@scantime, + valsPerSpect, + snthresh = 200, + noise = 4000) + res2 <- do_findChromPeaks_centWave(mz = mzVals, + int = intVals, + scantime = xr@scantime, + valsPerSpect, + snthresh = 500, + noise = 4000) + checkTrue(nrow(res1) > nrow(res2)) + + ## Check scanrange on findPeaks.centWave. + res_1 <- findPeaks.centWave(xr, scanrange = c(90, 345), noise = 2000) + xr <- xr[90:345] + mzVals <- xr@env$mz + intVals <- xr@env$intensity + ## Define the values per spectrum: + valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) + res_2 <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, + scantime = xr@scantime, valsPerSpect, + noise = 2000) + checkEquals(res_1@.Data, res_2) +} + +## Evaluate the peak detection method using the centWave method on +## OnDiskMSnExp and on MSnExp objects. +test_findChromPeaks_centWave <- function() { + ## Control + library(MSnbase) + ## xr <- xcmsRaw(fs[1], profstep = 0) + ppm <- 40 + snthresh <- 40 + res_x <- findPeaks.centWave(xr, ppm = ppm, snthresh = snthresh, + noise = 100000)@.Data + ## Bypass xcmsRaw + xs <- xcmsSet(fs[1], profparam = list(profstep = 0), ppm = ppm, + snthresh = snthresh, method = "centWave", + noise = 100000) + checkEquals(xs@peaks[, colnames(res_x)], res_x) + ## OnDiskMSnExp + ## onDisk <- readMSData2(fs[1], msLevel. = 1) + cwp <- CentWaveParam(ppm = ppm, snthresh = snthresh, noise = 100000) + res <- findChromPeaks(onDisk, param = cwp, return.type = "list") + checkEquals(res[[1]], peaks(xs)@.Data) + + ## ## MSnExp + ## inMem <- readMSData(f[1], msLevel. 
= 1) + ## suppressWarnings( + ## res_2 <- findChromPeaks(inMem, param = cwp, return.type = "list") + ## ) + ## checkEquals(res_2[[1]], peaks(xs)@.Data) + + ## returning an xcmsSet + res <- findChromPeaks(onDisk, param = cwp, return.type = "xcmsSet") + checkEquals(peaks(res), peaks(xs)) + ## suppressWarnings( + ## res <- findChromPeaks(inMem, param = cwp, return.type = "xcmsSet") + ## ) + ## checkEquals(peaks(res), peaks(xs)) + + ## Return type XCMSnExp + res <- findChromPeaks(onDisk, param = cwp) + checkTrue(hasChromPeaks(res)) + checkTrue(!hasAdjustedRtime(res)) + checkTrue(!hasFeatures(res)) + checkEquals(peaks(xs)@.Data, chromPeaks(res)[, -ncol(chromPeaks(res))]) +} + +dontrun_test_benchmark_centWaves <- function() { + library(msdata) + f <- msdata::proteomics(full.names = TRUE, pattern = "TMT_Erwinia") + library(microbenchmark) + library(MSnbase) + library(xcms) + ## + ## xr <- xcmsRaw(f[1], profstep = 0) + ppm <- 40 + snthresh <- 40 + + cwp <- CentWaveParam(ppm = ppm, snthresh = snthresh) + ## onDisk <- readMSData2(f[1], msLevel. = 1) + register(SerialParam()) + system.time( + tmp <- findChromPeaks(onDisk, param = cwp) + ) ## 9.7sec + system.time( + tmp <- findChromPeaks(onDisk, param = cwp, return.type = "xcmsSet") + ) ## 12sec + system.time( + tmp <- xcmsSet(f[1], profparam = list(profstep = 0), ppm = ppm, + snthresh = snthresh, method = "centWave") + ) ## 11.99sec + + inMem <- readMSData(f[1], msLevel. = 1) + register(SerialParam()) + + ## findChromPeaks,MSnExp and findPeaks.centWave should be about similar. + microbenchmark(findPeaks.centWave(xr, ppm = ppm, snthresh = snthresh), + findChromPeaks(inMem, param = cwp), times = 3) + ## findPeaks.centWave is about 1 second faster. + + ## findChromPeaks,OnDiskMSnExp and xcmsSet should be about similar. 
+ microbenchmark(xcmsSet(f[1], profparam = list(profstep = 0), ppm = ppm, + snthresh = snthresh, method = "centWave"), + findChromPeaks(onDisk, param = cwp), + findChromPeaks(inMem, param = cwp), + times = 3) +} + +dontrun_test_benchmark_centWaves <- function() { + library(msdata) + f <- msdata::proteomics(full.names = TRUE, pattern = "TMT_Erwinia") + library(microbenchmark) + library(MSnbase) + library(xcms) + ## + ## xr <- xcmsRaw(f[1], profstep = 0) + ppm <- 40 + snthresh <- 40 + + cwp <- CentWaveParam(ppm = ppm, snthresh = snthresh) + ## onDisk <- readMSData2(f[1], msLevel. = 1) + register(SerialParam()) + system.time( + tmp <- findChromPeaks(onDisk, param = cwp) + ) ## 9.7sec + system.time( + tmp <- findChromPeaks(onDisk, param = cwp, return.type = "xcmsSet") + ) ## 12sec + system.time( + tmp <- xcmsSet(f[1], profparam = list(profstep = 0), ppm = ppm, + snthresh = snthresh, method = "centWave") + ) ## 11.99sec + + inMem <- readMSData(f[1], msLevel. = 1) + register(SerialParam()) + + ## findChromPeaks,MSnExp and findPeaks.centWave should be about similar. + microbenchmark(findPeaks.centWave(xr, ppm = ppm, snthresh = snthresh), + findChromPeaks(inMem, param = cwp), times = 3) + ## findPeaks.centWave is about 1 second faster. + + ## findChromPeaks,OnDiskMSnExp and xcmsSet should be about similar. + microbenchmark(xcmsSet(f[1], profparam = list(profstep = 0), ppm = ppm, + snthresh = snthresh, method = "centWave"), + findChromPeaks(onDisk, param = cwp), + findChromPeaks(inMem, param = cwp), + times = 3) +} + + + + +############################################################ +## This is only relevant during development of the do_ function +## to evaluate that results are identical. 
+dontrun_test_do_findChromPeaks_centWave_impl <- function() { + + for (i in 1:length(fs)) { + ppm = 25 + peakwidth = c(20, 50) + snthresh = 10 + prefilter = c(3, 100) + mzCenterFun = "wMean" + integrate = 1 + mzdiff = -0.001 + fitgauss = FALSE + noise = 0 + verboseColumns = FALSE + + xr <- xcmsRaw(fs[i]) + + ## Default settings + .runAndCompare(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, + integrate, mzdiff, fitgauss, noise, verboseColumns) + ## xcms: 14.6 sec + ## do_ : 13 sec + + ppm <- 10 + .runAndCompare(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, + integrate, mzdiff, fitgauss, noise, verboseColumns) + ## xcms: 15 sec + ## do_ : 13.3 sec + + peakwidth <- c(3, 30) + .runAndCompare(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, + integrate, mzdiff, fitgauss, noise, verboseColumns) + ## xcms: 11.4 sec + ## do_ : 9.5 sec + + snthresh <- 15 + .runAndCompare(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, + integrate, mzdiff, fitgauss, noise, verboseColumns) + ## xcms: 10.6 sec + ## do_ : 8.8 sec + + fitgauss <- TRUE + .runAndCompare(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, + integrate, mzdiff, fitgauss, noise, verboseColumns) + ## xcms: 12.5 sec + ## do_ : 10.7 sec + + verboseColumns <- TRUE + .runAndCompare(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, + integrate, mzdiff, fitgauss, noise, verboseColumns) + ## xcms: 12.2 sec + ## do_ : 10.6 sec + } +} + +## That's to compare the functions in version 1.49.7. +.runAndCompare <- function(xr, ppm, peakwidth, snthresh, prefilter, mzCenterFun, + integrate, mzdiff, fitgauss, noise, verboseColumns) { + require(RUnit) + mz <- xr@env$mz + int <- xr@env$intensity + scantime <- xr@scantime + scanindex <- xr@scanindex + a <- system.time( + ## That's the method called inside do_... 
+ xrDo <- xcms:::.centWave_orig(mz = mz, int = int, scantime = scantime, + valsPerSpect = diff(c(scanindex, length(mz))), + ppm = ppm, peakwidth = peakwidth, + snthresh = snthresh, + prefilter = prefilter, + mzCenterFun = mzCenterFun, + integrate = integrate, + mzdiff = mzdiff, + fitgauss = fitgauss, + noise = noise, + verboseColumns = verboseColumns) + ) ## 12.7 + ## Run the original centWave code on xcmsRaw: + b <- system.time( + xrPeaks <- xcms:::.findPeaks.centWave_orig(xr, + ppm = ppm, + peakwidth = peakwidth, + snthresh = snthresh, + prefilter = prefilter, + mzCenterFun = mzCenterFun, + integrate = integrate, + mzdiff = mzdiff, + fitgauss = fitgauss, + noise = noise, + verbose.columns = verboseColumns) + ) ## 15.4 + ## Compare. + cat("DO: ", a, "\n") + cat("XCMS: ", b, "\n") + if (!checkEquals(new("xcmsPeaks", xrDo), xrPeaks)) + stop("do_ and xcms yield different results!") +} + + +## Some speed tests. +.otherTest <- function() { + Testv <- c(2, 4.2, 34.1, 34.5, 6.4, 6.3, 1.2) + RforM <- matrix(nrow = 0, ncol = length(Testv)) + system.time( + for(i in 1:5000){ + RforM <- rbind(RforM, Testv) + } + ) ## 1.27 + ## with append to list. + RforL <- vector("list", 0) + system.time( + for(i in 1:5000){ + RforL <- c(RforL, Testv) + } + ) ## 1.12 + system.time( + RapplyL <- lapply(1:5000, function(z) {return(Testv)}) + ) ## 0.003 + RM <- matrix(nrow=5000, ncol = length(Testv)) + system.time( + for (i in 1:5000) { + RM[i, ] <- Testv + } + ) ## 0.006 + + ## Compare adding to list instead of adding to existing. [[]] + RexL <- vector("list", 5000) + system.time( + for (i in 1:5000){ + RexL[[i]] <- Testv + } + ) ## 0.005 + ## Dynamically... 
+ RexL <- list() + system.time( + for (i in 1:5000){ + RexL[[i]] <- Testv + } + ) ## 0.005 + +} diff --git a/inst/unitTests/runit.do_detectFeatures_centWave_isotopes.R b/inst/unitTests/runit.do_findChromPeaks_centWave_isotopes.R similarity index 70% rename from inst/unitTests/runit.do_detectFeatures_centWave_isotopes.R rename to inst/unitTests/runit.do_findChromPeaks_centWave_isotopes.R index 5298128ce..adf493894 100644 --- a/inst/unitTests/runit.do_detectFeatures_centWave_isotopes.R +++ b/inst/unitTests/runit.do_findChromPeaks_centWave_isotopes.R @@ -10,20 +10,20 @@ mzVals <- xr@env$mz intVals <- xr@env$intensity ## f <- msdata::proteomics(full.names = TRUE, pattern = "TMT_Erwinia") -test_do_detectFeatures_centWaveWithPredIsoROIs <- function() { +test_do_findChromPeaks_centWaveWithPredIsoROIs <- function() { ## initial centWave: valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) - feats_1 <- do_detectFeatures_centWave(mz = mzVals, int = intVals, + feats_1 <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect = valsPerSpect, noise = 1500, verboseColumns = TRUE) - feats_2 <- do_detectFeatures_addPredIsoROIs(mz = mzVals, + feats_2 <- do_findChromPeaks_addPredIsoROIs(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect = valsPerSpect, noise = 1500, - features. = feats_1) - all_f <- do_detectFeatures_centWaveWithPredIsoROIs(mz = mzVals, + peaks. 
= feats_1) + all_f <- do_findChromPeaks_centWaveWithPredIsoROIs(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect = valsPerSpect, @@ -34,9 +34,37 @@ test_do_detectFeatures_centWaveWithPredIsoROIs <- function() { ## checkEquals(all_f, old_all@.Data) } -## Evaluate the featureDetection method using the centWaveWithPreIsoROIs method +## Check the influence of the modified centWave on the isotope centWave +dontrun_test_original_new_centWave_isotopes <- function() { + fl <- system.file("cdf/ko15.CDF", package = "msdata") + raw <- readMSData2(fl) + ## Default settings + cwp <- CentWaveParam(verboseColumns = TRUE) + options(originalCentWave = TRUE) + orig <- findChromPeaks(raw, param = cwp) + + ## Do the isoROIs. + mz <- mz(raw) + vps <- lengths(mz) + options(originalCentWave = TRUE) + iso_orig <- xcms:::do_findChromPeaks_addPredIsoROIs_mod(mz = unlist(mz), + int = unlist(intensity(raw)), + scantime = rtime(raw), + valsPerSpect = vps, + peaks. = chromPeaks(orig)) + options(originalCentWave = FALSE) + iso_modi <- xcms:::do_findChromPeaks_addPredIsoROIs_mod(mz = unlist(mz), + int = unlist(intensity(raw)), + scantime = rtime(raw), + valsPerSpect = vps, + peaks. = chromPeaks(orig)) + + ## LLLL How do peaks look like when they have an mz width of 0? +} + +## Evaluate the peak detection method using the centWaveWithPreIsoROIs method ## on OnDiskMSnExp and on MSnExp objects. -test_detectFeatures_centWaveWithPredIsoROIs <- function() { +test_findChromPeaks_centWaveWithPredIsoROIs <- function() { ## Control library(MSnbase) ##ppm <- 40 @@ -55,33 +83,33 @@ test_detectFeatures_centWaveWithPredIsoROIs <- function() { onDisk <- readMSData2(fs[1], msLevel. = 1) cwp <- CentWavePredIsoParam(snthresh = snth, noise = ns, snthreshIsoROIs = snthIso) - res <- detectFeatures(onDisk, param = cwp, return.type = "list") + res <- findChromPeaks(onDisk, param = cwp, return.type = "list") checkEquals(res[[1]], peaks(xs)@.Data) ## ## MSnExp ## inMem <- readMSData(fs[1], msLevel. 
= 1) - ## res_2 <- detectFeatures(inMem, param = cwp, return.type = "list") + ## res_2 <- findChromPeaks(inMem, param = cwp, return.type = "list") ## checkEquals(res_2[[1]], peaks(xs)@.Data) ## returning an xcmsSet - res <- detectFeatures(onDisk, param = cwp, return.type = "xcmsSet") + res <- findChromPeaks(onDisk, param = cwp, return.type = "xcmsSet") checkEquals(peaks(res), peaks(xs)) - ## res <- detectFeatures(inMem, param = cwp, return.type = "xcmsSet") + ## res <- findChromPeaks(inMem, param = cwp, return.type = "xcmsSet") ## checkEquals(peaks(res), peaks(xs)) ## Return an XCMSnExp - res <- detectFeatures(onDisk, param = cwp) - checkTrue(hasDetectedFeatures(res)) + res <- findChromPeaks(onDisk, param = cwp) + checkTrue(hasChromPeaks(res)) checkTrue(!hasAdjustedRtime(res)) - checkTrue(!hasAlignedFeatures(res)) - checkEquals(peaks(xs)@.Data, features(res)) + checkTrue(!hasFeatures(res)) + checkEquals(peaks(xs)@.Data, chromPeaks(res)[, colnames(peaks(xs)@.Data)]) ## Check on the full data. ## xs <- xcmsSet(fs, profparam = list(profstep = 0), snthresh = snth, ## method = "centWaveWithPredictedIsotopeROIs", noise = ns, ## snthreshIsoROIs = snthIso) ## onDisk <- readMSData2(fs, msLevel. = 1) - ## res <- detectFeatures(onDisk, param = cwp) + ## res <- findChromPeaks(onDisk, param = cwp) ## checkEquals(features(res), peaks(xs)@.Data) } @@ -90,15 +118,15 @@ test_detectFeatures_centWaveWithPredIsoROIs <- function() { dontrun_test_impl_centWave_add <- function() { ## Using the do functions: valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) - do_1 <- do_detectFeatures_centWave(mz = mzVals, int = intVals, + do_1 <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect = valsPerSpect, verboseColumns = TRUE) - do_2 <- do_detectFeatures_addPredIsoROIs(mz = mzVals, int = intVals, + do_2 <- do_findChromPeaks_addPredIsoROIs(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect = valsPerSpect, - features. 
= do_1) - do_3 <- do_detectFeatures_centWaveWithPredIsoROIs(mz = mzVals, int = intVals, + peaks. = do_1) + do_3 <- do_findChromPeaks_centWaveWithPredIsoROIs(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect = valsPerSpect) checkEquals(do_2, do_3) @@ -116,7 +144,7 @@ dontrun_test_impl_centWave_add <- function() { checkEquals(fp_2, xs_2) ## - do_4 <- do_detectFeatures_centWaveWithPredIsoROIs(mz = mzVals, int = intVals, + do_4 <- do_findChromPeaks_centWaveWithPredIsoROIs(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect = valsPerSpect, noise = 500, diff --git a/inst/unitTests/runit.do_detectFeatures_massifquant.R b/inst/unitTests/runit.do_findChromPeaks_massifquant.R similarity index 92% rename from inst/unitTests/runit.do_detectFeatures_massifquant.R rename to inst/unitTests/runit.do_findChromPeaks_massifquant.R index 13443d2b4..633c7cd83 100644 --- a/inst/unitTests/runit.do_detectFeatures_massifquant.R +++ b/inst/unitTests/runit.do_findChromPeaks_massifquant.R @@ -1,5 +1,5 @@ ############################################################ -## do_detectFeatures_massifquant tests +## do_findChromPeaks_massifquant tests fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), system.file('cdf/KO/ko16.CDF', package = "faahKO"), system.file('cdf/KO/ko18.CDF', package = "faahKO"), @@ -15,18 +15,18 @@ mzf <- c(system.file("microtofq/MM14.mzML", package = "msdata"), ############################################################ ## Simple test comparing results from various massifquant runs and ## centWave analyses. 
-test_do_detectFeatures_massifquant <- function() { +test_do_findChromPeaks_massifquant <- function() { res <- findPeaks.massifquant(xr, snthresh = 100) mz <- xr@env$mz int <- xr@env$intensity valsPerSpect <- diff(c(xr@scanindex, length(mz))) scantime <- xr@scantime - res_2 <- do_detectFeatures_massifquant(mz = mz, int = int, + res_2 <- do_findChromPeaks_massifquant(mz = mz, int = int, valsPerSpect = valsPerSpect, scantime = scantime) checkEquals(res@.Data, res_2) ## With centWave: - res_3 <- do_detectFeatures_massifquant(mz = mz, int = int, + res_3 <- do_findChromPeaks_massifquant(mz = mz, int = int, valsPerSpect = valsPerSpect, scantime = scantime, withWave = TRUE, snthresh = 100, noise = 4000) @@ -42,36 +42,36 @@ test_do_detectFeatures_massifquant <- function() { int <- xsub@env$intensity valsPerSpect <- diff(c(xsub@scanindex, length(mz))) scantime <- xsub@scantime - res_2 <- do_detectFeatures_massifquant(mz = mz, int = int, + res_2 <- do_findChromPeaks_massifquant(mz = mz, int = int, valsPerSpect = valsPerSpect, scantime = scantime) checkIdentical(res_1@.Data, res_2) } -## Evaluate the featureDetection method using massifquant on MSnExp and +## Evaluate the peak detection method using massifquant on MSnExp and ## OnDiskMSnExp objects. 
-test_featureDetection_massifquant <- function() { +test_findChromPeaks_massifquant <- function() { library(MSnbase) mqp <- MassifquantParam(ppm = 20, criticalValue = 1.2) res <- xcmsSet(mzf[1], method = "massifquant", ppm = 20, criticalValue = 1.2) ## onDisk onDisk <- readMSData2(mzf[1]) - res_o <- detectFeatures(onDisk, param = mqp, return.type = "xcmsSet") + res_o <- findChromPeaks(onDisk, param = mqp, return.type = "xcmsSet") checkEquals(peaks(res_o), peaks(res)) checkEquals(res_o@rt$raw, res@rt$raw, checkNames = FALSE) ## Full data ## onDisk <- readMSData2(mzf) - ## res <- detectFeatures(onDisk, param = mqp) + ## res <- findChromPeaks(onDisk, param = mqp) ## xs <- xcmsSet(mzf, method = "massifquant", ppm = 20, criticalValue = 1.2) - ## checkTrue(hasDetectedFeatures(res)) + ## checkTrue(hasChromPeaks(res)) ## checkTrue(!hasAdjustedRtime(res)) - ## checkTrue(!hasAlignedFeatures(res)) - ## checkEquals(peaks(xs)@.Data, features(res)) + ## checkTrue(!hasFeatures(res)) + ## checkEquals(peaks(xs)@.Data, chromPeaks(res)) ## inMem ## inMem <- readMSData(mzf[1], msLevel. = 1) - ## res_i <- detectFeatures(inMem, param = mqp, return.type = "xcmsSet") + ## res_i <- findChromPeaks(inMem, param = mqp, return.type = "xcmsSet") ## checkEquals(peaks(res_i), peaks(res)) ## checkEquals(res_i@rt$raw, res@rt$raw, checkNames = FALSE) } @@ -80,7 +80,7 @@ test_featureDetection_massifquant <- function() { ############################################################ ## Test the implementation of the "do" function, i.e. whether ## the results are the same between versions and implementations. -dontrun_test_do_detectFeatures_massifquant_impl <- function() { +dontrun_test_do_findChromPeaks_massifquant_impl <- function() { library(xcms) library(RUnit) @@ -110,7 +110,7 @@ dontrun_test_do_detectFeatures_massifquant_impl <- function() { consecMissedLimit <- 2 unions <- 1 withWave <- 0 - ## Now, this method calls do_detectFeatures_massifquant. 
+ ## Now, this method calls do_findChromPeaks_massifquant. a <- findPeaks.massifquant(xr, ppm = ppm, peakwidth = peakwidth, snthresh = snthresh, criticalValue = criticalValue, @@ -125,7 +125,7 @@ dontrun_test_do_detectFeatures_massifquant_impl <- function() { ## LLL: compare the _orig method ## 1) check if scanrange works between both. ## 2) compare the orig_ with the do - d <- do_detectFeatures_massifquant(mz, int, scantime = scantime, + d <- do_findChromPeaks_massifquant(mz, int, scantime = scantime, valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, snthresh = snthresh, @@ -156,7 +156,7 @@ dontrun_test_do_detectFeatures_massifquant_impl <- function() { consecMissedLimit = consecMissedLimit, unions = unions, withWave = withWave) checkEquals(a@.Data, b) - d <- do_detectFeatures_massifquant(mz, int, scantime = scantime, + d <- do_findChromPeaks_massifquant(mz, int, scantime = scantime, valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, snthresh = snthresh, @@ -184,7 +184,7 @@ dontrun_test_do_detectFeatures_massifquant_impl <- function() { consecMissedLimit = consecMissedLimit, unions = unions, withWave = withWave) checkEquals(a, b) - d <- do_detectFeatures_massifquant(mz, int, scantime = scantime, + d <- do_findChromPeaks_massifquant(mz, int, scantime = scantime, valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, snthresh = snthresh, @@ -211,7 +211,7 @@ dontrun_test_do_detectFeatures_massifquant_impl <- function() { consecMissedLimit = consecMissedLimit, unions = unions, withWave = withWave) checkEquals(a, b) - d <- do_detectFeatures_massifquant(mz, int, scantime = scantime, + d <- do_findChromPeaks_massifquant(mz, int, scantime = scantime, valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, snthresh = snthresh, @@ -238,7 +238,7 @@ dontrun_test_do_detectFeatures_massifquant_impl <- function() { consecMissedLimit = consecMissedLimit, unions = unions, withWave = withWave) checkEquals(a@.Data, b) - d <- 
do_detectFeatures_massifquant(mz, int, scantime = scantime, + d <- do_findChromPeaks_massifquant(mz, int, scantime = scantime, valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, snthresh = snthresh, @@ -265,7 +265,7 @@ dontrun_test_do_detectFeatures_massifquant_impl <- function() { consecMissedLimit = consecMissedLimit, unions = unions, withWave = withWave) checkEquals(a@.Data, b) - d <- do_detectFeatures_massifquant(mz, int, scantime = scantime, + d <- do_findChromPeaks_massifquant(mz, int, scantime = scantime, valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, snthresh = snthresh, @@ -292,7 +292,7 @@ dontrun_test_do_detectFeatures_massifquant_impl <- function() { consecMissedLimit = consecMissedLimit, unions = unions, withWave = withWave) checkEquals(a, b) - d <- do_detectFeatures_massifquant(mz, int, scantime = scantime, + d <- do_findChromPeaks_massifquant(mz, int, scantime = scantime, valsPerSpect = valsPerSpect, ppm = ppm, peakwidth = peakwidth, snthresh = snthresh, diff --git a/inst/unitTests/runit.do_detectFeatures_matchedFilter.R b/inst/unitTests/runit.do_findChromPeaks_matchedFilter.R similarity index 95% rename from inst/unitTests/runit.do_detectFeatures_matchedFilter.R rename to inst/unitTests/runit.do_findChromPeaks_matchedFilter.R index 07e776c1b..aaac3ad79 100644 --- a/inst/unitTests/runit.do_detectFeatures_matchedFilter.R +++ b/inst/unitTests/runit.do_findChromPeaks_matchedFilter.R @@ -1,4 +1,4 @@ -## Testo detectFeatures matchedFilter +## Testo findChromPeaks matchedFilter ## library(xcms) ## library(RUnit) @@ -13,7 +13,7 @@ fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), ## system.file("microtofq/MM8.mzML", package = "msdata")) -test_do_detectFeatures_matchedFilter <- function() { +test_do_findChromPeaks_matchedFilter <- function() { ## xr <- xcmsRaw(fs[1], profstep = 0) xr <- deepCopy(faahko_xr_1) ## We expect that changing a parameter has an influence on the result. 
@@ -21,19 +21,19 @@ test_do_detectFeatures_matchedFilter <- function() { intVals <- xr@env$intensity ## Define the values per spectrum: valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) - res1 <- do_detectFeatures_matchedFilter(mz = mzVals, + res1 <- do_findChromPeaks_matchedFilter(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect, binSize = 10) - res2 <- do_detectFeatures_matchedFilter(mz = mzVals, + res2 <- do_findChromPeaks_matchedFilter(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect, binSize = 10, snthresh = 100) checkTrue(nrow(res1) > nrow(res2)) - res2 <- do_detectFeatures_matchedFilter(mz = mzVals, + res2 <- do_findChromPeaks_matchedFilter(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect, @@ -41,10 +41,10 @@ test_do_detectFeatures_matchedFilter <- function() { checkTrue(nrow(res1) > nrow(res2)) } -## Evaluate the featureDetection method using matchedFilter on MSnExp and +## Evaluate the peak detection method using matchedFilter on MSnExp and ## OnDiskMSnExp objects. For now we can't read CDF files, so we have to restrict ## to provided mzML files. -test_featureDetection_matchedFilter <- function() { +test_findChromPeaks_matchedFilter <- function() { library(MSnbase) mfp <- MatchedFilterParam(binSize = 20, impute = "lin") res <- xcmsSet(fs[1], method = "matchedFilter", profmethod = "binlin", @@ -52,23 +52,23 @@ test_featureDetection_matchedFilter <- function() { ## onDisk ## onDisk <- readMSData2(fs[1]) onDisk <- filterFile(faahko_od, file = 1) - res_o <- detectFeatures(onDisk, param = mfp, return.type = "xcmsSet") + res_o <- findChromPeaks(onDisk, param = mfp, return.type = "xcmsSet") checkEquals(peaks(res_o), peaks(res)) checkEquals(res_o@rt$raw, res@rt$raw, checkNames = FALSE) ## inMem ## inMem <- readMSData(mzf, msLevel. 
= 1) - ## res_i <- detectFeatures(inMem, param = mfp, return.type = "xcmsSet") + ## res_i <- findChromPeaks(inMem, param = mfp, return.type = "xcmsSet") ## checkEquals(peaks(res_i), peaks(res)) ## checkEquals(res_i@rt$raw, res@rt$raw, checkNames = FALSE) ## xs <- xcmsSet(fs, , method = "matchedFilter", profmethod = "binlin", ## step = binSize(mfp)) ## onDisk <- readMSData2(fs) - ## res <- detectFeatures(onDisk, param = mfp) - ## checkTrue(hasDetectedFeatures(res)) + ## res <- findChromPeaks(onDisk, param = mfp) + ## checkTrue(hasChromPeaks(res)) ## checkTrue(!hasAdjustedRtime(res)) - ## checkTrue(!hasAlignedFeatures(res)) - ## checkEquals(peaks(xs)@.Data, features(res)) + ## checkTrue(!hasFeatures(res)) + ## checkEquals(peaks(xs)@.Data, chromPeaks(res)) ## checkEquals(processParam(processHistory(res)[[1]]), mfp) } @@ -81,16 +81,16 @@ dontrun_benchmark_detecfFeatures_matchedFilter <- function() { inMem <- readMSData(mzf, msLevel. = 1) microbenchmark(xcmsSet(mzf, method = "matchedFilter", profmethod = "binlin", step = binSize(mfp)), - detectFeatures(onDisk, param = mfp, return.type = "xcmsSet"), - detectFeatures(inMem, param = mfp, return.type = "xcmsSet"), + findChromPeaks(onDisk, param = mfp, return.type = "xcmsSet"), + findChromPeaks(inMem, param = mfp, return.type = "xcmsSet"), times = 3) ## netCDF. onDisk <- readMSData2(fs) inMem <- readMSData(fs, msLevel. = 1) microbenchmark(xcmsSet(fs, method = "matchedFilter", profmethod = "binlin", step = binSize(mfp)), - detectFeatures(onDisk, param = mfp, return.type = "xcmsSet"), - detectFeatures(inMem, param = mfp, return.type = "xcmsSet"), + findChromPeaks(onDisk, param = mfp, return.type = "xcmsSet"), + findChromPeaks(inMem, param = mfp, return.type = "xcmsSet"), times = 3) } @@ -98,7 +98,7 @@ dontrun_benchmark_detecfFeatures_matchedFilter <- function() { ## Compare each individual function to the original one changing ## settings. 
## Comparing each of the functions to the original one: -## A: do_detectFeatures_matchedFilter (original code) +## A: do_findChromPeaks_matchedFilter (original code) ## B: .matchedFilter_binYonX_iter ## C: .matchedFilter_no_iter ## D: .matchedFilter_binYonX_no_iter @@ -106,7 +106,7 @@ dontrun_benchmark_detecfFeatures_matchedFilter <- function() { ## https://github.com/sneumann/xcms/issues/47 ## A description of the results is provided in section "Implementation and ## comparison for matchedFilter" section of "new_functionality.org". -dontrun_test_do_detectFeatures_matchedFilter_impl <- function() { +dontrun_test_do_findChromPeaks_matchedFilter_impl <- function() { library(xcms) library(RUnit) @@ -120,7 +120,7 @@ dontrun_test_do_detectFeatures_matchedFilter_impl <- function() { cat("Comparison of results from different implementations:\n") cat("- orig: the original findPeaks.matchedFilter method.\n") - cat("- A: do_detectFeatures_matchedFilter (containing original code).\n") + cat("- A: do_findChromPeaks_matchedFilter (containing original code).\n") cat(paste0("- B: .matchedFilter_binYonX_iter: new function using binYonX", " for binning and imputeLinInterpol for interpolation. Uses", " iterative buffering like the original code.")) @@ -360,7 +360,7 @@ dontrun_test_do_detectFeatures_matchedFilter_impl <- function() { xr@profparam <- profparam ## The reference is the old code. ## Have to use the _orig method here, since the "official" one uses - ## already do_detectFeatures... + ## already do_findChromPeaks... orig <- xcms:::findPeaks.matchedFilter_orig(xr, fwhm = fwhm, sigma = sigma, diff --git a/inst/unitTests/runit.do_groupChromPeaks.R b/inst/unitTests/runit.do_groupChromPeaks.R new file mode 100644 index 000000000..a4f8a3470 --- /dev/null +++ b/inst/unitTests/runit.do_groupChromPeaks.R @@ -0,0 +1,492 @@ +## Unit tests for all do_groupChromPeaks_* functions and methods/functions related +## to feature grouping. 
+ +## General functions/methods +test_featureValues_XCMSnExp <- function() { + od_x <- faahko_xod + xs <- faahko_xs + + p <- PeakDensityParam(sampleGroups = xs$class) + od_x <- groupChromPeaks(od_x, param = p) + + xs <- group(xs, method = "density") + + checkEquals(unname(groupval(xs, value = "into")), + unname(featureValues(od_x, value = "into"))) + checkEquals(unname(groupval(xs, method = "maxint", value = "into")), + unname(featureValues(od_x, method = "maxint", value = "into"))) + ## Checking errors + checkException(featureValues(od_x, value = "bla")) + +} + +############################################################ +## density +## + +test_groupChromPeaks_PeakDensityParam <- function() { + od_x <- faahko_xod + xs <- faahko_xs + + fdp <- PeakDensityParam(sampleGroups = xs$class) + od_x <- groupChromPeaks(od_x, param = fdp) + xs <- group(xs, method = "density") + checkEquals(xs@groupidx, featureDefinitions(od_x)$peakidx) + fg <- featureDefinitions(od_x) + fg <- S4Vectors::as.matrix(fg[, -ncol(fg)]) + rownames(fg) <- NULL + checkEquals(xs@groups, fg) + checkTrue(length(processHistory(od_x)) == 2) + ph <- processHistory(od_x, type = xcms:::.PROCSTEP.PEAK.GROUPING)[[1]] + checkEquals(processParam(ph), fdp) + checkEquals(rownames(featureDefinitions(od_x)), + xcms:::.featureIDs(nrow(featureDefinitions(od_x)))) + + fdp2 <- PeakDensityParam(sampleGroups = xs$class, binSize = 2, + minFraction = 0.8) + od_x <- groupChromPeaks(od_x, param = fdp2) + xs <- group(xs, method = "density", minfrac = 0.8, mzwid = 2) + checkEquals(xs@groupidx, featureDefinitions(od_x)$peakidx) + fg <- featureDefinitions(od_x) + fg <- S4Vectors::as.matrix(fg[, -ncol(fg)]) + rownames(fg) <- NULL + checkEquals(xs@groups, fg) + checkTrue(length(processHistory(od_x)) == 2) + ph <- processHistory(od_x, type = xcms:::.PROCSTEP.PEAK.GROUPING)[[1]] + checkEquals(processParam(ph), fdp2) + checkEquals(rownames(featureDefinitions(od_x)), + xcms:::.featureIDs(nrow(featureDefinitions(od_x)))) +} + 
+test_do_groupChromPeaks_density <- function() { + fts <- peaks(faahko) + res <- do_groupChromPeaks_density(fts, sampleGroups = sampclass(faahko)) + res_2 <- do_groupChromPeaks_density(fts, sampleGroups = sampclass(faahko), + minFraction = 0.9) + checkTrue(nrow(res$featureDefinitions) > nrow(res_2$featureDefinitions)) +} + +dontrun_do_groupChromPeaks_density_parallel <- function() { + library(xcms) + library(faahKO) + library(RUnit) + data(faahko) + fts <- peaks(faahko) + + res <- xcms:::do_groupChromPeaks_density_par(fts, sampclass(faahko)) + res_2 <- do_groupChromPeaks_density(fts, sampclass(faahko)) + checkEquals(res$featureDefinitions, res_2$featureDefinitions) + checkEquals(res$peakIndex, res_2$peakIndex) + + res <- xcms:::do_groupChromPeaks_density_par(fts, sampclass(faahko), bw = 10) + res_2 <- do_groupChromPeaks_density(fts, sampclass(faahko), bw = 10) + checkEquals(res$featureDefinitions, res_2$featureDefinitions) + checkEquals(res$peakIndex, res_2$peakIndex) + + res <- xcms:::do_groupChromPeaks_density_par(fts, sampclass(faahko), + minFraction = 0.9) + res_2 <- do_groupChromPeaks_density(fts, sampclass(faahko), minFraction = 0.9) + checkEquals(res$featureDefinitions, res_2$featureDefinitions) + checkEquals(res$peakIndex, res_2$peakIndex) +} + +## This is to ensure that the do_groupChromPeaks_density yields identical results +## than the group.density method. Once we're sure of that we rename this to +## "dontrun" and replace the code within the group.density method. 
## NOTE(review): this region was recovered from a collapsed unified-diff chunk;
## the code below is the re-flowed content of the added ("+") lines only.

## Compare the low-level xcms:::do_groupChromPeaks_density function against the
## reference group.density method on the faahKO test set, across several
## parameter combinations (default, single sample group, three groups with
## minfrac, and modified minSamples/maxFeatures). Prefixed "dontrun_" so the
## RUnit runner skips it in routine checks.
dontrun_do_groupChromPeaks_density_compare <- function() {
    xs <- faahko

    fts <- peaks(xs)
    ## o Default
    smpGrp <- sampclass(xs)
    theBw <- 30
    minFr <- 0.5
    minSmp <- 1
    mzW <- 0.25
    maxFts <- 50
    ftGrps <- xcms:::do_groupChromPeaks_density(peaks = fts,
                                                sampleGroups = smpGrp,
                                                bw = theBw,
                                                minFraction = minFr,
                                                minSamples = minSmp,
                                                binSize = mzW,
                                                maxFeatures = maxFts)
    xs$class <- smpGrp
    res_x <- group.density(xs, bw = theBw, minfrac = minFr,
                           minsamp = minSmp, mzwid = mzW, max = maxFts)
    checkEquals(res_x@groups, ftGrps$featureDefinitions)
    checkEquals(res_x@groupidx, ftGrps$peakIndex)
    ## o All one group, different bw
    smpGrp <- rep(1, length(filepaths(xs)))
    theBw <- 10
    minFr <- 0.5
    minSmp <- 1
    mzW <- 0.25
    maxFts <- 50
    ftGrps <- xcms:::do_groupChromPeaks_density(peaks = fts,
                                                sampleGroups = smpGrp,
                                                bw = theBw,
                                                minFraction = minFr,
                                                minSamples = minSmp,
                                                binSize = mzW,
                                                maxFeatures = maxFts)
    xs$class <- smpGrp
    res_x <- group.density(xs, bw = theBw, minfrac = minFr,
                           minsamp = minSmp, mzwid = mzW, max = maxFts)
    checkEquals(res_x@groups, ftGrps$featureDefinitions)
    checkEquals(res_x@groupidx, ftGrps$peakIndex)
    ## o Three groups, minfrac
    smpGrp <- c(1, 1, 2, 2, 3, 3, 3, 3, 2, 2, 3, 3)
    theBw <- 30
    minFr <- 0.3
    minSmp <- 1
    mzW <- 0.4
    maxFts <- 50
    ftGrps <- xcms:::do_groupChromPeaks_density(peaks = fts,
                                                sampleGroups = smpGrp,
                                                bw = theBw,
                                                minFraction = minFr,
                                                minSamples = minSmp,
                                                binSize = mzW,
                                                maxFeatures = maxFts)
    xs$class <- smpGrp
    res_x <- group.density(xs, bw = theBw, minfrac = minFr,
                           minsamp = minSmp, mzwid = mzW, max = maxFts)
    checkEquals(res_x@groups, ftGrps$featureDefinitions)
    checkEquals(res_x@groupidx, ftGrps$peakIndex)
    ## o change also minSmp and maxFts
    smpGrp <- c(1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3)
    theBw <- 30
    minFr <- 0.3
    minSmp <- 2
    mzW <- 0.4
    maxFts <- 10
    ftGrps <- xcms:::do_groupChromPeaks_density(peaks = fts,
                                                sampleGroups = smpGrp,
                                                bw = theBw,
                                                minFraction = minFr,
                                                minSamples = minSmp,
                                                binSize = mzW,
                                                maxFeatures = maxFts)
    xs$class <- smpGrp
    res_x <- group.density(xs, bw = theBw, minfrac = minFr,
                           minsamp = minSmp, mzwid = mzW, max = maxFts)
    checkEquals(res_x@groups, ftGrps$featureDefinitions)
    checkEquals(res_x@groupidx, ftGrps$peakIndex)
}


## Exploratory scratch function (never run by the test suite): investigates
## whether the density grouping's overlapping m/z bins can be reproduced with
## findInterval()-based binning instead of findEqualGreaterM(). Kept for
## documentation of the design decision to stay with the original binning.
dontrun_groupChromPeaks_density_implementation <- function() {
    library(faahKO)
    data("faahko")
    ## 1) check whether the bins are really overlapping.
    pks <- peaks(faahko)
    pks <- pks[order(pks[, "mz"]), ]
    mzWid <- 2
    mass <- seq(pks[1, "mz"], pks[nrow(pks), "mz"] + mzWid, by = mzWid / 2)
    ## masspos is the index in pks for the feature with an mz value >= mass
    masspos <- xcms:::findEqualGreaterM(pks[, "mz"], mass)
    ## My approach:
    idx <- findInterval(pks[, "mz"], mass)
    ## That's not working with overlapping though.
    pks_my <- split.data.frame(pks, f = idx)

    ## Test 2:
    didx <- diff(idx)
    nds <- c(which(didx > 0), nrow(pks))
    strt <- c(1, nds[-length(nds)] + 1)
    ## Fix the starts: if the diff is equal to one, include that too.
    pks_2 <- mapply(strt, nds, FUN = function(a, b) {
        return(pks[a:b, , drop = FALSE])
    })
    library(RUnit)
    checkEquals(unname(pks_2), unname(pks_my))

    ## Idea is to add also the data from the previous bin, if it's only an
    ## index of one away
    tmp <- pks_my
    nameNum <- as.numeric(names(pks_my))
    for (i in 2:length(tmp)) {
        if ((nameNum[i] - nameNum[i - 1]) == 1)
            tmp[[i]] <- rbind(pks_my[[i-1]], pks_my[[i]])
    }

    ## The overlapping mz ranges is tricky - eventually just stick with
    ## original code.

    ## xcms:
    res_x <- vector("list", length(mass))
    idxs <- res_x
    for (i in seq(length = length(mass) - 2)) {
        startidx <- masspos[i]
        endidx <- masspos[i + 2] - 1
        if (endidx - startidx < 0)
            next
        idxs[[i]] <- c(startidx, endidx)
        res_x[[i]] <- pks[startidx:endidx, , drop = FALSE]
    }
    res_x <- res_x[lengths(res_x) > 0]
    idxs <- idxs[lengths(idxs) > 0]
}


############################################################
## mzClust
##
## library(msdata)
## fticrf <- list.files(system.file("fticr", package = "msdata"),
##                      recursive = TRUE, full.names = TRUE)

## ## old
## ## new
## fticr_od <- readMSData2(fticrf[1:2], msLevel. = 1)
## p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005,
##               SNR.method = "data.mean", winSize.noise = 500)
## fticr_xod <- findChromPeaks(fticr_od, param = p)

## Verify that do_groupPeaks_mzClust on the fticr test peaks matches the
## reference group(..., method = "mzClust") result, and that relaxing
## minFraction/absMz yields fewer feature definitions.
test_do_groupPeaks_mzClust <- function() {
    fts <- peaks(fticr_xs)
    res <- do_groupPeaks_mzClust(peaks = fts,
                                 sampleGroups = sampclass(fticr_xs))
    res_2 <- do_groupPeaks_mzClust(peaks = fts,
                                   sampleGroups = sampclass(fticr_xs),
                                   minFraction = 0, absMz = 2)
    checkTrue(nrow(res$featureDefinitions) > nrow(res_2$featureDefinitions))

    res_x <- group(fticr_xs, method = "mzClust")
    checkEquals(res_x@groups, res$featureDefinitions)
    checkEquals(res_x@groupidx, res$peakIndex)
}

## This is to compare the function to the group.mzClust method. Once all is
## fine, rename it to "dontrun".
dontrun_test_groupPeaks_mzClust_compare <- function() {
    library(RUnit)
    library(xcms)
    library(msdata)
    mzdatapath <- system.file("fticr", package = "msdata")
    mzdatafiles <- list.files(mzdatapath, recursive = TRUE, full.names = TRUE)

    ## old
    xs <- xcmsSet(method="MSW", files=mzdatafiles, scales=c(1,7),
                  SNR.method='data.mean' , winSize.noise=500,
                  peakThr=80000, amp.Th=0.005)
    xsg <- group(xs, method="mzClust")

    ## new
    od <- readMSData2(mzdatafiles, msLevel. = 1)
    p <- MSWParam(scales = c(1, 7), ampTh = 0.005, peakThr = 80000,
                  SNR.method = 'data.mean', winSize.noise = 500)
    xod <- findChromPeaks(od, param = p)
    res <- do_groupPeaks_mzClust(chromPeaks(xod), sampleGroups = sampclass(xs))

    checkEquals(peaks(xs), chromPeaks(xod))
    checkEquals(res$featureDefinitions, xsg@groups)
    checkEquals(res$peakIndex, xsg@groupidx)

    ## Check with different class ordering!
    sc_orig <- sampclass(xs)
    sc <- c(2, 2, 1, 1, 4, 4, 4, 3, 3, 3)
    sampclass(xs) <- factor(sc)
    xsg <- group(xs, method="mzClust", minfrac = 0.2)
    res <- do_groupPeaks_mzClust(chromPeaks(xod), sampleGroups = sc,
                                 minFraction = 0.2)
    checkEquals(res$featureDefinitions, xsg@groups)
    checkEquals(res$peakIndex, xsg@groupidx)

    ## NOTE(review): below the xcmsSet call uses minfrac = 0.2 while the do_
    ## call uses minFraction = 0.1 — looks intentional only if the results are
    ## insensitive to that value here; confirm.
    sc <- c("z", "z", "b", "a", "a", "a", "z", "b", "e", "e")
    sampclass(xs) <- factor(sc)
    xsg <- group(xs, method="mzClust", minfrac = 0.2, mzppm = 40)
    res <- do_groupPeaks_mzClust(chromPeaks(xod), sampleGroups = sc,
                                 minFraction = 0.1, ppm = 40)
    checkEquals(res$featureDefinitions, xsg@groups)
    checkEquals(res$peakIndex, xsg@groupidx)

    xsg <- group(xs, method="mzClust", minfrac = 0.2, mzppm = 40, mzabs = 0.1)
    res <- do_groupPeaks_mzClust(chromPeaks(xod), sampleGroups = sc,
                                 minFraction = 0.1, ppm = 40, absMz = 0.1)
    checkEquals(res$featureDefinitions, xsg@groups)
    checkEquals(res$peakIndex, xsg@groupidx)
}

## Test the high-level groupChromPeaks,MzClustParam method on an XCMSnExp:
## results must match the old group(..., method = "mzClust"), the process
## history must record the parameter object, and feature rows must get the
## standard feature IDs.
test_groupPeaks_MzClustParam <- function() {

    p <- MzClustParam(sampleGroups = sampclass(fticr_xs))

    fticr_xod2 <- groupChromPeaks(fticr_xod, param = p)
    fticr_xs2 <- group(fticr_xs, method = "mzClust")
    checkEquals(fticr_xs2@groupidx, featureDefinitions(fticr_xod2)$peakidx)
    fg <- featureDefinitions(fticr_xod2)
    fg <- S4Vectors::as.matrix(fg[, -ncol(fg)])
    rownames(fg) <- NULL
    checkEquals(fticr_xs2@groups, fg)
    checkTrue(length(processHistory(fticr_xod2)) == 2)
    ph <- processHistory(fticr_xod2,
                         type = xcms:::.PROCSTEP.PEAK.GROUPING)[[1]]
    checkEquals(processParam(ph), p)
    checkEquals(rownames(featureDefinitions(fticr_xod2)),
                xcms:::.featureIDs(nrow(featureDefinitions(fticr_xod2))))

    p2 <- MzClustParam(sampleGroups = fticr_xs$class, absMz = 1,
                       minFraction = 0.8)
    fticr_xod2 <- groupChromPeaks(fticr_xod, param = p2)
    fticr_xs2 <- group(fticr_xs, method = "mzClust", minfrac = 0.8, mzabs = 1)
    checkEquals(fticr_xs2@groupidx, featureDefinitions(fticr_xod2)$peakidx)
    fg <- featureDefinitions(fticr_xod2)
    fg <- S4Vectors::as.matrix(fg[, -ncol(fg)])
    rownames(fg) <- NULL
    checkEquals(fticr_xs2@groups, fg)
    checkTrue(length(processHistory(fticr_xod2)) == 2)
    ph <- processHistory(fticr_xod2,
                         type = xcms:::.PROCSTEP.PEAK.GROUPING)[[1]]
    checkEquals(processParam(ph), p2)
    checkEquals(rownames(featureDefinitions(fticr_xod2)),
                xcms:::.featureIDs(nrow(featureDefinitions(fticr_xod2))))
}

############################################################
## nearest
##

## Verify do_groupChromPeaks_nearest against the reference
## group(..., method = "nearest"); a tighter absRt should split groups and
## hence produce more feature definitions.
test_do_groupChromPeaks_nearest <- function() {
    xs <- faahko
    features <- peaks(xs)
    sampleGroups <- sampclass(xs)
    ## Default parameter values, listed for reference (not all are used below).
    mzVsRtBalance <- 10
    mzCheck <- 0.2
    rtCheck <- 15
    kNN <- 10

    res <- do_groupChromPeaks_nearest(features, sampleGroups)
    res_2 <- do_groupChromPeaks_nearest(features, sampleGroups, absRt = 3)
    checkTrue(nrow(res$featureDefinitions) < nrow(res_2$featureDefinitions))
    res_x <- group(xs, method = "nearest")
    checkEquals(res_x@groups, res$featureDefinitions)
}

## Test the high-level groupChromPeaks,NearestPeaksParam method: results match
## group(..., method = "nearest"), the parameter object is recorded in the
## process history and feature IDs are assigned.
test_groupChromPeaks_NearestPeaksParam <- function() {
    od_x <- faahko_xod
    xs <- faahko_xs

    p <- NearestPeaksParam(sampleGroups = xs$class)
    od_x <- groupChromPeaks(od_x, param = p)
    xs <- group(xs, method = "nearest")
    checkEquals(xs@groupidx, featureDefinitions(od_x)$peakidx)
    fg <- featureDefinitions(od_x)
    fg <- S4Vectors::as.matrix(fg[, -ncol(fg)])
    rownames(fg) <- NULL
    checkEquals(xs@groups, fg)
    checkTrue(length(processHistory(od_x)) == 2)
    ph <- processHistory(od_x, type = xcms:::.PROCSTEP.PEAK.GROUPING)[[1]]
    checkEquals(processParam(ph), p)
    checkEquals(rownames(featureDefinitions(od_x)),
                xcms:::.featureIDs(nrow(featureDefinitions(od_x))))

    fdp2 <- NearestPeaksParam(sampleGroups = xs$class, kNN = 3)
    od_x <- groupChromPeaks(od_x, param = fdp2)
    xs <- group(xs, method = "nearest", kNN = 3)
    checkEquals(xs@groupidx, featureDefinitions(od_x)$peakidx)
    fg <- featureDefinitions(od_x)
    fg <- S4Vectors::as.matrix(fg[, -ncol(fg)])
    rownames(fg) <- NULL
    checkEquals(xs@groups, fg)
    checkTrue(length(processHistory(od_x)) == 2)
    ph <- processHistory(od_x, type = xcms:::.PROCSTEP.PEAK.GROUPING)[[1]]
    checkEquals(processParam(ph), fdp2)
    checkEquals(rownames(featureDefinitions(od_x)),
                xcms:::.featureIDs(nrow(featureDefinitions(od_x))))
}


## That's to ensure that the do_ function yields results identical to the
## group.nearest method. Once we've replaced the code in the latter we rename
## this function to "dontrun".
dontrun_test_nearest_impl <- function() {
    library(RUnit)
    library(xcms)
    library(faahKO)
    data(faahko)
    xs <- faahko
    features <- peaks(xs)
    sampleGroups <- sampclass(xs)
    mzVsRtBalance <- 10
    absMz <- 0.2
    absRt <- 15
    kNN <- 10

    res <- group(xs, method = "nearest") ## Oh, nasty warnings! These were
    ## already there in version 1.51.0!
    ## 1.48.0: Yup.
    res_2 <- do_groupChromPeaks_nearest(features, sampleGroups)
    res_n <- xcms:::do_groupChromPeaks_nearest_mod(features, sampleGroups)

    checkEquals(res@groups, res_2$featureDefinitions)
    checkEquals(res@groupidx, res_2$peakIndex)
    checkEquals(res_n$peakIndex, res_2$peakIndex)
    checkEquals(res_n$featureDefinitions, res_2$featureDefinitions)

    ## change sample grouping.
    sc <- c("b", "b", "a", "a", "z", "z", "a", "b", "e", "e", "e", "e")
    sampclass(xs) <- sc
    ## levels are NOT ordered.
    res <- group(xs, method = "nearest")
    res_2 <- do_groupChromPeaks_nearest(features, sc)
    checkEquals(res@groups, res_2$featureDefinitions)
    checkEquals(res@groupidx, res_2$peakIndex)
    res_n <- xcms:::do_groupChromPeaks_nearest_mod(features, sc)
    checkEquals(res_n$peakIndex, res_2$peakIndex)
    checkEquals(res_n$featureDefinitions, res_2$featureDefinitions)

    ## Use sample assignment with ordered levels.
    sampclass(xs) <- factor(sc) ## this re-orders levels!
    res_3 <- group(xs, method = "nearest")
    res_4 <- do_groupChromPeaks_nearest(features, factor(sc))
    checkEquals(res_3@groups, res@groups)
    ## Now, the do function does NOT re-order levels!
    checkEquals(res@groups[, 1:7], res_4$featureDefinitions[, 1:7])
    checkEquals(res@groups[, levels(factor(sc))],
                res_4$featureDefinitions[, levels(factor(sc))])
    checkEquals(res@groupidx, res_4$peakIndex)
    res_n <- xcms:::do_groupChromPeaks_nearest_mod(features, factor(sc))
    checkEquals(res_n$peakIndex, res_4$peakIndex)
    checkEquals(res_n$featureDefinitions, res_4$featureDefinitions)

    ## Now change settings.
    sampclass(xs) <- sc
    res <- group(xs, method = "nearest", mzVsRTbalance = 5)
    res_2 <- do_groupChromPeaks_nearest(features, sc, mzVsRtBalance = 5)
    checkEquals(res@groups, res_2$featureDefinitions)
    checkEquals(res@groupidx, res_2$peakIndex)
    res_n <- xcms:::do_groupChromPeaks_nearest_mod(features, sc, mzVsRtBalance = 5)
    checkEquals(res_n$peakIndex, res_2$peakIndex)
    checkEquals(res_n$featureDefinitions, res_2$featureDefinitions)

    res <- group(xs, method = "nearest", kNN = 3)
    res_2 <- do_groupChromPeaks_nearest(features, sc, kNN = 3)
    checkEquals(res@groups, res_2$featureDefinitions)
    checkEquals(res@groupidx, res_2$peakIndex)
    ## NOTE(review): kNN (= 10) is passed POSITIONALLY here while the res_2
    ## call above uses kNN = 3 by name — the positional argument may bind to a
    ## different formal of do_groupChromPeaks_nearest_mod; confirm this is
    ## intended.
    res_n <- xcms:::do_groupChromPeaks_nearest_mod(features, sc, kNN)
    checkEquals(res_n$peakIndex, res_2$peakIndex)
    checkEquals(res_n$featureDefinitions, res_2$featureDefinitions)

    res <- group(xs, method = "nearest", mzCheck = 0.5)
    res_2 <- do_groupChromPeaks_nearest(features, sc, absMz = 0.5)
    checkEquals(res@groups, res_2$featureDefinitions)
    checkEquals(res@groupidx, res_2$peakIndex)
    res_n <- xcms:::do_groupChromPeaks_nearest_mod(features, sc, absMz = 0.5)
    checkEquals(res_n$peakIndex, res_2$peakIndex)
    checkEquals(res_n$featureDefinitions, res_2$featureDefinitions)

    res <- group(xs, method = "nearest", rtCheck = 3)
    res_2 <- do_groupChromPeaks_nearest(features, sc, absRt = 3)
    checkEquals(res@groups, res_2$featureDefinitions)
    checkEquals(res@groupidx, res_2$peakIndex)
    res_n <- xcms:::do_groupChromPeaks_nearest_mod(features, sc, absRt = 3)
    checkEquals(res_n$peakIndex, res_2$peakIndex)
    checkEquals(res_n$featureDefinitions, res_2$featureDefinitions)

    ## library(profvis)
    ## profvis({
    ##     res_2 <- xcms:::do_groupChromPeaks_nearest(features, sampleGroups)
    ## })
}
diff --git a/inst/unitTests/runit.fillChromPeaks.R b/inst/unitTests/runit.fillChromPeaks.R
new file mode 100644
index 000000000..97e7f3023
--- /dev/null
+++ b/inst/unitTests/runit.fillChromPeaks.R
@@ -0,0 +1,699 @@
## Main integration test for fillChromPeaks on an XCMSnExp: checks the process
## history entry, that filled-in "into" values can be reproduced from the raw
## chromatograms, that NAs remain only where there is truly no signal, that
## expandMz/expandRt widen the integrated areas as documented, that filled
## peaks can be dropped again, and that adjusted retention times are used when
## present.
test_fillChromPeaks <- function() {
    ## No adjusted retention times
    checkTrue(!xcms:::.hasFilledPeaks(xod_xg))
    res <- fillChromPeaks(xod_xg)
    checkTrue(xcms:::.hasFilledPeaks(res))
    ph <- processHistory(res, type = xcms:::.PROCSTEP.PEAK.FILLING)
    checkTrue(length(ph) == 1)
    checkEquals(ph[[1]]@param, FillChromPeaksParam())
    ## Check parameter filled in featureValues (issue #157)
    checkEquals(featureValues(res, filled = FALSE), featureValues(xod_xg))

    ## Check if the signal corresponds to what we expect for some peaks.
    fp <- chromPeaks(res)
    fp <- fp[fp[, "is_filled"] == 1, ]
    idxs <- sample(1:nrow(fp), 5)
    for (i in idxs) {
        cfp <- fp[i, , drop = FALSE]
        tmp <- filterFile(xod_xg, file = cfp[1, "sample"])
        chr <- extractChromatograms(tmp, rt = cfp[1, c("rtmin", "rtmax")],
                                    mz = cfp[1, c("mzmin", "mzmax")])[[1]]
        ## Trapezoid-style re-integration: total intensity times rt width per
        ## data point.
        into <- sum(intensity(chr), na.rm = TRUE) *
            (cfp[1, "rtmax"] - cfp[1, "rtmin"]) / (length(chr) - 1)
        checkEquals(unname(into), unname(cfp[1, "into"]))
    }
    ## Plot the data for some... (manual inspection only, never executed)
    if (FALSE) {
        pk_idx <- featureValues(res)[1, ]
        pks <- chromPeaks(res)[pk_idx, ]
        rtr <- c(min(pks[, "rtmin"]), max(pks[, "rtmax"]))
        rtr[1] <- rtr[1] - 10
        rtr[2] <- rtr[2] + 10
        chrs <- extractChromatograms(res, rt = rtr, mz = c(min(pks[, "mzmin"]),
                                                           max(pks[, "mzmax"])))
        plot(3, 3, pch = NA, xlim = range(lapply(chrs, rtime), na.rm = TRUE),
             ylim = range(lapply(chrs, intensity), na.rm = TRUE), xlab = "rt",
             ylab = "int")
        for (i in 1:length(chrs)) {
            points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l",
                   col = ifelse(pks[i, "is_filled"], yes = "red", no = "black"))
            abline(v = pks[i, c("rtmin", "rtmax")],
                   col = ifelse(pks[i, "is_filled"], yes = "red", no = "black"))
        }
    }

    ## Check if the results are similar to what we get with findChromPeaks
    for (i in 1:length(fileNames(xod_xg))) {
        fnd_pks <- chromPeaks(xod_xg)[chromPeaks(xod_xg)[, "sample"] == i, ]
        ## NOTE(review): `tmp` here is a leftover from the loop above (last
        ## filtered file) and `prm` is never used below — confirm this line is
        ## intentional.
        prm <- processHistory(tmp, type ="Peak detection")[[1]]@param
        ## Extract the data for these using the internal function.
        fld_pks <- xcms:::.getChromPeakData(filterFile(xod_xg, i),
                                            peakArea = fnd_pks,
                                            sample_idx = i,
                                            cn = colnames(fnd_pks))
        ## rt
        checkTrue(cor(fnd_pks[, "rt"], fld_pks[, "rt"]) > 0.99)
        ## mz
        checkTrue(cor(fnd_pks[, "mz"], fld_pks[, "mz"]) > 0.99)
        checkEquals(fnd_pks[, "mz"], fld_pks[, "mz"])
        ## into
        checkTrue(cor(fnd_pks[, "into"], fld_pks[, "into"]) > 0.99)
        checkEquals(fnd_pks[, "into"], fld_pks[, "into"])
        ## checkEquals(fnd_pks[, "into"], fld_pks[, "into"])
        ## maxo
        checkEquals(fnd_pks[, "maxo"], fld_pks[, "maxo"])
        checkEquals(fnd_pks[, "maxo"], fld_pks[, "maxo"])
    }

    ## Check for the NAs if there is really no signal
    gv <- featureValues(res)
    feat_i <- which(is.na(gv[, 1]))
    tmp <- chromPeaks(res)[featureDefinitions(res)$peakidx[[feat_i]],
                           c("rtmin", "rtmax", "mzmin", "mzmax")]
    ## Get the intensities for the first one.
    ## (column-wise medians; FUN = median is matched positionally because
    ## MARGIN is given by name)
    pkArea <- apply(tmp, median, MARGIN = 2)
    chr <- extractChromatograms(res, rt = pkArea[1:2], mz = pkArea[3:4])
    checkTrue(all(unlist(lapply(chr, function(z) is.na(intensity(z))))))
    ## Get also the spectra:
    spctr <- spectra(filterRt(filterFile(xod_xg, file = 1), rt = pkArea[1:2]))
    mzs <- unlist(lapply(spctr, mz))
    ## No spectra for the given mz:
    checkEquals(sum(mzs >= pkArea[3] & mzs <= pkArea[4]), 0)

    ## Check increasing the expandRt and expandMz to see whether we get rid of
    ## the NA.
    res_2 <- fillChromPeaks(xod_xg, param = FillChromPeaksParam(expandMz = 1))
    ## Check if the mzrange is now indeed broader for the integrated ones.
    fp <- chromPeaks(res)
    fp <- fp[fp[, "is_filled"] == 1, ]
    fp2 <- chromPeaks(res_2)
    fp2 <- fp2[fp2[, "is_filled"] == 1, ]
    checkEquals(fp2[, "mzmax"] - fp2[, "mzmin"],
                2 * (fp[, "mzmax"] - fp[, "mzmin"]))

    res_2 <- fillChromPeaks(xod_xg, param = FillChromPeaksParam(expandRt = 1))
    ## Check if the rtrange is now indeed broader for the integrated ones.
    fp <- chromPeaks(res)
    fp <- fp[fp[, "is_filled"] == 1, ]
    fp2 <- chromPeaks(res_2)
    fp2 <- fp2[fp2[, "is_filled"] == 1, ]
    checkEquals(fp2[, "rtmax"] - fp2[, "rtmin"],
                2 * (fp[, "rtmax"] - fp[, "rtmin"]))
    ## Check using ppm
    res_2 <- fillChromPeaks(xod_xg, param = FillChromPeaksParam(ppm = 40,
                                                                expandMz = 5,
                                                                expandRt = 2))
    checkTrue(all(!is.na(rowSums(featureValues(res_2)))))
    ## Drop them.
    res_rem <- dropFilledChromPeaks(res)
    checkTrue(!xcms:::.hasFilledPeaks(res_rem))
    checkEquals(res_rem, xod_xg)
    ## Drop feature definitions from res -> also filled peaks should be dropped.
    res_rem <- dropFeatureDefinitions(res)
    checkTrue(!xcms:::.hasFilledPeaks(res_rem))
    checkTrue(!any(chromPeaks(res_rem)[, "is_filled"] == 1))
    checkEquals(res_rem, xod_x)

    ## With adjusted rtime.
    res_2 <- fillChromPeaks(xod_xgrg)
    ## Check if the signal corresponds to what we expect for some peaks.
    fp <- chromPeaks(res_2)
    fp <- fp[fp[, "is_filled"] == 1, ]
    ## These have to be different from before!
    fp_raw <- chromPeaks(res)
    fp_raw <- fp_raw[fp_raw[, "is_filled"] == 1, ]
    checkTrue(all(fp_raw[, "rt"] != fp[, "rt"]))
    checkTrue(all(fp_raw[, "rtmin"] != fp[, "rtmin"]))
    checkTrue(all(fp_raw[, "rtmax"] != fp[, "rtmax"]))
    checkEquals(fp_raw[, "mz"], fp[, "mz"])
    checkEquals(fp_raw[, "mzmin"], fp[, "mzmin"])
    checkEquals(fp_raw[, "mzmax"], fp[, "mzmax"])
    ## Values are expected to be different, but still correlated!
    checkTrue(all(fp_raw[, "into"] != fp[, "into"]))
    checkTrue(cor(fp_raw[, "into"], fp[, "into"]) > 0.99)
    ## Check if we can get the same data using the provided range.
    ## Use the .rawMat function
    first <- filterFile(xod_xgrg, file = 1, keepAdjustedRtime = TRUE)
    spctr <- spectra(first)
    mzs <- lapply(spctr, mz)
    vps <- lengths(mzs)
    ints <- unlist(lapply(spctr, intensity), use.names = FALSE)
    mzs <- unlist(mzs, use.names = FALSE)
    rtim <- rtime(first)
    idx <- which(fp[, "sample"] == 1)
    for (i in idx) {
        mtx <- xcms:::.rawMat(mz = mzs, int = ints, scantime = rtim,
                              valsPerSpect = vps,
                              rtrange = fp[i, c("rtmin", "rtmax")],
                              mzrange = fp[i, c("mzmin", "mzmax")])
        into <- sum(mtx[, 3], na.rm = TRUE) *
            ((fp[i, "rtmax"] - fp[i, "rtmin"]) /
             (sum(rtim >= fp[i, "rtmin"] & rtim <= fp[i, "rtmax"]) - 1))
        checkEquals(unname(into), unname(fp[i, "into"]))
    }

    ## Drop them.
    res_rem <- dropFilledChromPeaks(res_2)
    checkTrue(!xcms:::.hasFilledPeaks(res_rem))
    checkEquals(res_rem, xod_xgrg)

    ## ## Alternative without rawMat:
    ## for (i in idx) {
    ##     chrs <- extractChromatograms(first, rt = fp[i, c("rtmin", "rtmax")],
    ##                                  mz = fp[i, c("mzmin", "mzmax")])[[1]]
    ##     into <- sum(intensity(chrs), na.rm = TRUE) / (length(chrs) - 1) *
    ##         (fp[i, "rtmax"] - fp[i, "rtmin"])
    ##     checkEquals(unname(into), unname(fp[i, "into"]))
    ## }
}

## fillChromPeaks for MSW peak detection.
## fillChromPeaks on direct-infusion (MSW) data: filling must fail without
## feature definitions, fill all features afterwards, and agree with the old
## xcmsSet/fillPeaks(method = "MSW") results as well as with the internal
## .getMSWPeakData extraction.
test_fillChromPeaks_MSW <- function() {
    p <- MzClustParam()
    fticr_xodg <- groupChromPeaks(fticr_xod, param = p)
    ## Filling requires correspondence results.
    checkException(res <- fillChromPeaks(fticr_xod))
    res <- fillChromPeaks(fticr_xodg)

    ## Got a signal for all of em.
    checkTrue(!any(is.na(featureValues(res))))
    ## 1) Compare with what I get for xcmsSet.
    tmp_x <- fticr_xs
    tmp_x <- group(tmp_x, method = "mzClust")
    tmp_x <- fillPeaks(tmp_x, method = "MSW")
    ## Compare
    checkEquals(unname(groupval(tmp_x)), unname(featureValues(res)))
    checkEquals(unname(groupval(tmp_x, value = "maxo")),
                unname(featureValues(res, value = "maxo")))
    checkEquals(unname(groupval(tmp_x, value = "into")),
                unname(featureValues(res, value = "into")))
    checkEquals(unname(groupval(tmp_x, value = "mz")),
                unname(featureValues(res, value = "mz")))
    checkEquals(unname(groupval(tmp_x, value = "mzmin")),
                unname(featureValues(res, value = "mzmin")))
    checkEquals(unname(groupval(tmp_x, value = "mzmax")),
                unname(featureValues(res, value = "mzmax")))
    ## OK
    ## 2) Check if the fillChromPeaks returns same/similar data than the
    ## findChromPeaks does:
    fdef <- featureDefinitions(fticr_xodg)
    ## Build the per-feature median rt/mz area from the member peaks.
    pkArea <- do.call(
        rbind,
        lapply(
            fdef$peakidx, function(z) {
                tmp <- chromPeaks(fticr_xodg)[z, c("rtmin", "rtmax",
                                                   "mzmin", "mzmax"),
                                              drop = FALSE]
                pa <- c(median(tmp[, 1]), median(tmp[, 2]),
                        median(tmp[, 3]), median(tmp[, 4]))
                return(pa)
            }
        ))
    colnames(pkArea) <- c("rtmin", "rtmax", "mzmin", "mzmax")
    pkArea <- cbind(group_idx = 1:nrow(pkArea), pkArea,
                    mzmed = fdef$mzmed)
    ## Get peak data for all peaks in the first file
    allPks <- xcms:::.getMSWPeakData(filterFile(fticr_xodg, file = 1),
                                     peakArea = pkArea,
                                     sample_idx = 1,
                                     cn = colnames(chromPeaks(fticr_xodg)))
    curP <- chromPeaks(res)[chromPeaks(res)[, "sample"] == 1, ]
    curP <- curP[order(curP[, "mz"]), ]
    checkEquals(allPks[, "mz"], curP[, "mz"])
    checkEquals(allPks[, "maxo"], curP[, "maxo"])
    checkTrue(cor(allPks[, "into"], curP[, "into"]) > 0.99) ## Not exactly the
    ## same but highly similar.
}

## fillChromPeaks after matchedFilter peak detection: filling reduces the
## number of NA feature values and filled "into"/"maxo" values reproduce the
## originally detected ones via .getChromPeakData_matchedFilter.
test_fillChromPeaks_matchedFilter <- function() {
    tmp <- findChromPeaks(faahko_od, param = MatchedFilterParam())
    tmp <- groupChromPeaks(tmp, param = PeakDensityParam())

    tmp_filled <- fillChromPeaks(tmp)
    checkTrue(sum(is.na(featureValues(tmp_filled))) <
              sum(is.na(featureValues(tmp))))
    nas <- is.na(featureValues(tmp)[, 1]) | is.na(featureValues(tmp)[, 2])
    checkTrue(cor(featureValues(tmp, value = "into")[!nas, 1],
                  featureValues(tmp, value = "into")[!nas, 2]) > 0.97)
    ## plot(featureValues(tmp, value = "into")[!nas, 1],
    ##      featureValues(tmp, value = "into")[!nas, 2])
    checkTrue(cor(featureValues(tmp_filled, value = "into")[, 1],
                  featureValues(tmp_filled, value = "into")[, 2],
                  use = "complete.obs") > 0.97)
    ## plot(featureValues(tmp_filled, value = "into")[, 1],
    ##      featureValues(tmp_filled, value = "into")[, 2])

    ## Check signal generation for already found peaks.
    for (i in 1:length(fileNames(tmp))) {
        fnd_pks <- chromPeaks(tmp)[chromPeaks(tmp)[, "sample"] == i, ]
        prm <- processHistory(tmp, type ="Peak detection")[[1]]@param
        ## Extract the data for these using the internal function.
        fld_pks <- xcms:::.getChromPeakData_matchedFilter(filterFile(tmp, i),
                                                          peakArea = fnd_pks,
                                                          sample_idx = i,
                                                          param = prm,
                                                          cn = colnames(fnd_pks))
        ## rt can not be the same, since for fillChromPeaks it is the rt of the
        ## maximum signal and for findChromPeaks it is the rt of the apex of the
        ## filtered/fitted peak.
        checkTrue(cor(fnd_pks[, "rt"], fld_pks[, "rt"]) > 0.99)
        ## mz: also not the same; most likely due to slightly different binning.
        diffs <- fnd_pks[, "mz"] - fld_pks[, "mz"]
        checkTrue(max(diffs) < 1e-4)
        ## into
        checkEquals(fnd_pks[, "into"], fld_pks[, "into"])
        ## maxo
        checkEquals(fnd_pks[, "maxo"], fld_pks[, "maxo"])
    }

    ## modify fillChromPeaks settings.
    tmp_fld_2 <- fillChromPeaks(
        tmp, param = FillChromPeaksParam(ppm = 40, expandRt = 1))
    checkTrue(sum(is.na(featureValues(tmp_filled))) <
              sum(is.na(featureValues(tmp))))
    checkTrue(sum(is.na(featureValues(tmp_fld_2))) <
              sum(is.na(featureValues(tmp_filled))))
    ## NOTE(review): `nas` is re-computed here but not used afterwards —
    ## possibly a leftover; confirm.
    nas <- is.na(featureValues(tmp)[, 1]) | is.na(featureValues(tmp)[, 2])
    checkTrue(cor(featureValues(tmp_fld_2, value = "into")[, 1],
                  featureValues(tmp_fld_2, value = "into")[, 2],
                  use = "complete.obs") > 0.97)
}

## Exhaustive, manually-run checks of fillChromPeaks with matchedFilter using
## different bin sizes, imputation settings and local (non-repository) data
## files; not part of the automatic test suite.
dontrun_exhaustive_fillChromPeaks_matchedFilter <- function() {
    ## Different step sizes.
    prm <- MatchedFilterParam(binSize = 0.6)
    tmp <- findChromPeaks(faahko_od, param = prm)
    tmp <- groupChromPeaks(tmp, param = PeakDensityParam())
    tmp_fld <- fillChromPeaks(tmp)
    checkTrue(sum(is.na(featureValues(tmp_fld))) <
              sum(is.na(featureValues(tmp))))
    nas <- is.na(featureValues(tmp)[, 1]) | is.na(featureValues(tmp)[, 2])
    checkTrue(cor(featureValues(tmp, value = "into")[!nas, 1],
                  featureValues(tmp, value = "into")[!nas, 2]) > 0.97)
    checkTrue(cor(featureValues(tmp_fld, value = "into")[, 1],
                  featureValues(tmp_fld, value = "into")[, 2],
                  use = "complete.obs") > 0.97)
    ## Check signal generation for already found peaks.
    for (i in 1:length(fileNames(tmp))) {
        fnd_pks <- chromPeaks(tmp)[chromPeaks(tmp)[, "sample"] == i, ]
        prm <- processHistory(tmp, type ="Peak detection")[[1]]@param
        ## Extract the data for these using the internal function.
        fld_pks <- xcms:::.getChromPeakData_matchedFilter(filterFile(tmp, i),
                                                          peakArea = fnd_pks,
                                                          sample_idx = i,
                                                          param = prm,
                                                          cn = colnames(fnd_pks))
        ## rt can not be the same, since for fillChromPeaks it is the rt of the
        ## maximum signal and for findChromPeaks it is the rt of the apex of the
        ## filtered/fitted peak.
        checkTrue(cor(fnd_pks[, "rt"], fld_pks[, "rt"]) > 0.99)
        ## mz: also not the same; in here we're weighting by sum of intensities
        ## per mz and in findChromPeaks by individual intensities.
        ## diffs <- fnd_pks[, "mz"] - fld_pks[, "mz"]
        ## checkTrue(max(diffs) < 1e-4)
        checkTrue(cor(fnd_pks[, "mz"], fld_pks[, "mz"]) > 0.99)
        ## into
        checkTrue(cor(fnd_pks[, "into"], fld_pks[, "into"]) > 0.99)
        ## checkEquals(fnd_pks[, "into"], fld_pks[, "into"])
        ## maxo
        checkEquals(fnd_pks[, "maxo"], fld_pks[, "maxo"])
    }

    ## Imputation.
    prm <- MatchedFilterParam(binSize = 0.2, impute = "lin")
    tmp <- findChromPeaks(faahko_od, param = prm)
    tmp <- groupChromPeaks(tmp, param = PeakDensityParam())
    tmp_fld <- fillChromPeaks(tmp)
    checkTrue(sum(is.na(featureValues(tmp_fld))) <
              sum(is.na(featureValues(tmp))))
    nas <- is.na(featureValues(tmp)[, 1]) | is.na(featureValues(tmp)[, 2])
    checkTrue(cor(featureValues(tmp, value = "into")[!nas, 1],
                  featureValues(tmp, value = "into")[!nas, 2]) > 0.9)
    ## Check signal generation for already found peaks.
    for (i in 1:length(fileNames(tmp))) {
        fnd_pks <- chromPeaks(tmp)[chromPeaks(tmp)[, "sample"] == i, ]
        prm <- processHistory(tmp, type ="Peak detection")[[1]]@param
        ## Extract the data for these using the internal function.
        fld_pks <- xcms:::.getChromPeakData_matchedFilter(filterFile(tmp, i),
                                                          peakArea = fnd_pks,
                                                          sample_idx = i,
                                                          param = prm,
                                                          cn = colnames(fnd_pks))
        ## rt can not be the same, since for fillChromPeaks it is the rt of the
        ## maximum signal and for findChromPeaks it is the rt of the apex of the
        ## filtered/fitted peak.
        checkTrue(cor(fnd_pks[, "rt"], fld_pks[, "rt"]) > 0.99)
        ## mz: also not the same; in here we're weighting by sum of intensities
        ## per mz and in findChromPeaks by individual intensities.
        ## diffs <- fnd_pks[, "mz"] - fld_pks[, "mz"]
        ## checkTrue(max(diffs) < 1e-4)
        checkTrue(cor(fnd_pks[, "mz"], fld_pks[, "mz"]) > 0.99)
        ## into
        checkTrue(cor(fnd_pks[, "into"], fld_pks[, "into"]) > 0.99)
        ## checkEquals(fnd_pks[, "into"], fld_pks[, "into"])
        ## maxo
        checkTrue(cor(fnd_pks[, "maxo"], fld_pks[, "maxo"]) > 0.99)
    }

    ## Own files. (hard-coded local paths; only runnable on the author's
    ## machine)
    fls <- c("/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_15.mzML",
             "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_19.mzML",
             "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_11.mzML")
    raw <- readMSData2(fls)
    pks <- findChromPeaks(raw, param = MatchedFilterParam(binSize = 0.05))
    pks <- groupChromPeaks(pks, param = PeakDensityParam())
    tmp_fld <- fillChromPeaks(pks)
    nas <- is.na(featureValues(pks)[, 1]) | is.na(featureValues(pks)[, 2])
    checkTrue(cor(featureValues(pks, value = "into")[!nas, 1],
                  featureValues(pks, value = "into")[!nas, 2]) > 0.97)
    checkTrue(cor(featureValues(tmp_fld, value = "into")[, 1],
                  featureValues(tmp_fld, value = "into")[, 2],
                  use = "complete.obs") > 0.97)
    ## Check signal generation for already found peaks.
    for (i in 1:length(fileNames(pks))) {
        fnd_pks <- chromPeaks(pks)[chromPeaks(pks)[, "sample"] == i, ]
        prm <- processHistory(pks, type ="Peak detection")[[1]]@param
        ## Extract the data for these using the internal function.
        fld_pks <- xcms:::.getChromPeakData_matchedFilter(filterFile(pks, i),
                                                          peakArea = fnd_pks,
                                                          sample_idx = i,
                                                          param = prm,
                                                          cn = colnames(fnd_pks))
        ## rt can not be the same, since for fillChromPeaks it is the rt of the
        ## maximum signal and for findChromPeaks it is the rt of the apex of the
        ## filtered/fitted peak.
        checkTrue(cor(fnd_pks[, "rt"], fld_pks[, "rt"]) > 0.99)
        ## mz: also not the same; in here we're weighting by sum of intensities
        ## per mz and in findChromPeaks by individual intensities.
        ## diffs <- fnd_pks[, "mz"] - fld_pks[, "mz"]
        ## checkTrue(max(diffs) < 1e-4)
        checkTrue(cor(fnd_pks[, "mz"], fld_pks[, "mz"]) > 0.99)
        ## into
        checkTrue(cor(fnd_pks[, "into"], fld_pks[, "into"]) > 0.99)
        checkEquals(fnd_pks[, "into"], fld_pks[, "into"])
        ## maxo
        checkTrue(cor(fnd_pks[, "maxo"], fld_pks[, "maxo"]) > 0.99)
        checkEquals(fnd_pks[, "maxo"], fld_pks[, "maxo"])
    }
}

## Exhaustive, manually-run MSW fill-in comparison against the classic
## xcmsSet/fillPeaks(method = "MSW") workflow on the msdata FT-ICR files; also
## contains interactive plots used to investigate remaining differences.
## (closing brace of this function follows in the untouched text below)
dontrun_exhaustive_fillChromPeaks_MSW <- function() {
    library(xcms)
    library(RUnit)
    library(msdata)
    fticrf <- list.files(system.file("fticr", package = "msdata"),
                         recursive = TRUE, full.names = TRUE)
    fticr <- readMSData2(fticrf, msLevel. = 1)
    p <- MSWParam(scales = c(1, 7), ampTh = 0.005,
                  SNR.method = "data.mean", winSize.noise = 500)
    fticr <- findChromPeaks(fticr, param = p)
    ## Now create the MzClustParam parameter object: we're assuming here that
    ## both samples are from the same sample group.
    p <- MzClustParam()
    fticr <- groupChromPeaks(fticr, param = p)
    res <- fillChromPeaks(fticr)
    ## Got a signal for all of em.
    checkTrue(!any(is.na(featureValues(res))))

    ## 1) Compare with what I get for xcmsSet.
    tmp_x <- xcmsSet(fticrf, method = "MSW", SNR.method = "data.mean",
                     winSize.noise = 500, scales = c(1, 7),
                     amp.Th = 0.005)
    sampclass(tmp_x) <- rep(1, length(sampnames(tmp_x)))
    tmp_x <- group(tmp_x, method = "mzClust")
    checkEquals(unname(groupval(tmp_x)), unname(featureValues(fticr)))
    checkEquals(unname(groupval(tmp_x, value = "maxo")),
                unname(featureValues(fticr, value = "maxo")))
    checkEquals(unname(groupval(tmp_x, value = "into")),
                unname(featureValues(fticr, value = "into")))
    checkEquals(unname(groupval(tmp_x, value = "mz")),
                unname(featureValues(fticr, value = "mz")))
    checkEquals(unname(groupval(tmp_x, value = "mzmin")),
                unname(featureValues(fticr, value = "mzmin")))
    checkEquals(unname(groupval(tmp_x, value = "mzmax")),
                unname(featureValues(fticr, value = "mzmax")))
    ## Fill peaks
    tmp_x <- fillPeaks(tmp_x, method = "MSW")
    checkTrue(!any(is.na(groupval(tmp_x))))
    checkEquals(unname(groupval(tmp_x)), unname(featureValues(res)))
    checkEquals(unname(groupval(tmp_x, value = "maxo")),
                unname(featureValues(res, value = "maxo")))
    ## This below could be made equal if we used the same approach to define
    ## which values to use. See comments on issue #130.
    ## checkEquals(unname(groupval(tmp_x, value = "into")),
    ##             unname(featureValues(res, value = "into")))
    ## plot(groupval(tmp_x, value = "into"), featureValues(res, value = "into"))
    checkTrue(cor(as.numeric(groupval(tmp_x, value = "into")),
                  as.numeric(featureValues(res, value = "into"))) > 0.999)
    ## plot(groupval(tmp_x, value = "mz"), featureValues(res, value = "mz"))
    checkTrue(cor(as.numeric(groupval(tmp_x, value = "mz")),
                  as.numeric(featureValues(res, value = "mz"))) > 0.999)
    checkEquals(unname(groupval(tmp_x, value = "mzmin")),
                unname(featureValues(res, value = "mzmin")))
    checkEquals(unname(groupval(tmp_x, value = "mzmax")),
                unname(featureValues(res, value = "mzmax")))

    ## Check if I could get what MSW gets.
    fdef <- featureDefinitions(fticr)
    pkArea <- do.call(
        rbind,
        lapply(
            fdef$peakidx, function(z) {
                tmp <- chromPeaks(fticr)[z, c("rtmin", "rtmax",
                                              "mzmin", "mzmax"),
                                         drop = FALSE]
                pa <- c(median(tmp[, 1]), median(tmp[, 2]),
                        median(tmp[, 3]), median(tmp[, 4]))
                return(pa)
            }
        ))
    colnames(pkArea) <- c("rtmin", "rtmax", "mzmin", "mzmax")
    pkArea <- cbind(group_idx = 1:nrow(pkArea), pkArea,
                    mzmed = fdef$mzmed)
    allPks <- xcms:::.getMSWPeakData(filterFile(fticr, file = 1),
                                     peakArea = pkArea,
                                     sample_idx = 1,
                                     cn = colnames(chromPeaks(fticr)))
    ## Get all chrom peaks part of a feature
    curP <- chromPeaks(res)[unique(unlist(featureDefinitions(res)$peakidx)), ]
    curP <- curP[curP[, "sample"] == 1, ]
    curP <- curP[order(curP[, "mz"]), ]
    ## checkEquals(allPks[, "mz"], curP[, "mz"])
    ## checkEquals(allPks[, "maxo"], curP[, "maxo"])

    ## ## INVESTIGATE FURTHER!
    fld <- curP[, "is_filled"] == 1
    plot(allPks[fld, "mz"], curP[fld, "mz"], main = "filled")
    abline(0, 1, col = "grey") ## same
    checkEquals(allPks[fld, "mz"], curP[fld, "mz"])
    plot(allPks[!fld, "mz"], curP[!fld, "mz"], main = "not filled")
    abline(0, 1, col = "grey") ## highly similar!!!
    ## checkEquals(allPks[!fld, "mz"], curP[!fld, "mz"]) ## HIGHLY similar though
    ## into:
    plot(allPks[fld, "into"], curP[fld, "into"], main = "filled")
    checkEquals(allPks[fld, "into"], curP[fld, "into"])
    abline(0, 1, col = "grey") ## same
    plot(allPks[!fld, "into"], curP[!fld, "into"], main = "not filled")
    abline(0, 1, col = "grey") ## somewhat different!!!

    plot(allPks[fld, "maxo"], curP[fld, "maxo"], main = "filled")
    checkEquals(allPks[fld, "maxo"], curP[fld, "maxo"])
    abline(0, 1, col = "grey") ## same
    plot(allPks[!fld, "maxo"], curP[!fld, "maxo"], main = "not filled")
    abline(0, 1, col = "grey") ## somewhat different!!!
+} + + +dontrun_exhaustive_fillChromPeaks_test <- function() { + fls <- c("/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_15.mzML", + "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_19.mzML", + "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_11.mzML") + raw <- readMSData2(fls) + pks <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(0.8, 20), + ppm = 40)) + pks_noRt <- groupChromPeaks(pks, param = PeakDensityParam(minFraction = 0.6)) + filled <- fillChromPeaks(pks_noRt) + fp <- chromPeaks(filled) + fp <- fp[fp[, "is_filled"] == 1, ] + idxs <- sample(1:nrow(fp), 20) + for (i in idxs) { + cfp <- fp[i, , drop = FALSE] + tmp <- filterFile(pks_noRt, file = cfp[1, "sample"], + keepAdjustedRtime = TRUE) + chr <- extractChromatograms(tmp, rt = cfp[1, c("rtmin", "rtmax")], + mz = cfp[1, c("mzmin", "mzmax")])[[1]] + ## into <- sum(intensity(chr), na.rm = TRUE) * + ## (cfp[1, "rtmax"] - cfp[1, "rtmin"]) / + ## (sum(rtime(tmp) >= cfp[1, "rtmin"] & rtime(tmp) <= cfp[1, "rtmax"]) - 1) + into <- sum(intensity(chr), na.rm = TRUE) * + (cfp[1, "rtmax"] - cfp[1, "rtmin"]) / (length(chr) - 1) + checkEquals(unname(into), unname(cfp[1, "into"])) + } + ## Check those with an NA. + gv <- featureValues(filled) + with_na <- is.na(rowSums(gv)) + idxs <- sample(which(with_na), 20) + for (i in idxs) { + tmp <- chromPeaks(pks_noRt)[featureDefinitions(pks_noRt)$peakidx[[i]], + c("rtmin", "rtmax", "mzmin", "mzmax")] + ## Get the intensities for the first one. 
+ pkArea <- apply(tmp, median, MARGIN = 2) + smpl <- which(is.na(gv[i, ])) + tmp <- filterFile(pks_noRt, file = smpl, keepAdjustedRtime = TRUE) + chr <- extractChromatograms(tmp, rt = pkArea[1:2], mz = pkArea[3:4]) + checkTrue(length(chr) == 0) + } + still_missing <- is.na(rowSums(featureValues(filled))) + ## Try using ppm: + filled <- fillChromPeaks(pks_noRt, param = FillChromPeaksParam(ppm = 40)) + checkTrue(sum(still_missing) > sum(is.na(rowSums(featureValues(filled))))) + filled <- fillChromPeaks(pks_noRt, param = FillChromPeaksParam(ppm = 40, + expandMz = 2)) + checkTrue(sum(still_missing) > sum(is.na(rowSums(featureValues(filled))))) + ## Check that the mz and rt are all within the mzmin-mzmax and rtmin-rtmax + ## of the features. + fts <- featureDefinitions(filled) + for (i in 1:nrow(fts)) { + pks <- chromPeaks(filled)[fts[i, "peakidx"][[1]], ] + checkTrue(all(pks[, "mz"] >= fts[i, "mzmin"] & + pks[, "mz"] <= fts[i, "mzmax"])) + checkTrue(all(pks[, "rt"] >= fts[i, "rtmin"] & + pks[, "rt"] <= fts[i, "rtmax"])) + } + checkTrue(all(chromPeaks(filled)[, "rt"] >= chromPeaks(filled)[, "rtmin"] & + chromPeaks(filled)[, "rt"] <= chromPeaks(filled)[, "rtmax"])) + to_test <- chromPeaks(filled)[, "mz"] >= chromPeaks(filled)[, "mzmin"] & + chromPeaks(filled)[, "mz"] <= chromPeaks(filled)[, "mzmax"] + chromPeaks(filled)[!to_test, ] + ## checkTrue(all(to_test)) + + ## With adjusted retention times. 
+ pks <- adjustRtime(pks, param = ObiwarpParam()) + pks <- groupChromPeaks(pks, param = PeakDensityParam(minFraction = 0.6)) + filled <- fillChromPeaks(pks) + ## + fp <- chromPeaks(filled) + fp <- fp[fp[, "is_filled"] == 1, ] + idxs <- sample(1:nrow(fp), 20) + for (i in idxs) { + cfp <- fp[i, , drop = FALSE] + tmp <- filterFile(pks, file = cfp[1, "sample"], keepAdjustedRtime = TRUE) + chr <- extractChromatograms(tmp, rt = cfp[1, c("rtmin", "rtmax")], + mz = cfp[1, c("mzmin", "mzmax")])[[1]] + ## into <- sum(intensity(chr), na.rm = TRUE) * + ## (cfp[1, "rtmax"] - cfp[1, "rtmin"]) / + ## (sum(rtime(tmp) >= cfp[1, "rtmin"] & rtime(tmp) <= cfp[1, "rtmax"]) - 1) + into <- sum(intensity(chr), na.rm = TRUE) * + (cfp[1, "rtmax"] - cfp[1, "rtmin"]) / (length(chr) - 1) + checkEquals(unname(into), unname(cfp[1, "into"])) + } + ## Check those with an NA. + gv <- featureValues(filled) + with_na <- is.na(rowSums(gv)) + idxs <- sample(which(with_na), 20) + for (i in idxs) { + tmp <- chromPeaks(pks)[featureDefinitions(pks)$peakidx[[i]], + c("rtmin", "rtmax", "mzmin", "mzmax")] + ## Get the intensities for the first one. + pkArea <- apply(tmp, median, MARGIN = 2) + smpl <- which(is.na(gv[i, ])) + tmp <- filterFile(pks, file = smpl, keepAdjustedRtime = TRUE) + chr <- extractChromatograms(tmp, rt = pkArea[1:2], mz = pkArea[3:4]) + checkTrue(length(chr) == 0) + } + + ## what is the mz for those with NA against those without? + boxplot(list(filled = featureDefinitions(filled)[!with_na, "mzmed"], + failed = featureDefinitions(filled)[with_na, "mzmed"]), + varwidth = TRUE) + ## They are about the same, no difference. + + ## what is the intensity for those with NA against those without? + maxo <- rowMeans(featureValues(filled, value = "maxo"), na.rm = TRUE) + boxplot(list(filled = log2(maxo[!with_na]), failed = log2(maxo[with_na])), + varwidth = TRUE) + ## No difference. 
+} + +dontrun_test_getPeakInt_functions <- function() { + ## Testing whether the .getPeakInt functions are correct and which one is + ## more performant. + tmp <- filterFile(xod_xgrg, file = 3) + pkInt <- xcms:::.getPeakInt(tmp, chromPeaks(tmp)) + pkInt2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + pkInt3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + checkEquals(pkInt, pkInt2) + checkEquals(pkInt, pkInt3) + checkEquals(pkInt, chromPeaks(tmp)[, "into"]) + checkEquals(pkInt2, chromPeaks(tmp)[, "into"]) + checkEquals(pkInt3, chromPeaks(tmp)[, "into"]) + + library(microbenchmark) + microbenchmark(xcms:::.getPeakInt(tmp, chromPeaks(tmp)[1, , drop = FALSE]), + xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[1, , drop = FALSE]), + xcms:::.getPeakInt3(tmp, chromPeaks(tmp)[1, , drop = FALSE]), + times = 10) + ## 258 ms vs 503 ms vs 499 ms + microbenchmark(xcms:::.getPeakInt(tmp, chromPeaks(tmp)[1:5, ]), + xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[1:5, ]), + xcms:::.getPeakInt3(tmp, chromPeaks(tmp)[1:5, ]), + times = 10) + ## 1269 ms vs 586 ms vs 587 ms + microbenchmark(xcms:::.getPeakInt(tmp, chromPeaks(tmp)[1:10, ]), + xcms:::.getPeakInt2(tmp, chromPeaks(tmp)[1:10, ]), + xcms:::.getPeakInt3(tmp, chromPeaks(tmp)[1:10, ]), + times = 10) + ## 2577 ms vs 594 ms vs 562 ms + microbenchmark(xcms:::.getPeakInt(tmp, chromPeaks(tmp)), + xcms:::.getPeakInt2(tmp, chromPeaks(tmp)), + xcms:::.getPeakInt3(tmp, chromPeaks(tmp)), + times = 10) + ## 16447 ms vs 676 ms vs 556 ms + ## Well. getPeakInt2 and getPeakInt3 are considerably faster! +} + +## This provides some extensive tests. +dontrun_getPeakInt_validity <- function() { + ## Do extensive tests on the .getPeakInt3 function to ensure it is really + ## returning what it should. 
+ ## faahKO centWave peaks + tmp <- filterFile(xod_xgrg, file = 2) + pkInt2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + checkEquals(pkInt2, chromPeaks(tmp)[, "into"]) + + ## faahKO matchedFilter peaks + tmp <- findChromPeaks(od_x, param = MatchedFilterParam()) + tmp <- filterFile(tmp, file = 1) + pkInt2 <- xcms:::.getPeakInt2(tmp, chromPeaks(tmp)) + checkEquals(pkInt2, chromPeaks(tmp)[, "into"]) + + ## own file centWave peaks + fl <- "/Users/jo/data/2016/2016-11/NoSN/190516_POOL_N_POS_19.mzML" + raw_x <- readMSData2(fl) + ## Default centWave - completely off. + tmp <- findChromPeaks(raw_x, param = CentWaveParam(verboseColumns = TRUE)) + pkInt3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + checkEquals(pkInt3, chromPeaks(tmp)[, "into"]) + + ## With OK settings + options(originalCentWave = FALSE) + tmp <- findChromPeaks(raw_x, param = CentWaveParam(peakwidth = c(0.8, 40))) + pkInt3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + checkEquals(pkInt3, chromPeaks(tmp)[, "into"]) + + library(microbenchmark) + microbenchmark(xcms:::.getPeakInt3(tmp, chromPeaks(tmp)), + xcms:::.getPeakInt2(tmp, chromPeaks(tmp)), times = 10) + ## + + ## Reproduce with msdata files: + fl <- system.file("microtofq/MM14.mzML", package = "msdata") + raw <- readMSData2(fl) + tmp <- findChromPeaks(raw, param = CentWaveParam(peakwidth = c(1, 20))) + pkInt3 <- xcms:::.getPeakInt3(tmp, chromPeaks(tmp)) + checkEquals(pkInt3, chromPeaks(tmp)[, "into"]) + + ## own file matchedFilter peaks + ## TODO: continue here - but be aware, for MatchedFilterParam we might have + ## to do the stuff on the profile matrix! 
+ tmp <- findChromPeaks(raw_x, param = MatchedFilterParam()) +} diff --git a/inst/unitTests/runit.fillPeaks.R b/inst/unitTests/runit.fillPeaks.R index 7d574b2b8..2c14b01bf 100644 --- a/inst/unitTests/runit.fillPeaks.R +++ b/inst/unitTests/runit.fillPeaks.R @@ -42,6 +42,87 @@ test.fillPeaksColumns <- function() { } +test.getPeaks_implementation <- function() { + ## Compare the old and new getPeaks implementations. + xs_m <- xcmsSet(faahko_3_files[1]) + + pks_range <- peaks(xs_m)[1:200, ] + ## Extend the range + pks_range[, "mzmin"] <- pks_range[, "mzmin"] - 0.05 + pks_range[, "mzmax"] <- pks_range[, "mzmax"] + 0.05 + suppressWarnings( + pks_o <- xcms:::.getPeaks_orig(faahko_xr_1, peakrange = pks_range) + ) + pks_n <- xcms:::.getPeaks_new(faahko_xr_1, peakrange = pks_range) + checkEquals(pks_o, pks_n) + + pks_tmp <- pks_o + ## Force it to use different step. + suppressWarnings( + pks_o <- xcms:::.getPeaks_orig(faahko_xr_1, peakrange = pks_range, step = 0.3) + ) + pks_n <- xcms:::.getPeaks_new(faahko_xr_1, peakrange = pks_range, step = 0.3) + checkEquals(pks_o, pks_n) + checkTrue(sum(pks_o[, "into"] != pks_tmp[, "into"]) > 0) + + ## Change profile generation settings. + tmp <- deepCopy(faahko_xr_1) + tmp@profmethod <- "binlin" + suppressWarnings( + pks_o <- xcms:::.getPeaks_orig(tmp, peakrange = pks_range, step = 0.2) + ) + pks_n <- xcms:::.getPeaks_new(tmp, peakrange = pks_range, step = 0.2) + ## Can not expect identical values because of differences in binlin + ## See issues #46 and #49. + checkTrue(cor(pks_o[, "into"], pks_n[, "into"]) > 0.999) + checkTrue(sum(pks_o[, "into"] != pks_tmp[, "into"]) > 0) + pks_tmp <- pks_o + + ## Change profile generation settings. 
+ tmp@profmethod <- "binlinbase" + suppressWarnings( + pks_o <- xcms:::.getPeaks_orig(tmp, peakrange = pks_range, step = 0.2) + ) + pks_n <- xcms:::.getPeaks_new(tmp, peakrange = pks_range, step = 0.2) + checkEquals(pks_o, pks_n) + checkTrue(sum(pks_o[, "into"] != pks_tmp[, "into"]) > 0) + pks_tmp <- pks_o + + tmp@profmethod <- "intlin" + suppressWarnings( + pks_o <- xcms:::.getPeaks_orig(tmp, peakrange = pks_range, step = 0.2) + ) + pks_n <- xcms:::.getPeaks_new(tmp, peakrange = pks_range, step = 0.2) + checkEquals(pks_o, pks_n) + checkTrue(sum(pks_o[, "into"] != pks_tmp[, "into"]) > 0) + } + +## Compare the results we get when running the old and new fillPeaks. +test.fillPeaks_old_vs_new <- function() { + xsg <- group(faahko, minfrac = 1) + + register(SerialParam()) + res_n <- fillPeaks(xsg) + useOriginalCode(TRUE) + res_o <- fillPeaks(xsg) + useOriginalCode(FALSE) + pks_n <- peaks(res_n)[res_n@filled, ] + pks_o <- peaks(res_o)[res_o@filled, ] + checkTrue(cor(pks_o[pks_o[, "sample"] == 7, "into"], + pks_n[pks_n[, "sample"] == 7, "into"]) > 0.999) + ## plot(pks_n[, "into"], pks_o[, "into"]) + + profinfo(xsg) <- list(method = "binlin", step = 0.2) + res_n <- fillPeaks(xsg) + useOriginalCode(TRUE) + res_o <- fillPeaks(xsg) + useOriginalCode(FALSE) + pks_n <- peaks(res_n)[res_n@filled, ] + pks_o <- peaks(res_o)[res_o@filled, ] + checkTrue(cor(pks_o[pks_o[, "sample"] == 7, "into"], + pks_n[pks_n[, "sample"] == 7, "into"]) > 0.999) +} + ## testFilledFlagMSW <- function() { diff --git a/inst/unitTests/runit.findPeaksCentWaveWithIsotopeROIs.R b/inst/unitTests/runit.findPeaksCentWaveWithIsotopeROIs.R index b141062c6..9edc00084 100755 --- a/inst/unitTests/runit.findPeaksCentWaveWithIsotopeROIs.R +++ b/inst/unitTests/runit.findPeaksCentWaveWithIsotopeROIs.R @@ -5,7 +5,19 @@ test.addPredictedIsotopeFeatures <- function() { p1 <- findPeaks.centWave(xr, verbose.columns = TRUE, noise = 10000) p2 <- findPeaks.addPredictedIsotopeFeatures( - object = xr, xcmsPeaks = p1, noise = 
10000 ) + object = xr, xcmsPeaks = p1, noise = 10000) checkTrue(nrow(p1) < nrow(p2)) + ## Now the same with the new modified centWave: + options(originalCentWave = FALSE) + p1_2 <- findPeaks.centWave(xr, verbose.columns = TRUE, noise = 10000) + nrow(p1) + nrow(p1_2) + checkEquals(p1, p1_2) + + p2_2 <- findPeaks.addPredictedIsotopeFeatures( + object = xr, xcmsPeaks = p1_2, noise = 10000) + + options(originalCentWave = TRUE) + } diff --git a/inst/unitTests/runit.functions-XCMSnExp.R b/inst/unitTests/runit.functions-XCMSnExp.R new file mode 100644 index 000000000..a152a9de8 --- /dev/null +++ b/inst/unitTests/runit.functions-XCMSnExp.R @@ -0,0 +1,12 @@ +## Unit tests for functions in functions-XCMSnExp.R + +test_plotChromPeakDensity <- function() { + mzr <- c(305.05, 305.15) + plotChromPeakDensity(xod_x, mz = mzr) + + ## Use the full range. + plotChromPeakDensity(xod_x) + + plotChromPeakDensity(xod_x, mz = c(0, 1)) + plotChromPeakDensity(xod_x, mz = c(300, 310), pch = 16, xlim = c(2500, 4000)) +} diff --git a/inst/unitTests/runit.functions-utils.R b/inst/unitTests/runit.functions-utils.R new file mode 100644 index 000000000..9f4184e99 --- /dev/null +++ b/inst/unitTests/runit.functions-utils.R @@ -0,0 +1,53 @@ +library(xcms) +library(RUnit) +## Test the .grow_trues +test_grow_trues <- function() { + ## Compare performance with MSnbase:::utils.clean + Test <- c(1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, + 1, 0) + Expect <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, + FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + TRUE, TRUE, TRUE, TRUE) + res_2 <- xcms:::.grow_trues(Test > 0) + checkEquals(res_2, Expect) + + Test <- c(0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0) + Expect <- c(FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, + TRUE, FALSE) + res_2 <- xcms:::.grow_trues(Test > 0) + checkEquals(res_2, Expect) + + Test <- c(0, 1, NA, 0, 0, 1) + Expect <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE) + res_2 <- 
xcms:::.grow_trues(Test > 0) + checkEquals(res_2, Expect) + + Test <- c(0, NA, 1, 0, 0, 1, 0, 0) + Expect <- c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE) + res_2 <- xcms:::.grow_trues(Test > 0) + checkEquals(res_2, Expect) + + Test <- c(0, 1, 0, 0, NA, 0, 1) + Expect <- c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE) + res_2 <- xcms:::.grow_trues(Test > 0) + checkEquals(res_2, Expect) + + Test <- c(NA, 1, NA, NA, NA, NA, 1) + Expect <- c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE) + res_2 <- xcms:::.grow_trues(Test > 0) + checkEquals(res_2, Expect) +} + +benchmark_grow_trues <- function() { + set.seed(123) + Test <- rnorm(n = 30000) + Test[Test < 0] <- 0 + Test2 <- Test > 0 + res_1 <- MSnbase:::utils.clean(Test) + res_2 <- .clean(Test2) + + Test <- c(0, 0, 1, 1, 0, 0, 0, 1, 0, 0) + Expect <- c(FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE) + res_1 <- MSnbase:::utils.clean(Test) + +} diff --git a/inst/unitTests/runit.new_read_data.R b/inst/unitTests/runit.new_read_data.R index e47b75530..2f370efa5 100644 --- a/inst/unitTests/runit.new_read_data.R +++ b/inst/unitTests/runit.new_read_data.R @@ -56,7 +56,7 @@ test_evaluate_xcmsSource <- function() { library(msdata) mz_file <- system.file("microtofq/MM8.mzML", package = "msdata") src <- xcms:::xcmsSource(mz_file) - checkTrue(is(src, "rampSource")) + checkTrue(is(src, "pwizSource")) tmp <- loadRaw(src) checkEquals(names(tmp), c("rt", "acquisitionNum", "tic", "scanindex", "mz", "intensity", "polarity")) @@ -81,18 +81,22 @@ test_evaluate_xcmsSource <- function() { rawdata$MSn <- mzR:::rampRawDataMSn(rid) mzR:::rampClose(rid) rm(rid) + ## Ramp does not read polarity! 
+ tmp$polarity <- rawdata$polarity checkEquals(rawdata, tmp) ## Next example: msnfile <- system.file("microtofq/MSMSpos20_6.mzML", package = "msdata") src <- xcms:::xcmsSource(msnfile) tmp <- loadRaw(src, includeMSn = TRUE) + ## checkTrue(all(tmp$polarity == 1)) ## OLD code: rid <- mzR:::rampOpen(msnfile) rawdata <- mzR:::rampRawData(rid) rawdata$MSn <- mzR:::rampRawDataMSn(rid) mzR:::rampClose(rid) rm(rid) + rawdata$polarity <- tmp$polarity checkEquals(rawdata, tmp) } diff --git a/inst/unitTests/runit.new_scanrange_subsetting.R b/inst/unitTests/runit.new_scanrange_subsetting.R index bf5c5c2ef..f1184d3d2 100644 --- a/inst/unitTests/runit.new_scanrange_subsetting.R +++ b/inst/unitTests/runit.new_scanrange_subsetting.R @@ -22,7 +22,7 @@ test_scanrange_centWave <- function() { ## Compare with do_ xsub <- xraw[90:345] - res_3 <- do_detectFeatures_centWave(mz = xsub@env$mz, + res_3 <- do_findChromPeaks_centWave(mz = xsub@env$mz, int = xsub@env$intensity, scantime = xsub@scantime, noise = 5000, diff --git a/inst/unitTests/runit.processHistory.R b/inst/unitTests/runit.processHistory.R index c5c228d12..c3f109c8b 100644 --- a/inst/unitTests/runit.processHistory.R +++ b/inst/unitTests/runit.processHistory.R @@ -86,15 +86,15 @@ test_XProcessHistory_class <- function() { checkTrue(inherits(ph, "ProcessHistory")) ph <- xcms:::XProcessHistory(info = "some info", - type = xcms:::.PROCSTEP.FEATURE.DETECTION) + type = xcms:::.PROCSTEP.PEAK.DETECTION) checkEquals(ph@info, "some info") - checkEquals(ph@type, xcms:::.PROCSTEP.FEATURE.DETECTION) + checkEquals(ph@type, xcms:::.PROCSTEP.PEAK.DETECTION) ph@type <- "other" checkException(validObject(ph)) ph <- xcms:::XProcessHistory(info = "some info", - type = xcms:::.PROCSTEP.FEATURE.DETECTION, + type = xcms:::.PROCSTEP.PEAK.DETECTION, param = CentWaveParam()) checkTrue(is(ph@param, "CentWaveParam")) diff --git a/inst/unitTests/runit.splitCombine.R b/inst/unitTests/runit.splitCombine.R index 36e346351..bed18f255 100644 --- 
a/inst/unitTests/runit.splitCombine.R +++ b/inst/unitTests/runit.splitCombine.R @@ -162,6 +162,18 @@ testSplitPhenoData <- function(){ checkEquals(xset@polarity, xsetList[[1]]@polarity) } +## Issue #133 +test_c_empty <- function() { + library(msdata) + suppressWarnings( + xs <- xcmsSet(system.file("microtofq/MM8.mzML", package="msdata"), + method="centWave", ppm=25, peakwidth=c(20, 50)) + ) + xs2 <- xcmsSet(system.file("microtofq/MM14.mzML", package="msdata"), + method="centWave", ppm=25, peakwidth=c(20, 50)) + comb <- c(xs, xs2) + checkTrue(nrow(peaks(comb)) == 0) +} testSubset <- function(){ ## first testing just the plain xset diff --git a/inst/unitTests/runit.useOriginalCode.R b/inst/unitTests/runit.useOriginalCode.R index 33adb0a02..ba166953e 100644 --- a/inst/unitTests/runit.useOriginalCode.R +++ b/inst/unitTests/runit.useOriginalCode.R @@ -29,12 +29,12 @@ dontrun_test_matchedFilter_orig_code <- function() { step <- 0.2 orig <- useOriginalCode() - res_new <- xcms:::do_detectFeatures_matchedFilter(mz, int, + res_new <- xcms:::do_findChromPeaks_matchedFilter(mz, int, scantime, valsPerSpect, binSize = step) useOriginalCode(TRUE) - res_old <- xcms:::do_detectFeatures_matchedFilter(mz, int, + res_old <- xcms:::do_findChromPeaks_matchedFilter(mz, int, scantime, valsPerSpect, binSize = step) diff --git a/man/Chromatogram-class.Rd b/man/Chromatogram-class.Rd new file mode 100644 index 000000000..51ce6a657 --- /dev/null +++ b/man/Chromatogram-class.Rd @@ -0,0 +1,211 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R, R/functions-Chromatogram.R, +% R/methods-Chromatogram.R +\docType{class} +\name{Chromatogram-class} +\alias{Chromatogram-class} +\alias{Chromatogram} +\alias{show,Chromatogram-method} +\alias{rtime,Chromatogram-method} +\alias{intensity,Chromatogram-method} +\alias{mz,Chromatogram-method} +\alias{precursorMz,Chromatogram-method} +\alias{productMz,Chromatogram-method} +\alias{productMz} 
+\alias{aggregationFun,Chromatogram-method} +\alias{aggregationFun} +\alias{fromFile,Chromatogram-method} +\alias{length,Chromatogram-method} +\alias{as.data.frame,Chromatogram-method} +\alias{filterRt,Chromatogram-method} +\alias{clean,Chromatogram-method} +\title{Representation of chromatographic MS data} +\usage{ +Chromatogram(rtime = numeric(), intensity = numeric(), mz = c(NA_real_, + NA_real_), filterMz = c(NA_real_, NA_real_), precursorMz = c(NA_real_, + NA_real_), productMz = c(NA_real_, NA_real_), fromFile = integer(), + aggregationFun = character()) + +\S4method{show}{Chromatogram}(object) + +\S4method{rtime}{Chromatogram}(object) + +\S4method{intensity}{Chromatogram}(object) + +\S4method{mz}{Chromatogram}(object, filter = FALSE) + +\S4method{precursorMz}{Chromatogram}(object) + +\S4method{productMz}{Chromatogram}(object) + +\S4method{aggregationFun}{Chromatogram}(object) + +\S4method{fromFile}{Chromatogram}(object) + +\S4method{length}{Chromatogram}(x) + +\S4method{as.data.frame}{Chromatogram}(x) + +\S4method{filterRt}{Chromatogram}(object, rt) + +\S4method{clean}{Chromatogram}(object, all = FALSE) +} +\arguments{ +\item{rtime}{\code{numeric} with the retention times (length has to be equal +to the length of \code{intensity}).} + +\item{intensity}{\code{numeric} with the intensity values (length has to be +equal to the length of \code{rtime}).} + +\item{mz}{\code{numeric(2)} representing the mz value range (min, max) +on which the chromatogram was created. This is supposed to contain the +\emph{real} range of mz values in contrast to the \code{filterMz} below. +If not applicable use \code{mzrange = c(0, 0)}.} + +\item{filterMz}{\code{numeric(2)} representing the mz value range (min, +max) that was used to filter the original object on mz dimension. If not +applicable use \code{filterMz = c(0, 0)}.} + +\item{precursorMz}{\code{numeric(2)} for SRM/MRM transitions. +Represents the mz of the precursor ion. 
+ See details for more information.} + +\item{productMz}{\code{numeric(2)} for SRM/MRM transitions. +Represents the mz of the product. See details for more information.} + +\item{fromFile}{\code{integer(1)} the index of the file within the +\code{\link{OnDiskMSnExp}} or \code{\link{XCMSnExp}} from which the +chromatogram was extracted.} + +\item{aggregationFun}{\code{character} string specifying the function that +was used to aggregate intensity values for the same retention time across +the mz range. Supported are \code{"sum"} (total ion chromatogram), +\code{"max"} (base peak chromatogram), \code{"min"} and \code{"mean"}.} + +\item{object}{A \code{Chromatogram} object.} + +\item{filter}{For \code{mz}: whether the mz range used to filter the +original object should be returned (\code{filter = TRUE}), or the mz +range calculated on the real data (\code{filter = FALSE}).} + +\item{x}{For \code{as.data.frame} and \code{length}: a \code{Chromatogram} +object.} + +\item{rt}{For \code{filterRt}: \code{numeric(2)} defining the lower and +upper retention time for the filtering.} + +\item{all}{For \code{clean}: \code{logical(1)} whether all \code{0} intensity +value pairs should be removed (defaults to \code{FALSE}).} +} +\value{ +For \code{clean}: a \emph{cleaned} \code{Chromatogram} object. +} +\description{ +The \code{Chromatogram} class is designed to store + chromatographic MS data, i.e. pairs of retention time and intensity + values. Instances of the class can be created with the + \code{Chromatogram} constructor function but in most cases the dedicated + methods for \code{\link{OnDiskMSnExp}} and \code{\link{XCMSnExp}} + objects extracting chromatograms should be used instead (i.e. the + \code{\link{extractChromatograms}}). + +\code{Chromatogram}: create an instance of the + \code{Chromatogram} class. + +\code{rtime} returns the retention times for the retention time + - intensity pairs stored in the chromatogram. 
+ +\code{intensity} returns the intensity for the retention time + - intensity pairs stored in the chromatogram. + +\code{mz} get the mz (range) of the chromatogram. The + function returns a \code{numeric(2)} with the lower and upper mz value. + +\code{precursorMz} get the mz of the precursor ion. The + function returns a \code{numeric(2)} with the lower and upper mz value. + +\code{productMz} get the mz of the product chromatogram/ion. The + function returns a \code{numeric(2)} with the lower and upper mz value. + +\code{aggregationFun,aggregationFun<-} get or set the + aggregation function. + +\code{fromFile} returns the value from the \code{fromFile} slot. + +\code{length} returns the length (number of retention time - + intensity pairs) of the chromatogram. + +\code{as.data.frame} returns the \code{rtime} and + \code{intensity} values from the object as \code{data.frame}. + +\code{filterRt}: filters the chromatogram based on the provided + retention time range. + +\code{clean}: \emph{cleans} a \code{Chromatogram} class by + removing all \code{0} and \code{NA} intensity signals (along with the + associated retention times). By default (if \code{all = FALSE}) \code{0} + values that are directly adjacent to peaks are kept too. \code{NA} + values are always removed. +} +\details{ +The \code{mz}, \code{filterMz}, \code{precursorMz} and + \code{productMz} are stored as a \code{numeric(2)} representing a range + even if the chromatogram was generated for only a single ion (i.e. a + single mz value). Using ranges for \code{mz} values allows this class to + be used also for e.g. total ion chromatograms or base peak chromatograms. + + The slots \code{precursorMz} and \code{productMz} allow representing SRM + (single reaction monitoring) and MRM (multiple SRM) chromatograms. As an + example, a \code{Chromatogram} for a SRM transition 273 -> 153 will have + a \code{@precursorMz = c(273, 273)} and a + \code{@productMz = c(153, 153)}. 
+} +\section{Slots}{ + +\describe{ +\item{\code{.__classVersion__,rtime,intensity,mz,filterMz,precursorMz,productMz,fromFile,aggregationFun}}{See corresponding parameter above.} +}} + +\examples{ + +## Create a simple Chromatogram object based on random values. +chr <- Chromatogram(intensity = abs(rnorm(1000, mean = 2000, sd = 200)), + rtime = sort(abs(rnorm(1000, mean = 10, sd = 5)))) +chr + +## Get the intensities +head(intensity(chr)) + +## Get the retention time +head(rtime(chr)) + +## What is the retention time range of the object? +range(rtime(chr)) + +## Filter the chromatogram to keep only values between 4 and 10 seconds +chr2 <- filterRt(chr, rt = c(4, 10)) + +range(rtime(chr2)) + +## Create a simple Chromatogram object + +chr <- Chromatogram(rtime = 1:12, + intensity = c(0, 0, 20, 0, 0, 0, 123, 124343, 3432, 0, 0, 0)) + +## Remove 0-intensity values keeping those adjacent to peaks +chr <- clean(chr) +intensity(chr) + +## Remove all 0-intensity values +chr <- clean(chr, all = TRUE) +intensity(chr) +} +\seealso{ +\code{\link{extractChromatograms}} for the method to extract + \code{Chromatogram} objects from \code{\link{XCMSnExp}} or + \code{\link[MSnbase]{OnDiskMSnExp}} objects. + + \code{\link{plotChromatogram}} to plot \code{Chromatogram} objects. 
+} +\author{ +Johannes Rainer +} diff --git a/man/GenericParam.Rd b/man/GenericParam.Rd new file mode 100644 index 000000000..284b9e571 --- /dev/null +++ b/man/GenericParam.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, +% R/methods-Params.R +\docType{class} +\name{GenericParam-class} +\alias{GenericParam-class} +\alias{GenericParam} +\alias{GenericParam} +\alias{show,GenericParam-method} +\title{Generic parameter class} +\usage{ +GenericParam(fun = character(), args = list()) + +\S4method{show}{GenericParam}(object) +} +\arguments{ +\item{fun}{\code{character} representing the name of the function.} + +\item{args}{\code{list} (ideally named) with the arguments to the function.} + +\item{object}{\code{GenericParam} object.} +} +\value{ +The \code{GenericParam} function returns a \code{GenericParam} object. +} +\description{ +The \code{GenericParam} class allows to store generic parameter + information such as the name of the function that was/has to be called + (slot \code{fun}) and its arguments (slot \code{args}). This object is + used to track the process history of the data processings of an + \code{\link{XCMSnExp}} object. This is in contrast to e.g. the + \code{\link{CentWaveParam}} object that is passed to the actual + processing method. +} +\section{Slots}{ + +\describe{ +\item{\code{fun}}{\code{character} specifying the function name.} + +\item{\code{args}}{\code{list} (ideally named) with the arguments to the +function.} + +\item{\code{.__classVersion__}}{the version of the class.} +}} + +\examples{ +prm <- GenericParam(fun = "mean") + +prm <- GenericParam(fun = "mean", args = list(na.rm = TRUE)) +} +\seealso{ +\code{\link{processHistory}} for how to access the process history + of an \code{\link{XCMSnExp}} object. 
+} +\author{ +Johannes Rainer +} diff --git a/man/ProcessHistory-class.Rd b/man/ProcessHistory-class.Rd index 607d5e3d0..29a55dd37 100644 --- a/man/ProcessHistory-class.Rd +++ b/man/ProcessHistory-class.Rd @@ -2,22 +2,22 @@ % Please edit documentation in R/DataClasses.R, R/methods-ProcessHistory.R \docType{class} \name{ProcessHistory-class} -\alias{ProcessHistory} \alias{ProcessHistory-class} -\alias{XProcessHistory} +\alias{ProcessHistory} \alias{XProcessHistory-class} -\alias{fileIndex} -\alias{fileIndex,ProcessHistory-method} -\alias{processDate} -\alias{processDate,ProcessHistory-method} -\alias{processInfo} -\alias{processInfo,ProcessHistory-method} -\alias{processParam} -\alias{processParam,XProcessHistory-method} -\alias{processType} -\alias{processType,ProcessHistory-method} +\alias{XProcessHistory} \alias{show,ProcessHistory-method} \alias{show,XProcessHistory-method} +\alias{processParam,XProcessHistory-method} +\alias{processParam} +\alias{processType,ProcessHistory-method} +\alias{processType} +\alias{processDate,ProcessHistory-method} +\alias{processDate} +\alias{processInfo,ProcessHistory-method} +\alias{processInfo} +\alias{fileIndex,ProcessHistory-method} +\alias{fileIndex} \title{Tracking data processing} \usage{ \S4method{show}{ProcessHistory}(object) @@ -55,14 +55,14 @@ of the files/samples on which the processing step was applied. } \description{ Objects of the type \code{ProcessHistory} allow to keep track -of any data processing step in an metabolomics experiment. They are created -by the data processing methods, such as \code{\link{detectFeatures}} and -added to the corresponding results objects. Thus, usually, users don't need -to create them. + of any data processing step in a metabolomics experiment. They are + created by the data processing methods, such as + \code{\link{findChromPeaks}} and added to the corresponding results + objects. Thus, usually, users don't need to create them. 
The \code{XProcessHistory} extends the \code{ProcessHistory} by -adding a slot \code{param} that allows to store the actual parameter class -of the processing step. + adding a slot \code{param} that allows to store the actual parameter + class of the processing step. Get or set the parameter class from an \code{XProcessHistory} object. @@ -83,8 +83,8 @@ the processing step was applied. \describe{ \item{\code{type}}{character(1): string defining the type of the processing step. -This string has to match predefined values defined in the internal variable -\code{.PROCSTEPS}.} +This string has to match predefined values. Use +\code{\link{processHistoryTypes}} to list them.} \item{\code{date}}{character(1): date time stamp when the processing step was started.} @@ -96,9 +96,10 @@ samples of the object the processing was performed.} \item{\code{error}}{(ANY): used to store eventual calculation errors.} \item{\code{param}}{(Param): an object of type \code{Param} (e.g. -\code{\link{CentWaveParam}}) specifying the settings of the processing step.} +\code{\link{CentWaveParam}}) specifying the settings of the processing +step.} }} + \author{ Johannes Rainer } - diff --git a/man/XCMSnExp-class.Rd b/man/XCMSnExp-class.Rd index 6bc046f27..2f5caf7aa 100644 --- a/man/XCMSnExp-class.Rd +++ b/man/XCMSnExp-class.Rd @@ -1,65 +1,75 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DataClasses.R, R/methods-MsFeatureData.R, R/methods-XCMSnExp.R +% Please edit documentation in R/DataClasses.R, R/functions-ProcessHistory.R, +% R/methods-MsFeatureData.R, R/methods-OnDiskMSnExp.R, R/methods-XCMSnExp.R \docType{class} \name{MsFeatureData-class} -\alias{MsFeatureData} \alias{MsFeatureData-class} -\alias{XCMSnExp} +\alias{MsFeatureData} \alias{XCMSnExp-class} -\alias{adjustedRtime} +\alias{XCMSnExp} +\alias{processHistoryTypes} +\alias{show,MsFeatureData-method} +\alias{hasAdjustedRtime,MsFeatureData-method} +\alias{hasFeatures,MsFeatureData-method} 
+\alias{hasChromPeaks,MsFeatureData-method} \alias{adjustedRtime,MsFeatureData-method} -\alias{adjustedRtime,XCMSnExp-method} -\alias{adjustedRtime<-} \alias{adjustedRtime<-,MsFeatureData-method} -\alias{adjustedRtime<-,XCMSnExp-method} -\alias{dropAdjustedRtime} \alias{dropAdjustedRtime,MsFeatureData-method} -\alias{dropAdjustedRtime,XCMSnExp-method} -\alias{dropFeatureGroups} -\alias{dropFeatureGroups,MsFeatureData-method} -\alias{dropFeatureGroups,XCMSnExp-method} -\alias{dropFeatures} -\alias{dropFeatures,MsFeatureData-method} -\alias{dropFeatures,XCMSnExp-method} -\alias{featureGroups} -\alias{featureGroups,MsFeatureData-method} -\alias{featureGroups,XCMSnExp-method} -\alias{featureGroups<-} -\alias{featureGroups<-,MsFeatureData-method} -\alias{featureGroups<-,XCMSnExp-method} -\alias{features} -\alias{features,MsFeatureData-method} -\alias{features,XCMSnExp-method} -\alias{features<-} -\alias{features<-,MsFeatureData-method} -\alias{features<-,XCMSnExp-method} -\alias{hasAdjustedRtime} -\alias{hasAdjustedRtime,MsFeatureData-method} +\alias{featureDefinitions,MsFeatureData-method} +\alias{featureDefinitions<-,MsFeatureData-method} +\alias{dropFeatureDefinitions,MsFeatureData-method} +\alias{chromPeaks,MsFeatureData-method} +\alias{chromPeaks<-,MsFeatureData-method} +\alias{dropChromPeaks,MsFeatureData-method} +\alias{profMat,OnDiskMSnExp-method} +\alias{show,XCMSnExp-method} \alias{hasAdjustedRtime,XCMSnExp-method} -\alias{hasAlignedFeatures} -\alias{hasAlignedFeatures,MsFeatureData-method} -\alias{hasAlignedFeatures,XCMSnExp-method} -\alias{hasDetectedFeatures} -\alias{hasDetectedFeatures,MsFeatureData-method} -\alias{hasDetectedFeatures,XCMSnExp-method} -\alias{intensity,XCMSnExp-method} +\alias{hasAdjustedRtime} +\alias{hasFeatures,XCMSnExp-method} +\alias{hasFeatures} +\alias{hasChromPeaks,XCMSnExp-method} +\alias{hasChromPeaks} +\alias{adjustedRtime,XCMSnExp-method} +\alias{adjustedRtime} +\alias{adjustedRtime<-,XCMSnExp-method} +\alias{adjustedRtime<-} 
+\alias{featureDefinitions,XCMSnExp-method} +\alias{featureDefinitions} +\alias{featureDefinitions<-,XCMSnExp-method} +\alias{featureDefinitions<-} +\alias{chromPeaks,XCMSnExp-method} +\alias{chromPeaks} +\alias{chromPeaks<-,XCMSnExp-method} +\alias{chromPeaks<-} +\alias{rtime,XCMSnExp-method} \alias{mz,XCMSnExp-method} -\alias{processHistory} +\alias{intensity,XCMSnExp-method} +\alias{spectra,XCMSnExp-method} \alias{processHistory,XCMSnExp-method} -\alias{rtime,XCMSnExp-method} +\alias{processHistory} +\alias{dropChromPeaks,XCMSnExp-method} +\alias{dropChromPeaks} +\alias{dropFeatureDefinitions,XCMSnExp-method} +\alias{dropFeatureDefinitions} +\alias{dropAdjustedRtime,XCMSnExp-method} +\alias{dropAdjustedRtime} +\alias{XCMSnExp-class} \alias{setAs} -\alias{show,MsFeatureData-method} -\alias{show,XCMSnExp-method} -\alias{spectra,XCMSnExp-method} +\alias{profMat,XCMSnExp-method} +\alias{findChromPeaks,XCMSnExp,ANY-method} +\alias{dropFilledChromPeaks,XCMSnExp-method} +\alias{dropFilledChromPeaks} \title{Data container storing xcms preprocessing results} \usage{ +processHistoryTypes() + \S4method{show}{MsFeatureData}(object) \S4method{hasAdjustedRtime}{MsFeatureData}(object) -\S4method{hasAlignedFeatures}{MsFeatureData}(object) +\S4method{hasFeatures}{MsFeatureData}(object) -\S4method{hasDetectedFeatures}{MsFeatureData}(object) +\S4method{hasChromPeaks}{MsFeatureData}(object) \S4method{adjustedRtime}{MsFeatureData}(object) @@ -67,239 +77,382 @@ \S4method{dropAdjustedRtime}{MsFeatureData}(object) -\S4method{featureGroups}{MsFeatureData}(object) +\S4method{featureDefinitions}{MsFeatureData}(object) -\S4method{featureGroups}{MsFeatureData}(object) <- value +\S4method{featureDefinitions}{MsFeatureData}(object) <- value -\S4method{dropFeatureGroups}{MsFeatureData}(object) +\S4method{dropFeatureDefinitions}{MsFeatureData}(object) -\S4method{features}{MsFeatureData}(object) +\S4method{chromPeaks}{MsFeatureData}(object) -\S4method{features}{MsFeatureData}(object) <- value 
+\S4method{chromPeaks}{MsFeatureData}(object) <- value -\S4method{dropFeatures}{MsFeatureData}(object) +\S4method{dropChromPeaks}{MsFeatureData}(object) + +\S4method{profMat}{OnDiskMSnExp}(object, method = "bin", step = 0.1, + baselevel = NULL, basespace = NULL, mzrange. = NULL, fileIndex, ...) \S4method{show}{XCMSnExp}(object) \S4method{hasAdjustedRtime}{XCMSnExp}(object) -\S4method{hasAlignedFeatures}{XCMSnExp}(object) +\S4method{hasFeatures}{XCMSnExp}(object) -\S4method{hasDetectedFeatures}{XCMSnExp}(object) +\S4method{hasChromPeaks}{XCMSnExp}(object) \S4method{adjustedRtime}{XCMSnExp}(object, bySample = FALSE) \S4method{adjustedRtime}{XCMSnExp}(object) <- value -\S4method{featureGroups}{XCMSnExp}(object) +\S4method{featureDefinitions}{XCMSnExp}(object) -\S4method{featureGroups}{XCMSnExp}(object) <- value +\S4method{featureDefinitions}{XCMSnExp}(object) <- value -\S4method{features}{XCMSnExp}(object, bySample = FALSE) +\S4method{chromPeaks}{XCMSnExp}(object, bySample = FALSE, rt = numeric(), + mz = numeric(), ppm = 0, type = "any") -\S4method{features}{XCMSnExp}(object) <- value +\S4method{chromPeaks}{XCMSnExp}(object) <- value -\S4method{rtime}{XCMSnExp}(object, bySample = FALSE) +\S4method{rtime}{XCMSnExp}(object, bySample = FALSE, + adjusted = hasAdjustedRtime(object)) -\S4method{mz}{XCMSnExp}(object, bySample = FALSE) +\S4method{mz}{XCMSnExp}(object, bySample = FALSE, BPPARAM = bpparam()) -\S4method{intensity}{XCMSnExp}(object, bySample = FALSE) +\S4method{intensity}{XCMSnExp}(object, bySample = FALSE, + BPPARAM = bpparam()) -\S4method{spectra}{XCMSnExp}(object, bySample = FALSE) +\S4method{spectra}{XCMSnExp}(object, bySample = FALSE, + adjusted = hasAdjustedRtime(object), BPPARAM = bpparam()) \S4method{processHistory}{XCMSnExp}(object, fileIndex, type) -\S4method{dropFeatures}{XCMSnExp}(object) +\S4method{dropChromPeaks}{XCMSnExp}(object) -\S4method{dropFeatureGroups}{XCMSnExp}(object) +\S4method{dropFeatureDefinitions}{XCMSnExp}(object, keepAdjRtime = 
FALSE, + dropLastN = -1) \S4method{dropAdjustedRtime}{XCMSnExp}(object) + +\S4method{profMat}{XCMSnExp}(object, method = "bin", step = 0.1, + baselevel = NULL, basespace = NULL, mzrange. = NULL, fileIndex, ...) + +\S4method{findChromPeaks}{XCMSnExp,ANY}(object, param, BPPARAM = bpparam(), + return.type = "XCMSnExp") + +\S4method{dropFilledChromPeaks}{XCMSnExp}(object) } \arguments{ -\item{object}{For \code{adjustedRtime}, \code{featureGroups}, -\code{features}, \code{hasAdjustedRtime}, \code{hasAlignedFeatures} and -\code{hasDetectedFeatures} either a \code{MsFeatureData} or a \code{XCMSnExp} +\item{object}{For \code{adjustedRtime}, \code{featureDefinitions}, +\code{chromPeaks}, \code{hasAdjustedRtime}, \code{hasFeatures} and +\code{hasChromPeaks} either a \code{MsFeatureData} or a \code{XCMSnExp} object, for all other methods a \code{XCMSnExp} object.} \item{value}{For \code{adjustedRtime<-}: a \code{list} (length equal to the -number of samples) with numeric vectors representing the adjusted retention -times per scan. + number of samples) with numeric vectors representing the adjusted + retention times per scan. -For \code{featureGroups<-}: a \code{DataFrame} with feature -alignment information. See return value for the \code{featureGroups} method -for the expected format. + For \code{featureDefinitions<-}: a \code{DataFrame} with peak + grouping information. See return value for the \code{featureDefinitions} + method for the expected format. -For \code{features<-}: a \code{matrix} with information on -detected features. See return value for the \code{features} method for the -expected format.} + For \code{chromPeaks<-}: a \code{matrix} with information on + detected peaks. See return value for the \code{chromPeaks} method for the + expected format.} -\item{bySample}{logical(1) specifying whether results should be grouped by -sample.} +\item{method}{The profile matrix generation method. 
Allowed are \code{"bin"}, +\code{"binlin"}, \code{"binlinbase"} and \code{"intlin"}. See details +section for more information.} + +\item{step}{numeric(1) representing the m/z bin size.} + +\item{baselevel}{numeric(1) representing the base value to which +empty elements (i.e. m/z bins without a measured intensity) should be set. +Only considered if \code{method = "binlinbase"}. See \code{baseValue} +parameter of \code{\link{imputeLinInterpol}} for more details.} + +\item{basespace}{numeric(1) representing the m/z length after +which the signal will drop to the base level. Linear interpolation will be +used between consecutive data points falling within \code{2 * basespace} to +each other. Only considered if \code{method = "binlinbase"}. If not +specified, it defaults to \code{0.075}. Internally this parameter is +translated into the \code{distance} parameter of the +\code{\link{imputeLinInterpol}} function by +\code{distance = floor(basespace / step)}. See \code{distance} parameter +of \code{\link{imputeLinInterpol}} for more details.} + +\item{mzrange.}{Optional numeric(2) manually specifying the mz value range to +be used for binning. If not provided, the whole mz value range is used.} \item{fileIndex}{For \code{processHistory}: optional \code{numeric} specifying the index of the files/samples for which the \code{\link{ProcessHistory}} objects should be retrieved.} +\item{...}{Additional parameters.} + +\item{bySample}{logical(1) specifying whether results should be grouped by +sample.} + +\item{rt}{optional \code{numeric(2)} defining the retention time range for +which chromatographic peaks should be returned.} + +\item{mz}{optional \code{numeric(2)} defining the mz range for which +chromatographic peaks should be returned.} + +\item{ppm}{optional \code{numeric(1)} specifying the ppm by which the +\code{mz} range should be extended. 
For a value of \code{ppm = 10}, all +peaks within \code{mz[1] - ppm / 1e6} and \code{mz[2] + ppm / 1e6} are +returned.} + \item{type}{For \code{processHistory}: restrict returned -\code{\link{ProcessHistory}} objects to analysis steps of a certain type. -Supported values are \code{"Unknown"}, \code{"Feature detection"}, -\code{"Feature alignment"} and \code{"Retention time correction"}.} +\code{\link{ProcessHistory}} objects to analysis steps of a certain +type. Use the \code{processHistoryTypes} to list all supported values. +For \code{chromPeaks}: \code{character} specifying which peaks to return +if \code{rt} or \code{mz} are defined. For \code{type = "any"} all +chromatographic peaks that \emph{overlap} the range defined by the +\code{mz} or by the \code{rt}. For \code{type = "within"} only peaks +completely within the range(s) are returned.} + +\item{adjusted}{logical(1) whether adjusted or raw (i.e. the original +retention times reported in the files) should be returned.} + +\item{BPPARAM}{Parameter class for parallel processing. See +\code{\link[BiocParallel]{bpparam}}.} + +\item{keepAdjRtime}{For \code{dropFeatureDefinitions,XCMSnExp}: +\code{logical(1)} defining whether eventually present retention time +adjustment should not be dropped. By default dropping feature definitions +drops retention time adjustment results too.} + +\item{dropLastN}{For \code{dropFeatureDefinitions,XCMSnExp}: +\code{numeric(1)} defining the number of peak grouping related process +history steps to remove. By default \code{dropLastN = -1}, dropping the +chromatographic peaks removes all process history steps related to peak +grouping. Setting e.g. 
\code{dropLastN = 1} will only remove the most +recent peak grouping related process history step.} + +\item{param}{A \code{\link{CentWaveParam}}, \code{\link{MatchedFilterParam}}, +\code{\link{MassifquantParam}}, \code{\link{MSWParam}} or +\code{\link{CentWavePredIsoParam}} object with the settings for the +chromatographic peak detection algorithm.} + +\item{return.type}{Character specifying what type of object the method should +return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or +\code{"xcmsSet"}.} } \value{ +For \code{profMat}: a \code{list} with a the profile matrix +\code{matrix} (or matrices if \code{fileIndex} was not specified or if +\code{length(fileIndex) > 1}). See \code{\link{profile-matrix}} for general +help and information about the profile matrix. + For \code{adjustedRtime}: if \code{bySample = FALSE} a \code{numeric} -vector with the adjusted retention for each spectrum of all files/samples -within the object. If \code{bySample = TRUE } a \code{list} (length equal to -the number of samples) with adjusted retention times grouped by sample. -Returns \code{NULL} if no adjusted retention times are present. - -For \code{featureGroups}: a \code{DataFrame} with feature alignment -information, each row corresponding to one group of aligned features (across -samples) and columns \code{"mzmed"} (median mz value), \code{"mzmin"} -(minimal mz value), \code{"mzmax"} (maximum mz value), \code{"rtmed"} (median -retention time), \code{"rtmin"} (minimal retention time), \code{"rtmax"} -(maximal retention time) and \code{"featureidx"}. Column \code{"featureidx"} -contains a \code{list} with indices of features (rows) in the matrix returned -by the \code{features} method that belong to that feature group. The method -returns \code{NULL} if no aligned feature information is present. 
- -For \code{features}: if \code{bySample = FALSE} a \code{matrix} with -at least the following columns: \code{"mz"} (mz value for the largest -intensity), \code{"mzmin"} (minimal mz value), \code{"mzmax"} (maximal mz -value), \code{"rt"} (retention time for the peak apex), \code{"rtmin"} -(minimal retention time), \code{"rtmax"} (maximal retention time), -\code{"into"} (integrated, original, intensity of the feature) and -\code{"sample"} (sample index in which the feature was identified). -Depending on the employed feature detection algorithm and the -\code{verboseColumns} parameter of it additional columns might be returned. -For \code{bySample = TRUE} the features are returned as a \code{list} of -matrices, each containing the features of a specific sample. For sample in -which no feastures were detected a matrix with 0 rows is returned. - -For \code{rtime}: if \code{bySample = FALSE} a numeric vector with the -retention times of each scan, if \code{bySample = TRUE} a \code{list} of -numeric vectors with the retention times per sample. + vector with the adjusted retention for each spectrum of all files/samples + within the object. If \code{bySample = TRUE } a \code{list} (length equal + to the number of samples) with adjusted retention times grouped by + sample. Returns \code{NULL} if no adjusted retention times are present. + +For \code{featureDefinitions}: a \code{DataFrame} with peak grouping + information, each row corresponding to one mz-rt feature (grouped peaks + within and across samples) and columns \code{"mzmed"} (median mz value), + \code{"mzmin"} (minimal mz value), \code{"mzmax"} (maximum mz value), + \code{"rtmed"} (median retention time), \code{"rtmin"} (minimal retention + time), \code{"rtmax"} (maximal retention time) and \code{"peakidx"}. + Column \code{"peakidx"} contains a \code{list} with indices of + chromatographic peaks (rows) in the matrix returned by the + \code{chromPeaks} method that belong to that feature group. 
The method + returns \code{NULL} if no feature definitions are present. + +For \code{chromPeaks}: if \code{bySample = FALSE} a \code{matrix} + with at least the following columns: + \code{"mz"} (intensity-weighted mean of mz values of the peak across + scans/retention times), + \code{"mzmin"} (minimal mz value), + \code{"mzmax"} (maximal mz value), + \code{"rt"} (retention time for the peak apex), + \code{"rtmin"} (minimal retention time), + \code{"rtmax"} (maximal retention time), + \code{"into"} (integrated, original, intensity of the peak), + \code{"maxo"} (maximum intensity of the peak), + \code{"sample"} (sample index in which the peak was identified) and + \code{"is_filled"} defining whether the chromatographic peak was + identified by the peak picking algorithm (\code{0}) or was added by the + \code{fillChromPeaks} method (\code{1}). + Depending on the employed peak detection algorithm and the + \code{verboseColumns} parameter of it additional columns might be + returned. For \code{bySample = TRUE} the chromatographic peaks are + returned as a \code{list} of matrices, each containing the + chromatographic peaks of a specific sample. For samples in which no + peaks were detected a matrix with 0 rows is returned. + +For \code{rtime}: if \code{bySample = FALSE} a numeric vector with + the retention times of each scan, if \code{bySample = TRUE} a + \code{list} of numeric vectors with the retention times per sample. For \code{mz}: if \code{bySample = FALSE} a \code{list} with the mz -values (numeric vectors) of each scan. If \code{bySample = TRUE} a -\code{list} with the mz values per sample. + values (numeric vectors) of each scan. If \code{bySample = TRUE} a + \code{list} with the mz values per sample. For \code{intensity}: if \code{bySample = FALSE} a \code{list} with -the intensity values (numeric vectors) of each scan. If -\code{bySample = TRUE} a \code{list} with the intensity values per sample. + the intensity values (numeric vectors) of each scan. 
If + \code{bySample = TRUE} a \code{list} with the intensity values per + sample. For \code{spectra}: if \code{bySample = FALSE} a \code{list} with -\code{\link[MSnbase]{Spectrum}} objects. If \code{bySample = TRUE} the result -is grouped by sample, i.e. as a \code{list} of \code{lists}, each element in -the \emph{outer} \code{list} being the \code{list} of spectra of the specific -file. + \code{\link[MSnbase]{Spectrum}} objects. If \code{bySample = TRUE} the + result is grouped by sample, i.e. as a \code{list} of \code{lists}, each + element in the \emph{outer} \code{list} being the \code{list} of spectra + of the specific file. For \code{processHistory}: a \code{list} of -\code{\link{ProcessHistory}} objects providing the details of the individual -data processing steps that have been performed. + \code{\link{ProcessHistory}} objects providing the details of the + individual data processing steps that have been performed. } \description{ The \code{MsFeatureData} class is designed to encapsule all -data related to the preprocessing of metabolomics data using the \code{xcms} -package, i.e. it contains a \code{matrix} with the features identified by the -feature detection, a \code{DataFrame} with the information on aligned -features across samples and a \code{list} with the adjusted retention times -per sample. + data related to the preprocessing of metabolomics data using the + \code{xcms} package, i.e. it contains a \code{matrix} with the + chromatographic peaks identified by the peak detection, a + \code{DataFrame} with the definition on grouped chromatographic peaks + across samples and a \code{list} with the adjusted retention times per + sample. The \code{XCMSnExp} object is designed to contain all results -from metabolomics data preprocessing (feature detection, feature alignment -and retention time correction). 
The corresponding elements in the -\code{msFeatureData} slot are \code{"features"} (a \code{matrix}), -\code{"featureGroups"} (a \code{DataFrame}) and \code{"adjustedRtime"} (a -\code{list} of numeric vectors). Note that these should not be accessed -directly but rather \emph{via} their accessor methods. Along with the results, -the object contains the processing history that allow to track each -processing step along with the used settings. The object also directly -extends the \code{\link[MSnbase]{OnDiskMSnExp}} object hence allowing easy -access to the full data on which the feature detection was performed. - -Objects from this class should not be created directly, they are returned as -result from the \code{\link{detectFeatures}} method. - -\code{XCMSnExp} objects can be coerced into \code{\linkS4class{xcmsSet}} -objects using the \code{as} method. + from metabolomics data preprocessing (chromatographic peak detection, + peak grouping (correspondence) and retention time correction). The + corresponding elements in the \code{msFeatureData} slot are + \code{"chromPeaks"} (a \code{matrix}), \code{"featureDefinitions"} + (a \code{DataFrame}) and \code{"adjustedRtime"} (a \code{list} of + numeric vectors). Note that these should not be accessed directly but + rather \emph{via} their accessor methods. + Along with the results, the object contains the processing history that + allow to track each processing step along with the used settings. The + object also directly extends the \code{\link[MSnbase]{OnDiskMSnExp}} + object hence allowing easy access to the full data on which the peak + detection was performed. + + Objects from this class should not be created directly, they are + returned as result from the \code{\link{findChromPeaks}} method. + + \code{XCMSnExp} objects can be coerced into \code{\linkS4class{xcmsSet}} + objects using the \code{as} method. + +\code{processHistoryTypes} returns the available \emph{types} of +process histories. 
These can be passed with argument \code{type} to the +\code{processHistory} method to extract specific process step(s). + +\code{profMat}: creates a \emph{profile matrix}, which +is a n x m matrix, n (rows) representing equally spaced m/z values (bins) and +m (columns) the retention time of the corresponding scans. Each cell contains +the maximum intensity measured for the specific scan and m/z values. See +\code{\link{profMat}} for more details and description of the various binning +methods. \code{hasAdjustedRtime}: whether the object provides adjusted -retention times. - -\code{hasAlignedFeatures}: whether the object contains feature -alignment results. - -\code{hasDetectedFeatures}: whether the object contains feature -detection results. - -The \code{adjustedRtime},\code{adjustedRtime<-} method -extract/set adjusted retention times. Retention times are adjusted by -retention time correction/adjustment methods. The \code{bySample} parameter -allows to specify whether the adjusted retention time should be grouped by -sample (file). - -The \code{featureGroups}, \code{featureGroups<-} methods extract -or set the feature alignment results. - -The \code{features}, \code{features<-} methods extract or set -the matrix containing the information on identified features. Parameter -\code{bySample} allows to specify whether features should be returned -ungrouped (default \code{bySample = FALSE}) or grouped by sample ( -\code{bySample = TRUE}). -See description on the return value for details on the matrix columns. Users -usually don't have to use the \code{features<-} method directly as detected -features are added to the object by the \code{\link{detectFeatures}} method. - -The \code{rtime} method extracts the retention time for each -scan. The \code{bySample} parameter allows to return the values grouped -by sample/file. - -The \code{mz} method extracts the mz values from each scan of -all files within an \code{XCMSnExp} object. 
These values are extracted from -the original data files and eventual processing steps are applied -\emph{on the fly}. Using the \code{bySample} parameter it is possible to -switch from the default grouping of mz values by spectrum/scan to a grouping -by sample/file. - -The \code{intensity} method extracts the intensity values from -each scan of all files within an \code{XCMSnExp} object. These values are -extracted from the original data files and eventual processing steps are -applied \emph{on the fly}. Using the \code{bySample} parameter it is possible -to switch from the default grouping of intensity values by spectrum/scan to -a grouping by sample/file. - -The \code{spectra} method extracts the -\code{\link[MSnbase]{Spectrum}} objects containing all data from -\code{object}. These values are extracted from the original data files and -eventual processing steps are applied \emph{on the fly}. Setting -\code{bySample = TRUE} the spectra are returned grouped by sample/file. - -The \code{processHistory} method returns a \code{list} with -\code{\link{ProcessHistory}} objects (or objects inheriting from this base -class) representing the individual processing steps that have been performed, -eventually along with their settings (\code{Param} parameter class). Optional -arguments \code{fileIndex} and \code{type} allow to restrict to process steps -of a certain type or performed on a certain file. - -The \code{dropFeatures} method drops any identified features -and returns the object without that information. Note that for -\code{XCMSnExp} objects the method drops all results from a feature alignment -or retention time adjustment too. For \code{XCMSnExp} objects the method -drops also any related process history steps. - -The \code{dropFeatureGroups} method drops aligned feature -information (i.e. feature groups) and returns the object -without that information. Note that for \code{XCMSnExp} objects the method -drops also retention time adjustments. 
-For \code{XCMSnExp} objects the method drops also any related process history -steps. - -The \code{dropAdjustedRtime} method drops any retention time -adjustment information and returns the object without adjusted retention -time. Note that for \code{XCMSnExp} objects the method drops also all feature -alignment results if these were performed after the retention time adjustment. -For \code{XCMSnExp} objects the method drops also any related process history -steps. + retention times. + +\code{hasFeatures}: whether the object contains correspondence + results (i.e. features). + +\code{hasChromPeaks}: whether the object contains peak + detection results. + +\code{adjustedRtime},\code{adjustedRtime<-}: + extract/set adjusted retention times. \code{adjustedRtime<-} should not + be called manually, it is called internally by the + \code{\link{adjustRtime}} methods. For \code{XCMSnExp} objects, + \code{adjustedRtime<-} does also apply the retention time adjustment to + the chromatographic peaks in the object. The \code{bySample} parameter + allows to specify whether the adjusted retention time should be grouped + by sample (file). + +\code{featureDefinitions}, \code{featureDefinitions<-}: extract + or set the correspondence results, i.e. the mz-rt features (peak groups). + +\code{chromPeaks}, \code{chromPeaks<-}: extract or set + the matrix containing the information on identified chromatographic + peaks. Parameter \code{bySample} allows to specify whether peaks should + be returned ungrouped (default \code{bySample = FALSE}) or grouped by + sample (\code{bySample = TRUE}). The \code{chromPeaks<-} method for + \code{XCMSnExp} objects removes also all correspondence (peak grouping) + and retention time correction (alignment) results. The optional + arguments \code{rt}, \code{mz} and \code{ppm} allow to extract only + chromatographic peaks overlapping (if \code{type = "any"}) or completely + within (if \code{type = "within"}) the defined retention time and mz + ranges. 
+ See description of the return value for details on the returned matrix. + Users usually don't have to use the \code{chromPeaks<-} method directly + as detected chromatographic peaks are added to the object by the + \code{\link{findChromPeaks}} method. + +\code{rtime}: extracts the retention time for each + scan. The \code{bySample} parameter allows to return the values grouped + by sample/file and \code{adjusted} whether adjusted or raw retention + times should be returned. By default the method returns adjusted + retention times, if they are available (i.e. if retention times were + adjusted using the \code{\link{adjustRtime}} method). + +\code{mz}: extracts the mz values from each scan of + all files within an \code{XCMSnExp} object. These values are extracted + from the original data files and eventual processing steps are applied + \emph{on the fly}. Using the \code{bySample} parameter it is possible to + switch from the default grouping of mz values by spectrum/scan to a + grouping by sample/file. + +\code{intensity}: extracts the intensity values from + each scan of all files within an \code{XCMSnExp} object. These values are + extracted from the original data files and eventual processing steps are + applied \emph{on the fly}. Using the \code{bySample} parameter it is + possible to switch from the default grouping of intensity values by + spectrum/scan to a grouping by sample/file. + +\code{spectra}: extracts the + \code{\link[MSnbase]{Spectrum}} objects containing all data from + \code{object}. The values are extracted from the original data files and + eventual processing steps are applied \emph{on the fly}. By setting + \code{bySample = TRUE}, the spectra are returned grouped by sample/file. + If the \code{XCMSnExp} object contains adjusted retention times, these + are returned by default in the \code{Spectrum} objects (can be + overwritten by setting \code{adjusted = FALSE}). 
+ +\code{processHistory}: returns a \code{list} with + \code{\link{ProcessHistory}} objects (or objects inheriting from this + base class) representing the individual processing steps that have been + performed, eventually along with their settings (\code{Param} parameter + class). Optional arguments \code{fileIndex} and \code{type} allow to + restrict to process steps of a certain type or performed on a certain + file. + +\code{dropChromPeaks}: drops any identified chromatographic + peaks and returns the object without that information. Note that for + \code{XCMSnExp} objects the method drops all results from a + correspondence (peak grouping) or alignment (retention time adjustment) + too. For \code{XCMSnExp} objects the method drops also any related + process history steps. + +\code{dropFeatureDefinitions}: drops the results from a + correspondence (peak grouping) analysis, i.e. the definition of the mz-rt + features and returns the object without that information. Note that for + \code{XCMSnExp} objects the method will also drop retention time + adjustment results, if these were performed after the last peak grouping + (i.e. which base on the results from the peak grouping that are going to + be removed). + For \code{XCMSnExp} objects also all related process history steps are + removed. Also eventually filled in peaks (by \code{\link{fillChromPeaks}} + ) will be removed too. + +\code{dropAdjustedRtime}: drops any retention time + adjustment information and returns the object without adjusted retention + time. For \code{XCMSnExp} object this also reverts the retention times + reported for the chromatographic peaks in the peak matrix to the + original, raw, ones (after chromatographic peak detection). Note that + for \code{XCMSnExp} objects the method drops also all peak grouping + results if these were performed \emph{after} the retention time + adjustment. For \code{XCMSnExp} objects the method drops also any + related process history steps. 
+ +\code{dropFilledChromPeaks}: drops any filled-in chromatographic + peaks (filled in by the \code{\link{fillChromPeaks}} method) and all + related process history steps. } \section{Slots}{ @@ -308,14 +461,16 @@ steps. tracking all individual analysis steps that have been performed.} \item{\code{msFeatureData}}{\code{MsFeatureData} class extending \code{environment} -and containing the results from a feature detection (element -\code{"features"}), feature alignment (element \code{"featureGroups"}) and -retention time correction (element \code{""}) steps.} +and containing the results from a chromatographic peak detection (element +\code{"chromPeaks"}), peak grouping (element \code{"featureDefinitions"}) +and retention time correction (element \code{"adjustedRtime"}) steps.} }} + \note{ -The \code{"features"} element in the \code{msFeatureData} slot is -equivalent to the \code{@peaks} slot of the \code{xcmsSet} object, the -\code{"featureGroups"} contains information from the \code{} +The \code{"chromPeaks"} element in the \code{msFeatureData} slot is + equivalent to the \code{@peaks} slot of the \code{xcmsSet} object, the + \code{"featureDefinitions"} contains information from the \code{@groups} + and \code{@groupidx} slots from an \code{xcmsSet} object. } \examples{ @@ -323,24 +478,24 @@ equivalent to the \code{@peaks} slot of the \code{xcmsSet} object, the library(faahKO) od <- readMSData2(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"), system.file("cdf/KO/ko16.CDF", package = "faahKO"))) -## Now we perform a feature detection on this data set using the +## Now we perform a chromatographic peak detection on this data set using the ## matched filter method. We are tuning the settings such that it performs ## faster. 
mfp <- MatchedFilterParam(binSize = 4) -xod <- detectFeatures(od, param = mfp) +xod <- findChromPeaks(od, param = mfp) -## The results from the feature detection are now stored in the XCMSnExp +## The results from the peak detection are now stored in the XCMSnExp ## object xod -## The detected features can be accessed with the features method. -head(features(xod)) +## The detected peaks can be accessed with the chromPeaks method. +head(chromPeaks(xod)) -## The settings of the feature detection can be accessed with the -## processHistory method +## The settings of the chromatographic peak detection can be accessed with +## the processHistory method processHistory(xod) -## Also the parameter class for the feature detection can be accessed +## Also the parameter class for the peak detection can be accessed processParam(processHistory(xod)[[1]]) ## The XCMSnExp inherits all methods from the pSet and OnDiskMSnExp classes @@ -368,16 +523,16 @@ head(split(spctr, fromFile(xod))[[1]]) ## ## XCMSnExp objects can be filtered by file, retention time, mz values or ## MS level. For some of these filter preprocessing results (mostly -## retention time correction and feature alignment results) will be dropped. +## retention time correction and peak grouping results) will be dropped. ## Below we filter the XCMSnExp object by file to extract the results for ## only the second file. 
xod_2 <- filterFile(xod, file = 2) xod_2 -## Now the objects contains only the idenfified features for the second file -head(features(xod_2)) +## Now the object contains only the identified peaks for the second file +head(chromPeaks(xod_2)) -head(features(xod)[features(xod)[, "sample"] == 2, ]) +head(chromPeaks(xod)[chromPeaks(xod)[, "sample"] == 2, ]) ########## ## Coercing to an xcmsSet object @@ -386,16 +541,28 @@ head(features(xod)[features(xod)[, "sample"] == 2, ]) xs <- as(xod, "xcmsSet") head(peaks(xs)) } -\author{ -Johannes Rainer -} \seealso{ \code{\linkS4class{xcmsSet}} for the old implementation. + \code{\link[MSnbase]{OnDiskMSnExp}}, \code{\link[MSnbase]{MSnExp}} + and \code{\link[MSnbase]{pSet}} for a complete list of inherited methods. -\code{\link[MSnbase]{OnDiskMSnExp}}, \code{\link[MSnbase]{MSnExp}} -and \code{\link[MSnbase]{pSet}} for a complete list of inherited methods. + \code{\link{findChromPeaks}} for available peak detection methods + returning a \code{XCMSnExp} object as a result. -\code{\link{detectFeatures}} for available feature detection methods -returning a \code{XCMSnExp} object as a result. -} + \code{\link{groupChromPeaks}} for available peak grouping + methods and \code{\link{featureDefinitions}} for the method to extract + the feature definitions representing the peak grouping results. + \code{\link{adjustRtime}} for retention time adjustment methods. + + \code{\link{extractChromatograms}} to extract MS data as + \code{\link{Chromatogram}} objects. + \code{\link{extractMsData}} for the method to extract MS data as + \code{data.frame}s. + +\code{\link{fillChromPeaks}} for the method to fill-in eventually + missing chromatographic peaks for a feature in some samples. 
+} +\author{ +Johannes Rainer +} diff --git a/man/XCMSnExp-filter-methods.Rd b/man/XCMSnExp-filter-methods.Rd index f89952fa8..7b0615136 100644 --- a/man/XCMSnExp-filter-methods.Rd +++ b/man/XCMSnExp-filter-methods.Rd @@ -2,38 +2,38 @@ % Please edit documentation in R/methods-XCMSnExp.R \docType{methods} \name{filterFile,XCMSnExp-method} -\alias{XCMSnExp-filter} \alias{filterFile,XCMSnExp-method} +\alias{XCMSnExp-filter} \alias{filterMz,XCMSnExp-method} \alias{filterRt,XCMSnExp-method} -\title{XCMSnExp filtering and subsetting - -The methods listed on this page allow to filter and subset -\code{\link{XCMSnExp}} objects. Most of them are inherited from the -\code{\link[MSnbase]{OnDiskMSnExp}} object and have been adapted for -\code{\link{XCMSnExp}} to enable subsetting also on the preprocessing -results.} +\title{XCMSnExp filtering and subsetting} \usage{ -\S4method{filterFile}{XCMSnExp}(object, file) +\S4method{filterFile}{XCMSnExp}(object, file, keepAdjustedRtime = FALSE) \S4method{filterMz}{XCMSnExp}(object, mz, msLevel., ...) -\S4method{filterRt}{XCMSnExp}(object, rt, msLevel., adjusted = FALSE) +\S4method{filterRt}{XCMSnExp}(object, rt, msLevel., + adjusted = hasAdjustedRtime(object)) } \arguments{ \item{object}{A \code{\link{XCMSnExp}} object.} \item{file}{For \code{filterFile}: \code{integer} defining the file index -within the object to subset the object by file or \code{character} specifying -the file names to sub set. The indices are expected to be increasingly -ordered, if not they are ordered internally.} +within the object to subset the object by file or \code{character} +specifying the file names to sub set. 
The indices are expected to be +increasingly ordered, if not they are ordered internally.} + +\item{keepAdjustedRtime}{For \code{filterFile}: \code{logical(1)} defining +whether the adjusted retention times should be kept, even if features are +being removed (and the retention time correction being potentially +performed on these features).} \item{mz}{For \code{filterMz}: \code{numeric(2)} defining the lower and upper mz value for the filtering.} \item{msLevel.}{For \code{filterMz}, \code{filterRt}, \code{numeric(1)} -defining the MS level(s) to which operations should be applied or to which -the object should be subsetted.} +defining the MS level(s) to which operations should be applied or to +which the object should be subsetted.} \item{...}{Optional additional arguments.} @@ -41,36 +41,54 @@ the object should be subsetted.} window (lower and upper bound) for the filtering.} \item{adjusted}{For \code{filterRt}: \code{logical} indicating whether the -object should be filtered by original (\code{adjusted = FALSE}) or adjusted -retention times (\code{adjusted = TRUE}).} +object should be filtered by original (\code{adjusted = FALSE}) or +adjusted retention times (\code{adjusted = TRUE}). +For \code{spectra}: whether the retention times in the individual +\code{Spectrum} objects should be the adjusted or raw retention times.} } \value{ All methods return an \code{\link{XCMSnExp}} object. } \description{ -The \code{filterFile} method allows to reduce the -\code{\link{XCMSnExp}} to data from only certain files. Identified features -for these files are retained while eventually all present feature -alignment/grouping information and adjusted retention times are dropped.. - -The \code{filterMz} method filters the data set based on the -provided mz value range. All features and feature groups (aligned features) -falling completely within the provided mz value range are retained (if their -minimal mz value is \code{>= mz[1]} and the maximal mz value \code{<= mz[2]}. 
-Adjusted retention times, if present, are not altered by the filtering. - -The \code{filterRt} method filters the data set based on the -provided retention time range. All features and feature groups within -the specified retention time window are retained. Filtering by retention time -does not drop any preprocessing results. The method returns an empty object -if no spectrum or feature is within the specified retention time range. +The methods listed on this page allow to filter and subset + \code{\link{XCMSnExp}} objects. Most of them are inherited from the + \code{\link[MSnbase]{OnDiskMSnExp}} object and have been adapted for + \code{\link{XCMSnExp}} to enable subsetting also on the preprocessing + results. + + \code{filterFile}: allows to reduce the + \code{\link{XCMSnExp}} to data from only certain files. Identified + chromatographic peaks for these files are retained while all eventually + present features (peak grouping information) are dropped. By default also + adjusted retention times are removed. This can be overwritten by setting + \code{keepAdjustedRtime = TRUE}, but users should use this option with + caution. + +\code{filterMz}: filters the data set based on the + provided mz value range. All chromatographic peaks and features (grouped + peaks) falling completely within the provided mz value range are retained + (if their minimal mz value is \code{>= mz[1]} and the maximal mz value + \code{<= mz[2]}). Adjusted retention times, if present, are not altered by + the filtering. + +\code{filterRt}: filters the data set based on the + provided retention time range. All chromatographic peaks and features + (grouped peaks) within the specified retention time window are retained (i.e. if + the retention time corresponding to the peak's apex is within the + specified rt range). If retention time correction has been performed, + the method will by default filter the object by adjusted retention times. 
+ The argument \code{adjusted} allows to specify manually whether filtering + should be performed by raw or adjusted retention times. Filtering by + retention time does not drop any preprocessing results. + The method returns an empty object if no spectrum or feature is within + the specified retention time range. } \note{ The \code{filterFile} method removes also process history steps not -related to the files to which the object should be sub-setted and updates -the \code{fileIndex} attribute accordingly. Also, the method does not allow -arbitrary ordering of the files or re-ordering of the files within the -object. + related to the files to which the object should be sub-setted and updates + the \code{fileIndex} attribute accordingly. Also, the method does not + allow arbitrary ordering of the files or re-ordering of the files within + the object. } \examples{ @@ -82,26 +100,44 @@ fs <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), ## Read the files od <- readMSData2(fs) -## Perform feature detection on them using default matched filter settings. +## Perform peak detection on them using default matched filter settings. mfp <- MatchedFilterParam() -xod <- detectFeatures(od, param = mfp) +xod <- findChromPeaks(od, param = mfp) ## Subset the dataset to the first and third file. xod_sub <- filterFile(xod, file = c(1, 3)) -## The number of features per file for the full object -table(features(xod)[, "sample"]) +## The number of chromatographic peaks per file for the full object +table(chromPeaks(xod)[, "sample"]) -## The number of features per file for the subset -table(features(xod_sub)[, "sample"]) +## The number of chromatographic peaks per file for the subset +table(chromPeaks(xod_sub)[, "sample"]) basename(fileNames(xod)) basename(fileNames(xod_sub)) -} -\author{ -Johannes Rainer + +## Filter on mz values; chromatographic peaks and features within the +## mz range are retained (as well as adjusted retention times). 
+xod_sub <- filterMz(xod, mz = c(300, 400)) +head(chromPeaks(xod_sub)) +nrow(chromPeaks(xod_sub)) +nrow(chromPeaks(xod)) + +## Filter on rt values. All chromatographic peaks and features within the +## retention time range are retained. Filtering is performed by default on +## adjusted retention times, if present. +xod_sub <- filterRt(xod, rt = c(2700, 2900)) + +range(rtime(xod_sub)) +head(chromPeaks(xod_sub)) +range(chromPeaks(xod_sub)[, "rt"]) + +nrow(chromPeaks(xod)) +nrow(chromPeaks(xod_sub)) } \seealso{ \code{\link{XCMSnExp}} for base class documentation. } - +\author{ +Johannes Rainer +} diff --git a/man/XCMSnExp-inherited-methods.Rd b/man/XCMSnExp-inherited-methods.Rd index 3b4872639..9da8e200e 100644 --- a/man/XCMSnExp-inherited-methods.Rd +++ b/man/XCMSnExp-inherited-methods.Rd @@ -5,8 +5,8 @@ \alias{[,XCMSnExp,logicalOrNumeric,missing,missing-method} \alias{bin,XCMSnExp-method} \alias{clean,XCMSnExp-method} -\alias{filterAcquisitionNum,XCMSnExp-method} \alias{filterMsLevel,XCMSnExp-method} +\alias{filterAcquisitionNum,XCMSnExp-method} \alias{normalize,XCMSnExp-method} \alias{pickPeaks,XCMSnExp-method} \alias{removePeaks,XCMSnExp-method} @@ -44,7 +44,7 @@ which spectra the data set should be reduced.} \item{drop}{For \code{[}: not supported.} -\item{object}{An \code{\link{XCMSnExp}} object.} +\item{object}{An \code{\link{XCMSnExp}} or \code{OnDiskMSnExp} object.} \item{binSize}{\code{numeric(1)} defining the size of a bin (in Dalton).} @@ -71,8 +71,8 @@ object by file.} normalization method. See \code{\link[MSnbase]{normalize}} for details. For \code{pickPeaks}: \code{character(1)} defining the method. See \code{\link[MSnbase]{pickPeaks}} for options. For \code{smooth}: -\code{character(1)} defining the method. See \code{\link[MSnbase]{smooth}} -for options and details.} +\code{character(1)} defining the method. 
See +\code{\link[MSnbase]{smooth}} for options and details.} \item{...}{Optional additional arguments.} @@ -94,55 +94,58 @@ For all methods: a \code{XCMSnExp} object. } \description{ The methods listed on this page are \code{\link{XCMSnExp}} -methods inherited from its parent, the \code{\link[MSnbase]{OnDiskMSnExp}} -class from the \code{MSnbase} package, that alter the raw data or are related -to data subsetting. Thus calling any of these methods causes all \code{xcms} -pre-processing results to be removed from the \code{\link{XCMSnExp}} object -to ensure its data integrity. - -The \code{[} method allows to subset a \code{\link{XCMSnExp}} object by -spectra. For more details and examples see the documentation for -\code{\link[MSnbase]{OnDiskMSnExp}}. - -The \code{bin} method allows to \emph{bin} spectra. See -\code{\link[MSnbase]{bin}} documentation for more details and examples. - -The \code{clean} method removes unused \code{0} intensity data -points. See \code{\link[MSnbase]{clean}} documentation for details and -examples. - -The \code{filterMsLevel} reduces the \code{\link{XCMSnExp}} -object to spectra of the specified MS level(s). See -\code{\link[MSnbase]{filterMsLevel}} documentation for details and examples. - -The \code{filterAcquisitionNum} method filters the -\code{\link{XCMSnExp}} object keeping only spectra with the provided -acquisition numbers. See \code{\link[MSnbase]{filterAcquisitionNum}} for -details and examples. - -The \code{normalize} method performs basic normalization of spectra -intensities. See \code{\link[MSnbase]{normalize}} documentation for details -and examples. + methods inherited from its parent, the + \code{\link[MSnbase]{OnDiskMSnExp}} class from the \code{MSnbase} + package, that alter the raw data or are related to data subsetting. Thus + calling any of these methods causes all \code{xcms} pre-processing + results to be removed from the \code{\link{XCMSnExp}} object to ensure + its data integrity. 
+ + The \code{[} method allows to subset a \code{\link{XCMSnExp}} object by + spectra. For more details and examples see the documentation for + \code{\link[MSnbase]{OnDiskMSnExp}}. + +\code{bin}: allows to \emph{bin} spectra. See + \code{\link[MSnbase]{bin}} documentation for more details and examples. + +\code{clean}: removes unused \code{0} intensity data + points. See \code{\link[MSnbase]{clean}} documentation for details and + examples. + +\code{filterMsLevel}: reduces the \code{\link{XCMSnExp}} + object to spectra of the specified MS level(s). See + \code{\link[MSnbase]{filterMsLevel}} documentation for details and + examples. + +\code{filterAcquisitionNum}: filters the + \code{\link{XCMSnExp}} object keeping only spectra with the provided + acquisition numbers. See \code{\link[MSnbase]{filterAcquisitionNum}} for + details and examples. + +The \code{normalize} method performs basic normalization of + spectra intensities. See \code{\link[MSnbase]{normalize}} documentation + for details and examples. The \code{pickPeaks} method performs peak picking. See -\code{\link[MSnbase]{pickPeaks}} documentation for details and examples. + \code{\link[MSnbase]{pickPeaks}} documentation for details and examples. -The \code{removePeaks} method removes peaks (intensities) lower than a -threshold. Note that these peaks are not features! See \code{\link[MSnbase]{removePeaks}} documentation for details and examples. +The \code{removePeaks} method removes mass peaks (intensities) + lower than a threshold. Note that these peaks refer to \emph{mass} + peaks, which are different to the chromatographic peaks detected and + analyzed in a metabolomics experiment! See + \code{\link[MSnbase]{removePeaks}} documentation for details and + examples. -The \code{smooth} method smooths spectra. See \code{\link[MSnbase]{smooth}} -documentation for details and examples. -} -\author{ -Johannes Rainer +The \code{smooth} method smooths spectra. 
See + \code{\link[MSnbase]{smooth}} documentation for details and examples. } \seealso{ \code{\link{XCMSnExp-filter}} for methods to filter and subset -\code{XCMSnExp} objects. - -\code{\link{XCMSnExp}} for base class documentation. - -\code{\link[MSnbase]{OnDiskMSnExp}} for the documentation of the -parent class. + \code{XCMSnExp} objects. + \code{\link{XCMSnExp}} for base class documentation. + \code{\link[MSnbase]{OnDiskMSnExp}} for the documentation of the + parent class. +} +\author{ +Johannes Rainer } - diff --git a/man/XCMSnExp-peak-grouping-results.Rd b/man/XCMSnExp-peak-grouping-results.Rd new file mode 100644 index 000000000..fcbce04e6 --- /dev/null +++ b/man/XCMSnExp-peak-grouping-results.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-XCMSnExp.R +\docType{methods} +\name{featureValues,XCMSnExp-method} +\alias{featureValues,XCMSnExp-method} +\alias{featureValues} +\title{Accessing mz-rt feature data values} +\usage{ +\S4method{featureValues}{XCMSnExp}(object, method = c("medret", "maxint"), + value = "index", intensity = "into", filled = TRUE) +} +\arguments{ +\item{object}{A \code{\link{XCMSnExp}} object providing the feature +definitions.} + +\item{method}{\code{character} specifying the method to resolve +multi-peak mappings within the same sample, i.e. to define the +\emph{representative} peak for a feature in samples where more than +one peak was assigned to the feature. If \code{"medret"}: select the +peak closest to the median retention time of the feature. 
+If \code{"maxint"}: select the peak yielding the largest signal.} + +\item{value}{\code{character} specifying the name of the column in +\code{chromPeaks(object)} that should be returned or \code{"index"} (the +default) to return the index of the peak in the +\code{chromPeaks(object)} matrix corresponding to the +\emph{representative} peak for the feature in the respective sample.} + +\item{intensity}{\code{character} specifying the name of the column in the +\code{chromPeaks(objects)} matrix containing the intensity value of the +peak that should be used for the conflict resolution if +\code{method = "maxint"}.} + +\item{filled}{\code{logical(1)} specifying whether values for filled-in +peaks should be returned or not. If \code{filled = FALSE}, an \code{NA} +is returned in the matrix for the respective peak. See +\code{\link{fillChromPeaks}} for details on peak filling.} +} +\value{ +For \code{featureValues}: a \code{matrix} with + feature values, columns representing samples, rows features. The order + of the features matches the order found in the + \code{featureDefinitions(object)} \code{DataFrame}. The rownames of the + \code{matrix} are the same than those of the \code{featureDefinitions} + \code{DataFrame}. \code{NA} is reported for features without + corresponding chromatographic peak in the respective sample(s). +} +\description{ +\code{featureValues,XCMSnExp} : extract a \code{matrix} for + feature values with rows representing features and columns samples. + Parameter \code{value} allows to define which column from the + \code{\link{chromPeaks}} matrix should be returned. Multiple + chromatographic peaks from the same sample can be assigned to a feature. + Parameter \code{method} allows to specify the method to be used in such + cases to chose from which of the peaks the value should be returned. +} +\note{ +This method is equivalent to the \code{\link{groupval}} for + \code{xcmsSet} objects. 
+} +\seealso{ +\code{\link{XCMSnExp}} for information on the data object. +\code{\link{featureDefinitions}} to extract the \code{DataFrame} with the +feature definitions. +\code{\link{hasFeatures}} to evaluate whether the +\code{\link{XCMSnExp}} provides feature definitions. +\code{\link{groupval}} for the equivalent method on \code{xcmsSet} objects. +} +\author{ +Johannes Rainer +} diff --git a/man/adjustRtime-obiwarp.Rd b/man/adjustRtime-obiwarp.Rd new file mode 100644 index 000000000..375b69ad2 --- /dev/null +++ b/man/adjustRtime-obiwarp.Rd @@ -0,0 +1,298 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, +% R/methods-OnDiskMSnExp.R, R/methods-Params.R, R/methods-XCMSnExp.R +\docType{class} +\name{adjustRtime-obiwarp} +\alias{adjustRtime-obiwarp} +\alias{ObiwarpParam-class} +\alias{ObiwarpParam} +\alias{adjustRtime,OnDiskMSnExp,ObiwarpParam-method} +\alias{show,ObiwarpParam-method} +\alias{binSize,ObiwarpParam-method} +\alias{binSize<-,ObiwarpParam-method} +\alias{centerSample,ObiwarpParam-method} +\alias{centerSample} +\alias{centerSample<-,ObiwarpParam-method} +\alias{centerSample<-} +\alias{response,ObiwarpParam-method} +\alias{response} +\alias{response<-,ObiwarpParam-method} +\alias{response<-} +\alias{distFun,ObiwarpParam-method} +\alias{distFun} +\alias{distFun<-,ObiwarpParam-method} +\alias{distFun<-} +\alias{gapInit,ObiwarpParam-method} +\alias{gapInit} +\alias{gapInit<-,ObiwarpParam-method} +\alias{gapInit<-} +\alias{gapExtend,ObiwarpParam-method} +\alias{gapExtend} +\alias{gapExtend<-,ObiwarpParam-method} +\alias{gapExtend<-} +\alias{factorDiag,ObiwarpParam-method} +\alias{factorDiag} +\alias{factorDiag<-,ObiwarpParam-method} +\alias{factorDiag<-} +\alias{factorGap,ObiwarpParam-method} +\alias{factorGap} +\alias{factorGap<-,ObiwarpParam-method} +\alias{factorGap<-} +\alias{localAlignment,ObiwarpParam-method} +\alias{localAlignment} +\alias{localAlignment<-,ObiwarpParam-method} 
+\alias{localAlignment<-} +\alias{initPenalty,ObiwarpParam-method} +\alias{initPenalty} +\alias{initPenalty<-,ObiwarpParam-method} +\alias{initPenalty<-} +\alias{adjustRtime,XCMSnExp,ObiwarpParam-method} +\title{Align retention times across samples using Obiwarp} +\usage{ +ObiwarpParam(binSize = 1, centerSample = integer(), response = 1L, + distFun = "cor_opt", gapInit = numeric(), gapExtend = numeric(), + factorDiag = 2, factorGap = 1, localAlignment = FALSE, + initPenalty = 0) + +\S4method{adjustRtime}{OnDiskMSnExp,ObiwarpParam}(object, param) + +\S4method{show}{ObiwarpParam}(object) + +\S4method{binSize}{ObiwarpParam}(object) + +\S4method{binSize}{ObiwarpParam}(object) <- value + +\S4method{centerSample}{ObiwarpParam}(object) + +\S4method{centerSample}{ObiwarpParam}(object) <- value + +\S4method{response}{ObiwarpParam}(object) + +\S4method{response}{ObiwarpParam}(object) <- value + +\S4method{distFun}{ObiwarpParam}(object) + +\S4method{distFun}{ObiwarpParam}(object) <- value + +\S4method{gapInit}{ObiwarpParam}(object) + +\S4method{gapInit}{ObiwarpParam}(object) <- value + +\S4method{gapExtend}{ObiwarpParam}(object) + +\S4method{gapExtend}{ObiwarpParam}(object) <- value + +\S4method{factorDiag}{ObiwarpParam}(object) + +\S4method{factorDiag}{ObiwarpParam}(object) <- value + +\S4method{factorGap}{ObiwarpParam}(object) + +\S4method{factorGap}{ObiwarpParam}(object) <- value + +\S4method{localAlignment}{ObiwarpParam}(object) + +\S4method{localAlignment}{ObiwarpParam}(object) <- value + +\S4method{initPenalty}{ObiwarpParam}(object) + +\S4method{initPenalty}{ObiwarpParam}(object) <- value + +\S4method{adjustRtime}{XCMSnExp,ObiwarpParam}(object, param) +} +\arguments{ +\item{binSize}{\code{numeric(1)} defining the bin size (in mz dimension) +to be used for the \emph{profile matrix} generation. 
See \code{step} +parameter in \code{\link{profile-matrix}} documentation for more details.} + +\item{centerSample}{\code{integer(1)} defining the index of the center sample +in the experiment. It defaults to +\code{floor(median(1:length(fileNames(object))))}.} + +\item{response}{\code{numeric(1)} defining the \emph{responsiveness} of +warping with \code{response = 0} giving linear warping on start and end +points and \code{response = 100} warping using all bijective anchors.} + +\item{distFun}{character defining the distance function to be used. Allowed +values are \code{"cor"} (Pearson's correlation), \code{"cor_opt"} +(calculate only 10\% diagonal band of distance matrix; better runtime), +\code{"cov"} (covariance), \code{"prd"} (product) and \code{"euc"} +(Euclidian distance). The default value is \code{distFun = "cor_opt"}.} + +\item{gapInit}{\code{numeric(1)} defining the penalty for gap opening. The +default value for \code{gapInit} depends on the value of \code{distFun}: +for \code{distFun = "cor"} and \code{distFun = "cor_opt"} it is +\code{0.3}, for \code{distFun = "cov"} and \code{distFun = "prd"} +\code{0.0} and for \code{distFun = "euc"} \code{0.9}.} + +\item{gapExtend}{\code{numeric(1)} defining the penalty for gap enlargement. 
+The default value for \code{gapExtend} depends on the value of +\code{distFun}, for \code{distFun = "cor"} and +\code{distFun = "cor_opt"} it is \code{2.4}, for \code{distFun = "cov"} +\code{11.7}, for \code{distFun = "euc"} \code{1.8} and for +\code{distFun = "prd"} \code{7.8}.} + +\item{factorDiag}{\code{numeric(1)} defining the local weight applied to +diagonal moves in the alignment.} + +\item{factorGap}{\code{numeric(1)} defining the local weight for gap moves +in the alignment.} + +\item{localAlignment}{\code{logical(1)} whether a local alignment should be +performed instead of the default global alignment.} + +\item{initPenalty}{\code{numeric(1)} defining the penalty for initiating an +alignment (for local alignment only).} + +\item{object}{For \code{adjustRtime}: an \code{\link{XCMSnExp}} object. + + For all other methods: a \code{ObiwarpParam} object.} + +\item{param}{A \code{ObiwarpParam} object containing all settings for +the alignment method.} + +\item{value}{The value for the slot.} +} +\value{ +The \code{ObiwarpParam} function returns a +\code{ObiwarpParam} class instance with all of the settings +specified for obiwarp retention time adjustment and alignment. + +For \code{adjustRtime,XCMSnExp,ObiwarpParam}: a + \code{\link{XCMSnExp}} object with the results of the retention time + adjustment step. These can be accessed with the + \code{\link{adjustedRtime}} method. Retention time correction does also + adjust the retention time of the identified chromatographic peaks + (accessed \emph{via} \code{\link{chromPeaks}}). Note that retention time + correction drops all previous peak grouping results from the result + object. + + For \code{adjustRtime,OnDiskMSnExp,ObiwarpParam}: a \code{numeric} with + the adjusted retention times per spectra (in the same order as + \code{rtime}). +} +\description{ +This method performs retention time adjustment using the + Obiwarp method [Prince 2006]. 
It is based on the code at + \url{http://obi-warp.sourceforge.net} but supports alignment of multiple + samples by aligning each against a \emph{center} sample. The alignment is + performed directly on the \code{\link{profile-matrix}} and can hence be + performed independently of the peak detection or peak grouping. + +The \code{ObiwarpParam} class allows to specify all + settings for the retention time adjustment based on the \emph{obiwarp} + method. Class Instances should be created using the + \code{ObiwarpParam} constructor. + +\code{binSize},\code{binSize<-}: getter and setter + for the \code{binSize} slot of the object. + +\code{centerSample},\code{centerSample<-}: getter and setter + for the \code{centerSample} slot of the object. + +\code{response},\code{response<-}: getter and setter + for the \code{response} slot of the object. + +\code{distFun},\code{distFun<-}: getter and setter + for the \code{distFun} slot of the object. + +\code{gapInit},\code{gapInit<-}: getter and setter + for the \code{gapInit} slot of the object. + +\code{gapExtend},\code{gapExtend<-}: getter and setter + for the \code{gapExtend} slot of the object. + +\code{factorDiag},\code{factorDiag<-}: getter and setter + for the \code{factorDiag} slot of the object. + +\code{factorGap},\code{factorGap<-}: getter and setter + for the \code{factorGap} slot of the object. + +\code{localAlignment},\code{localAlignment<-}: getter and setter + for the \code{localAlignment} slot of the object. + +\code{initPenalty},\code{initPenalty<-}: getter and setter + for the \code{initPenalty} slot of the object. + +\code{adjustRtime,XCMSnExp,ObiwarpParam}: + performs retention time correction/alignment based on the total mz-rt + data using the \emph{obiwarp} method. +} +\section{Slots}{ + +\describe{ +\item{\code{.__classVersion__,binSize,centerSample,response,distFun,gapInit,gapExtend,factorDiag,factorGap,localAlignment,initPenalty}}{See corresponding parameter above. 
\code{.__classVersion__} stores +the version from the class. Slots values should exclusively be accessed +\emph{via} the corresponding getter and setter methods listed above.} +}} + +\note{ +These methods and classes are part of the updated and modernized + \code{xcms} user interface which will eventually replace the + \code{\link{retcor}} methods. All of the settings to the alignment + algorithm can be passed with a \code{ObiwarpParam} object. + +Calling \code{adjustRtime} on an \code{XCMSnExp} object will cause + all peak grouping (correspondence) results and any previous retention + time adjustment results to be dropped. +} +\examples{ +library(faahKO) +library(MSnbase) +fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, + full.names = TRUE) + +## Reading 2 of the KO samples +raw_data <- readMSData2(fls[1:2]) + +## Perform retention time correction on the OnDiskMSnExp: +res <- adjustRtime(raw_data, param = ObiwarpParam()) + +## As a result we get a numeric vector with the adjusted retention times for +## all spectra. +head(res) + +## We can split this by file to get the adjusted retention times for each +## file +resL <- split(res, fromFile(raw_data)) + +############################## +## Perform retention time correction on an XCMSnExp: +## +## Perform first the chromatographic peak detection using the matchedFilter +## method. +mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) +res <- findChromPeaks(raw_data, param = mfp) + +## Performing the retention time adjustment using obiwarp. +res_2 <- adjustRtime(res, param = ObiwarpParam()) + +head(rtime(res_2)) +head(rtime(raw_data)) + +## Also the retention times of the detected peaks were adjusted. +tail(chromPeaks(res)) +tail(chromPeaks(res_2)) +} +\references{ +John T. Prince and Edward M. Marcotte. "Chromatographic Alignment of +ESI-LC-MS Proteomics Data Sets by Ordered Bijective Interpolated Warping" +\emph{Anal. Chem.} 2006, 78(17):6140-6152. + +John T. Prince and Edward M. Marcotte. 
"Chromatographic Alignment of +ESI-LC-MS Proteomic Data Sets by Ordered Bijective Interpolated Warping" +\emph{Anal. Chem.} 2006, 78 (17), 6140-6152. +} +\seealso{ +\code{\link{retcor.obiwarp}} for the old user interface. + \code{\link{plotAdjustedRtime}} for visualization of alignment results. + +\code{\link{XCMSnExp}} for the object containing the results of + the alignment. + +Other retention time correction methods: \code{\link{adjustRtime-peakGroups}}, + \code{\link{adjustRtime}} +} +\author{ +Colin Smith, Johannes Rainer +} diff --git a/man/adjustRtime-peakGroups.Rd b/man/adjustRtime-peakGroups.Rd new file mode 100644 index 000000000..27492dd68 --- /dev/null +++ b/man/adjustRtime-peakGroups.Rd @@ -0,0 +1,292 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, +% R/functions-XCMSnExp.R, R/methods-Params.R, R/methods-XCMSnExp.R +\docType{class} +\name{adjustRtime-peakGroups} +\alias{adjustRtime-peakGroups} +\alias{PeakGroupsParam-class} +\alias{PeakGroupsParam} +\alias{adjustRtimePeakGroups} +\alias{show,PeakGroupsParam-method} +\alias{minFraction,PeakGroupsParam-method} +\alias{minFraction<-,PeakGroupsParam-method} +\alias{extraPeaks,PeakGroupsParam-method} +\alias{extraPeaks} +\alias{extraPeaks<-,PeakGroupsParam-method} +\alias{extraPeaks<-} +\alias{smooth,PeakGroupsParam-method} +\alias{smooth} +\alias{smooth<-,PeakGroupsParam-method} +\alias{smooth<-} +\alias{span,PeakGroupsParam-method} +\alias{span} +\alias{span<-,PeakGroupsParam-method} +\alias{span<-} +\alias{family,PeakGroupsParam-method} +\alias{family} +\alias{family<-,PeakGroupsParam-method} +\alias{family<-} +\alias{peakGroupsMatrix,PeakGroupsParam-method} +\alias{peakGroupsMatrix} +\alias{peakGroupsMatrix<-,PeakGroupsParam-method} +\alias{peakGroupsMatrix<-} +\alias{adjustRtime,XCMSnExp,PeakGroupsParam-method} +\title{Retention time correction based on alignment of house keeping peak +groups} +\usage{ +PeakGroupsParam(minFraction 
= 0.9, extraPeaks = 1, smooth = "loess", + span = 0.2, family = "gaussian", peakGroupsMatrix = matrix(nrow = 0, + ncol = 0)) + +adjustRtimePeakGroups(object, param = PeakGroupsParam()) + +\S4method{show}{PeakGroupsParam}(object) + +\S4method{minFraction}{PeakGroupsParam}(object) + +\S4method{minFraction}{PeakGroupsParam}(object) <- value + +\S4method{extraPeaks}{PeakGroupsParam}(object) + +\S4method{extraPeaks}{PeakGroupsParam}(object) <- value + +\S4method{smooth}{PeakGroupsParam}(x) + +\S4method{smooth}{PeakGroupsParam}(object) <- value + +\S4method{span}{PeakGroupsParam}(object) + +\S4method{span}{PeakGroupsParam}(object) <- value + +\S4method{family}{PeakGroupsParam}(object) + +\S4method{family}{PeakGroupsParam}(object) <- value + +\S4method{peakGroupsMatrix}{PeakGroupsParam}(object) + +\S4method{peakGroupsMatrix}{PeakGroupsParam}(object) <- value + +\S4method{adjustRtime}{XCMSnExp,PeakGroupsParam}(object, param) +} +\arguments{ +\item{minFraction}{\code{numeric(1)} between 0 and 1 defining the minimum +required fraction of samples in which peaks for the peak group were +identified. Peak groups passing this criterion will be aligned across +samples and retention times of individual spectra will be adjusted +based on this alignment. For \code{minFraction = 1} the peak group +has to contain peaks in all samples of the experiment.} + +\item{extraPeaks}{\code{numeric(1)} defining the maximal number of +additional peaks for all samples to be assigned to a peak group (i.e. +feature) for retention time correction. For a data set with 6 samples, +\code{extraPeaks = 1} uses all peak groups with a total peak count +\code{<= 6 + 1}. The total peak count is the total number of peaks being +assigned to a peak group and considers also multiple peaks within a +sample being assigned to the group.} + +\item{smooth}{character defining the function to be used, to interpolate +corrected retention times for all peak groups. 
Either \code{"loess"} or +\code{"linear"}.} + +\item{span}{\code{numeric(1)} defining the degree of smoothing (if +\code{smooth = "loess"}). This parameter is passed to the internal call +to \code{\link{loess}}.} + +\item{family}{character defining the method to be used for loess smoothing. +Allowed values are \code{"gaussian"} and \code{"symmetric"}. See +\code{\link{loess}} for more information.} + +\item{peakGroupsMatrix}{optional \code{matrix} of (raw) retention times for +the peak groups on which the alignment should be performed. Each column +represents a sample, each row a feature/peak group. Such a matrix is +for example returned by the \code{\link{adjustRtimePeakGroups}} method.} + +\item{object}{For \code{adjustRtime}: an \code{\link{XCMSnExp}} object + containing the results from a previous chromatographic peak detection + (see \code{\link{findChromPeaks}}) and alignment analysis (see + \code{\link{groupChromPeaks}}). + + For all other methods: a \code{PeakGroupsParam} object.} + +\item{param}{A \code{PeakGroupsParam} object containing all settings for +the retention time correction method.} + +\item{value}{The value for the slot.} + +\item{x}{a \code{PeakGroupsParam} object.} +} +\value{ +The \code{PeakGroupsParam} function returns a +\code{PeakGroupsParam} class instance with all of the settings +specified for retention time adjustment based on \emph{house keeping} +features/peak groups. + +For \code{adjustRtimePeakGroups}: a \code{matrix}, rows being + features, columns samples, of retention times. The features are ordered + by the median retention time across columns. + +For \code{adjustRtime}: an \code{\link{XCMSnExp}} object with the + results of the retention time adjustment step. These can be accessed + with the \code{\link{adjustedRtime}} method. Retention time correction + does also adjust the retention time of the identified chromatographic + peaks (accessed \emph{via} \code{\link{chromPeaks}}). 
Note that retention + time correction drops all previous alignment results from the result + object. +} +\description{ +This method performs retention time adjustment based on the + alignment of chromatographic peak groups present in all/most samples + (hence corresponding to house keeping compounds). First the retention + time deviation of these peak groups is described by fitting either a + polynomial (\code{smooth = "loess"}) or a linear ( + \code{smooth = "linear"}) model to the data points. These models are + subsequently used to adjust the retention time of each spectrum in + each sample. + +The \code{PeakGroupsParam} class allows to specify all + settings for the retention time adjustment based on \emph{house keeping} + peak groups present in most samples. + Instances should be created with the \code{PeakGroupsParam} constructor. + +\code{adjustRtimePeakGroups} returns the features (peak groups) + which would, depending on the provided \code{\link{PeakGroupsParam}}, be + selected for alignment/retention time correction. + +\code{minFraction},\code{minFraction<-}: getter and setter + for the \code{minFraction} slot of the object. + +\code{extraPeaks},\code{extraPeaks<-}: getter and setter + for the \code{extraPeaks} slot of the object. + +\code{smooth},\code{smooth<-}: getter and setter + for the \code{smooth} slot of the object. + +\code{span},\code{span<-}: getter and setter + for the \code{span} slot of the object. + +\code{family},\code{family<-}: getter and setter + for the \code{family} slot of the object. + +\code{peakGroupsMatrix},\code{peakGroupsMatrix<-}: getter and + setter for the \code{peakGroupsMatrix} slot of the object. + +\code{adjustRtime,XCMSnExp,PeakGroupsParam}: + performs retention time correction based on the alignment of peak groups + (features) found in all/most samples. +} +\section{Slots}{ + +\describe{ +\item{\code{.__classVersion__,minFraction,extraPeaks,smooth,span,family,peakGroupsMatrix}}{See corresponding parameter above. 
\code{.__classVersion__} stores +the version from the class. Slot values should exclusively be accessed +\emph{via} the corresponding getter and setter methods listed above.} +}} + +\note{ +These methods and classes are part of the updated and modernized + \code{xcms} user interface which will eventually replace the + \code{\link{group}} methods. All of the settings to the alignment + algorithm can be passed with a \code{PeakGroupsParam} object. + + The matrix with the (raw) retention times of the peak groups used + in the alignment is added to the \code{peakGroupsMatrix} slot of the + \code{PeakGroupsParam} object that is stored into the corresponding + \emph{process history step} (see \code{\link{processHistory}} for how + to access the process history). + +\code{adjustRtimePeakGroups} is supposed to be called \emph{before} the + sample alignment, but after a correspondence (peak grouping). + +This method requires that a correspondence has been performed on the + data (see \code{\link{groupChromPeaks}}). Calling \code{adjustRtime} on + an \code{XCMSnExp} object will cause all peak grouping (correspondence) + results and any previous retention time adjustments to be dropped. + In some instances, the \code{adjustRtime,XCMSnExp,PeakGroupsParam} + re-adjusts adjusted retention times to ensure that they are in the same + order as the raw (original) retention times. +} +\examples{ +############################## +## Chromatographic peak detection and grouping. +## +## Below we perform first a peak detection (using the matchedFilter +## method) on some of the test files from the faahKO package followed by +## a peak grouping. +library(faahKO) +library(xcms) +fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, + full.names = TRUE) + +## Reading 2 of the KO samples +raw_data <- readMSData2(fls[1:2]) + +## Perform the peak detection using the matchedFilter method. 
+mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) +res <- findChromPeaks(raw_data, param = mfp) + +head(chromPeaks(res)) +## The number of peaks identified per sample: +table(chromPeaks(res)[, "sample"]) + +## Performing the peak grouping using the "peak density" method. +p <- PeakDensityParam(sampleGroups = c(1, 1)) +res <- groupChromPeaks(res, param = p) + +## Perform the retention time adjustment using peak groups found in both +## files. +fgp <- PeakGroupsParam(minFraction = 1) + +## Before running the alignment we can evaluate which features (peak groups) +## would be used based on the specified parameters. +pkGrps <- adjustRtimePeakGroups(res, param = fgp) + +## We can also plot these to evaluate if the peak groups span a large portion +## of the retention time range. +plot(x = pkGrps[, 1], y = rep(1, nrow(pkGrps)), xlim = range(rtime(res)), + ylim = c(1, 2), xlab = "rt", ylab = "", yaxt = "n") +points(x = pkGrps[, 2], y = rep(2, nrow(pkGrps))) +segments(x0 = pkGrps[, 1], x1 = pkGrps[, 2], + y0 = rep(1, nrow(pkGrps)), y1 = rep(2, nrow(pkGrps))) +grid() +axis(side = 2, at = c(1, 2), labels = colnames(pkGrps)) + +## Next we perform the alignment. +res <- adjustRtime(res, param = fgp) + +## Any grouping information was dropped +hasFeatures(res) + +## Plot the raw against the adjusted retention times. +plot(rtime(raw_data), rtime(res), pch = 16, cex = 0.25, col = fromFile(res)) + +## Adjusted retention times can be accessed using +## rtime(object, adjusted = TRUE) and adjustedRtime +all.equal(rtime(res), adjustedRtime(res)) + +## To get the raw, unadjusted retention times: +all.equal(rtime(res, adjusted = FALSE), rtime(raw_data)) + +## To extract the retention times grouped by sample/file: +rts <- rtime(res, bySample = TRUE) +} +\references{ +Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and +Gary Siuzdak. 
"XCMS: Processing Mass Spectrometry Data for Metabolite +Profiling Using Nonlinear Peak Alignment, Matching, and Identification" +\emph{Anal. Chem.} 2006, 78:779-787. +} +\seealso{ +The \code{\link{do_adjustRtime_peakGroups}} core + API function and \code{\link{retcor.peakgroups}} for the old user + interface. + \code{\link{plotAdjustedRtime}} for visualization of alignment results. + +\code{\link{XCMSnExp}} for the object containing the results of + the alignment. + +Other retention time correction methods: \code{\link{adjustRtime-obiwarp}}, + \code{\link{adjustRtime}} +} +\author{ +Colin Smith, Johannes Rainer +} diff --git a/man/adjustRtime.Rd b/man/adjustRtime.Rd new file mode 100644 index 000000000..c8f9c7629 --- /dev/null +++ b/man/adjustRtime.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R +\name{adjustRtime} +\alias{adjustRtime} +\title{Alignment: Retention time correction methods.} +\description{ +The \code{adjustRtime} method(s) perform retention time + correction (alignment) between chromatograms of different samples. These + methods are part of the modernized \code{xcms} user interface. + + The implemented retention time adjustment methods are: + \describe{ + \item{peakGroups}{retention time correction based on aligment of + features (peak groups) present in most/all samples. + See \code{\link{adjustRtime-peakGroups}} for more details.} + + \item{obiwarp}{alignment based on the complete mz-rt data. This method + does not require any identified peaks or defined features. See + \code{\link{adjustRtime-obiwarp}} for more details.} + } +} +\seealso{ +\code{\link{retcor}} for the \emph{old} retention time correction + methods. + \code{\link{plotAdjustedRtime}} for visualization of alignment results. 
+ +Other retention time correction methods: \code{\link{adjustRtime-obiwarp}}, + \code{\link{adjustRtime-peakGroups}} +} +\author{ +Johannes Rainer +} diff --git a/man/binYonX.Rd b/man/binYonX.Rd index 320297a74..6499687e0 100644 --- a/man/binYonX.Rd +++ b/man/binYonX.Rd @@ -182,10 +182,9 @@ binYonX(X, nBins = 5L, fromIdx = fIdx, toIdx = tIdx) binYonX(X, nBins = 5L, fromIdx = fIdx, toIdx = tIdx, binFromX = 4, binToX = 28) ## The same bins are thus used for each sub-set. } -\author{ -Johannes Rainer -} \seealso{ \code{\link{imputeLinInterpol}} } - +\author{ +Johannes Rainer +} diff --git a/man/breaks_on_binSize.Rd b/man/breaks_on_binSize.Rd index e820fac33..9a7c4588a 100644 --- a/man/breaks_on_binSize.Rd +++ b/man/breaks_on_binSize.Rd @@ -45,12 +45,11 @@ diff(breaks_on_binSize(1, 10, 0.51)) seq(1, 10, by = 0.51) ## Thus it defines one bin (break) less. } -\author{ -Johannes Rainer -} \seealso{ \code{\link{binYonX}} for a binning function. Other functions to define bins: \code{\link{breaks_on_nBins}} } - +\author{ +Johannes Rainer +} diff --git a/man/breaks_on_nBins.Rd b/man/breaks_on_nBins.Rd index 9fbdc0eca..a7e33053d 100644 --- a/man/breaks_on_nBins.Rd +++ b/man/breaks_on_nBins.Rd @@ -37,12 +37,11 @@ breaks_on_nBins(3, 20, nBins = 20) ## The same call but using shiftByHalfBinSize breaks_on_nBins(3, 20, nBins = 20, shiftByHalfBinSize = TRUE) } -\author{ -Johannes Rainer -} \seealso{ \code{\link{binYonX}} for a binning function. 
Other functions to define bins: \code{\link{breaks_on_binSize}} } - +\author{ +Johannes Rainer +} diff --git a/man/chromatographic-peak-detection.Rd b/man/chromatographic-peak-detection.Rd new file mode 100644 index 000000000..acb43ac8c --- /dev/null +++ b/man/chromatographic-peak-detection.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R +\name{chromatographic-peak-detection} +\alias{chromatographic-peak-detection} +\alias{findChromPeaks} +\title{Chromatographic peak detection methods.} +\description{ +The \code{findChromPeaks} methods perform the chromatographic + peak detection on LC/GC-MS data and are part of the modernized + \code{xcms} user interface. + + The implemented peak detection methods in chromatographic space are: + \describe{ + \item{centWave}{chromatographic peak detection using the \emph{centWave} + method. See \code{\link{centWave}} for more details.} + + \item{centWave with predicted isotopes}{peak detection using a two-step + centWave-based approach considering also feature isotopes. See + \code{\link{centWaveWithPredIsoROIs}} for more details.} + + \item{matchedFilter}{peak detection in chromatographic space. See + \code{\link{matchedFilter}} for more details.} + + \item{massifquant}{peak detection using the Kalman filter-based + method. See \code{\link{massifquant}} for more details.} + + \item{MSW}{single-spectrum non-chromatography MS data peak detection. + See \code{\link{MSW}} for more details.} + + } +} +\seealso{ +\code{\link{findPeaks}} for the \emph{old} peak detection + methods. 
+ +Other peak detection methods: \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, + \code{\link{findChromPeaks-centWave}}, + \code{\link{findChromPeaks-massifquant}}, + \code{\link{findChromPeaks-matchedFilter}}, + \code{\link{findPeaks-MSW}} +} +\author{ +Johannes Rainer +} diff --git a/man/detectFeatures.Rd b/man/detectFeatures.Rd deleted file mode 100644 index eb37f0b9e..000000000 --- a/man/detectFeatures.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DataClasses.R -\name{detectFeatures} -\alias{detectFeatures} -\title{Feature detection methods.} -\description{ -The \code{detectFeature} methods are part of the modernized -\code{xcms} user interface. - -The implemented feature detection methods are: -\describe{ -\item{centWave}{feature detection using the \emph{centWave} method. -See \code{\link{centWave}} for more details.} - -\item{centWave with predicted isotopes}{feature detection using a two-step -centWave-based approach considering also feature isotopes. See -\code{\link{centWaveWithPredIsoROIs}} for more details.} - -\item{matchedFilter}{peak detection in chromatographic space. See -\code{\link{matchedFilter}} for more details.} - -\item{massifquant}{peak detection using the Kalman filter-based feature -method. See \code{\link{massifquant}} for more details.} - -\item{MSW}{single-spectrum non-chromatography MS data feature detection. -See \code{\link{MSW}} for more details.} - -} -} -\author{ -Johannes Rainer -} -\seealso{ -\code{\link{findPeaks}} for the \emph{old} feature detection -methods. 
- -Other feature detection methods: \code{\link{featureDetection-MSW}}, - \code{\link{featureDetection-centWaveWithPredIsoROIs}}, - \code{\link{featureDetection-centWave}}, - \code{\link{featureDetection-massifquant}}, - \code{\link{featureDetection-matchedFilter}} -} - diff --git a/man/do_adjustRtime_peakGroups.Rd b/man/do_adjustRtime_peakGroups.Rd new file mode 100644 index 000000000..faf545d63 --- /dev/null +++ b/man/do_adjustRtime_peakGroups.Rd @@ -0,0 +1,92 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/do_adjustRtime-functions.R +\name{do_adjustRtime_peakGroups} +\alias{do_adjustRtime_peakGroups} +\title{Align spectrum retention times across samples using peak groups +found in most samples} +\usage{ +do_adjustRtime_peakGroups(peaks, peakIndex, rtime, minFraction = 0.9, + extraPeaks = 1, smooth = c("loess", "linear"), span = 0.2, + family = c("gaussian", "symmetric"), peakGroupsMatrix = matrix(ncol = 0, + nrow = 0)) +} +\arguments{ +\item{peaks}{a \code{matrix} or \code{data.frame} with the identified +chromatographic peaks in the samples.} + +\item{peakIndex}{a \code{list} of indices that provides the grouping +information of the chromatographic peaks (across and within samples).} + +\item{rtime}{a \code{list} of \code{numeric} vectors with the retention +times per file/sample.} + +\item{minFraction}{\code{numeric(1)} between 0 and 1 defining the minimum +required fraction of samples in which peaks for the peak group were +identified. Peak groups passing this criteria will aligned across +samples and retention times of individual spectra will be adjusted +based on this alignment. For \code{minFraction = 1} the peak group +has to contain peaks in all samples of the experiment.} + +\item{extraPeaks}{\code{numeric(1)} defining the maximal number of +additional peaks for all samples to be assigned to a peak group (i.e. +feature) for retention time correction. 
For a data set with 6 samples, +\code{extraPeaks = 1} uses all peak groups with a total peak count +\code{<= 6 + 1}. The total peak count is the total number of peaks being +assigned to a peak group and considers also multiple peaks within a +sample being assigned to the group.} + +\item{smooth}{character defining the function to be used, to interpolate +corrected retention times for all peak groups. Either \code{"loess"} or +\code{"linear"}.} + +\item{span}{\code{numeric(1)} defining the degree of smoothing (if +\code{smooth = "loess"}). This parameter is passed to the internal call +to \code{\link{loess}}.} + +\item{family}{character defining the method to be used for loess smoothing. +Allowed values are \code{"gaussian"} and \code{"symmetric"}. See +\code{\link{loess}} for more information.} + +\item{peakGroupsMatrix}{optional \code{matrix} of (raw) retention times for +peak groups on which the alignment should be performed. Each column +represents a sample, each row a feature/peak group. If not provided, +this matrix will be determined depending on parameters +\code{minFraction} and \code{extraPeaks}. If provided, +\code{minFraction} and \code{extraPeaks} will be ignored.} +} +\value{ +A \code{list} with \code{numeric} vectors with the adjusted + retention times grouped by sample. +} +\description{ +The function performs retention time correction by assessing + the retention time deviation across all samples using peak groups + (features) containing chromatographic peaks present in most/all samples. + The retention time deviation for these features in each sample is + described by fitting either a polynomial (\code{smooth = "loess"}) or + a linear (\code{smooth = "linear"}) model to the data points. The + models are subsequently used to adjust the retention time for each + spectrum in each sample. +} +\details{ +The alignment is based on the presence of compounds that can be found + in all/most samples of an experiment. 
The retention times of individual + spectra are then adjusted based on the alignment of the features + corresponding to these \emph{house keeping compounds}. The parameters + \code{minFraction} and \code{extraPeaks} can be used to fine tune which + features should be used for the alignment (i.e. which features + most likely correspond to the above mentioned house keeping compounds). +} +\note{ +The method ensures that returned adjusted retention times are + increasingly ordered, just as the raw retention times. +} +\references{ +Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and +Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite +Profiling Using Nonlinear Peak Alignment, Matching, and Identification" +\emph{Anal. Chem.} 2006, 78:779-787. +} +\author{ +Colin Smith, Johannes Rainer +} diff --git a/man/do_detectFeatures_centWaveWithPredIsoROIs.Rd b/man/do_detectFeatures_centWaveWithPredIsoROIs.Rd deleted file mode 100644 index 3ce77c2ab..000000000 --- a/man/do_detectFeatures_centWaveWithPredIsoROIs.Rd +++ /dev/null @@ -1,180 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/do_detectFeatures-functions.R -\name{do_detectFeatures_centWaveWithPredIsoROIs} -\alias{do_detectFeatures_addPredIsoROIs} -\alias{do_detectFeatures_centWaveWithPredIsoROIs} -\title{Core API function for two-step centWave feature detection with feature isotopes} -\usage{ -do_detectFeatures_centWaveWithPredIsoROIs(mz, int, scantime, valsPerSpect, - ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), - mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, - fitgauss = FALSE, noise = 0, verboseColumns = FALSE, roiList = list(), - firstBaselineCheck = TRUE, roiScales = NULL, snthreshIsoROIs = 6.25, - maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE, - polarity = "unknown") - -do_detectFeatures_addPredIsoROIs(mz, int, scantime, valsPerSpect, ppm = 25, - peakwidth = c(20, 50), snthresh = 6.25, prefilter = 
c(3, 100), - mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, - fitgauss = FALSE, noise = 0, verboseColumns = FALSE, features. = NULL, - maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE, - polarity = "unknown") -} -\arguments{ -\item{mz}{Numeric vector with the individual m/z values from all scans/ -spectra of one file/sample.} - -\item{int}{Numeric vector with the individual intensity values from all -scans/spectra of one file/sample.} - -\item{scantime}{Numeric vector of length equal to the number of -spectra/scans of the data representing the retention time of each scan.} - -\item{valsPerSpect}{Numeric vector with the number of values for each -spectrum.} - -\item{ppm}{Maximal tolerated m/z deviation in consecutive scans in parts -per million (ppm).} - -\item{peakwidth}{numeric(2) with the expected approximate -feature/peak width in chromatographic space. Given as a range (min, max) -in seconds.} - -\item{snthresh}{For \code{do_detectFeatures_addPredIsoROIs}: -numeric(1) defining the signal to noise threshold for the centWave algorithm. -For \code{do_detectFeatures_centWaveWithPredIsoROIs}: numeric(1) defining the -signal to noise threshold for the initial (first) centWave run.} - -\item{prefilter}{numeric(2): \code{c(k, I)} specifying the prefilter -step for the first analysis step (ROI detection). Mass traces are only -retained if they contain at least \code{k} peaks with intensity \code{>= I}.} - -\item{mzCenterFun}{Name of the function to calculate the m/z center of the -feature. Allowed are: \code{"wMean"}: intensity weighted mean of the feature's -m/z values, \code{"mean"}: mean of the feature's m/z values, \code{"apex"}: -use the m/z value at the peak apex, \code{"wMeanApex3"}: intensity weighted -mean of the m/z value at the peak apex and the m/z values left and right of -it and \code{"meanApex3"}: mean of the m/z value of the peak apex and the -m/z values left and right of it.} - -\item{integrate}{Integration method. 
For \code{integrate = 1} peak limits -are found through descent on the mexican hat filtered data, for -\code{integrate = 2} the descent is done on the real data. The latter method -is more accurate but prone to noise, while the former is more robust, but -less exact.} - -\item{mzdiff}{Numeric representing the minimum difference in m/z dimension -for peaks with overlapping retention times; can be negatove to allow overlap.} - -\item{fitgauss}{Logical whether or not a Gaussian should be fitted to each -peak.} - -\item{noise}{numeric(1) allowing to set a minimum intensity required -for centroids to be considered in the first analysis step (centroids with -intensity \code{< noise} are omitted from ROI detection).} - -\item{verboseColumns}{Logical whether additional feature meta data columns -should be returned.} - -\item{roiList}{An optional list of regions-of-interest (ROI) representing -detected mass traces. If ROIs are submitted the first analysis step is -omitted and feature detection is performed on the submitted ROIs. Each -ROI is expected to have the following elements specified: -\code{scmin} (start scan index), \code{scmax} (end scan index), -\code{mzmin} (minimum m/z), \code{mzmax} (maximum m/z), \code{length} -(number of scans), \code{intensity} (summed intensity). Each ROI should be -represented by a \code{list} of elements or a single row \code{data.frame}.} - -\item{firstBaselineCheck}{logical(1). If \code{TRUE} continuous -data within regions of interest is checked to be above the first baseline.} - -\item{roiScales}{Optional numeric vector with length equal to \code{roiList} -defining the scale for each region of interest in \code{roiList} that should -be used for the centWave-wavelets.} - -\item{snthreshIsoROIs}{numeric(1) defining the signal to noise ratio cutoff -to be used in the second centWave run to identify features for predicted -isotope ROIs.} - -\item{maxCharge}{integer(1) defining the maximal isotope charge. 
Isotopes -will be defined for charges \code{1:maxCharge}.} - -\item{maxIso}{integer(1) defining the number of isotope peaks that should be -predicted for each feature identified in the first centWave run.} - -\item{mzIntervalExtension}{logical(1) whether the mz range for the predicted -isotope ROIs should be extended to increase detection of low intensity peaks.} - -\item{polarity}{character(1) specifying the polarity of the data. Currently -not used, but has to be \code{"positive"}, \code{"negative"} or -\code{"unknown"} if provided.} - -\item{features.}{A matrix or \code{xcmsPeaks} object such as one returned by -a call to \code{link{do_detectFeatures_centWave}} or -\code{link{findPeaks.centWave}} (both with \code{verboseColumns = TRUE}) -with the features for which isotopes should be predicted and used for an -additional feature detectoin using the centWave method. Required columns are: -\code{"mz"}, \code{"mzmin"}, \code{"mzmax"}, \code{"scmin"}, \code{"scmax"}, -\code{"scale"} and \code{"into"}.} -} -\value{ -A matrix, each row representing an identified feature. All non-overlapping -features identified in both centWave runs are reported. 
-The matrix columns are: -\describe{ -\item{mz}{Intensity weighted mean of m/z values of the feature across scans.} -\item{mzmin}{Minimum m/z of the feature.} -\item{mzmax}{Maximum m/z of the feature.} -\item{rt}{Retention time of the feature's midpoint.} -\item{rtmin}{Minimum retention time of the feature.} -\item{rtmax}{Maximum retention time of the feature.} -\item{into}{Integrated (original) intensity of the feature.} -\item{intb}{Per-feature baseline corrected integrated feature intensity.} -\item{maxo}{Maximum intensity of the feature.} -\item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, -\code{sd} being the standard deviation of local chromatographic noise.} -\item{egauss}{RMSE of Gaussian fit.} -} -Additional columns for \code{verboseColumns = TRUE}: -\describe{ -\item{mu}{Gaussian parameter mu.} -\item{sigma}{Gaussian parameter sigma.} -\item{h}{Gaussian parameter h.} -\item{f}{Region number of the m/z ROI where the peak was localized.} -\item{dppm}{m/z deviation of mass trace across scanns in ppk.} -\item{scale}{Scale on which the feature was localized.} -\item{scpos}{Peak position found by wavelet analysis (scan number).} -\item{scmin}{Left peak limit found by wavelet analysis (scan number).} -\item{scmax}{Right peak limit found by wavelet analysis (scan numer).} -} -} -\description{ -The \code{do_detectFeatures_centWaveWithPredIsoROIs} performs a -two-step centWave based feature detection: features are identified using -centWave followed by a prediction of the location of the identified features' -isotopes in the mz-retention time space. These locations are fed as -\emph{regions of interest} (ROIs) to a subsequent centWave run. All non -overlapping features from these two feature detection runs are reported as -the final list of identified features. 
- -The \code{do_detectFeatures_centWaveAddPredIsoROIs} performs -centWave based feature detection based in regions of interest (ROIs) -representing predicted isotopes for the features submitted with argument -\code{features.}. The function returns a matrix with the identified features -consisting of all input features and features representing predicted isotopes -of these (if found by the centWave algorithm). -} -\details{ -For more details on the centWave algorithm see -\code{\link{centWave}}. -} -\author{ -Hendrik Treutler, Johannes Rainer -} -\seealso{ -Other core feature detection functions: \code{\link{do_detectFeatures_MSW}}, - \code{\link{do_detectFeatures_centWave}}, - \code{\link{do_detectFeatures_massifquant}}, - \code{\link{do_detectFeatures_matchedFilter}} -} - diff --git a/man/do_detectFeatures_massifquant.Rd b/man/do_detectFeatures_massifquant.Rd deleted file mode 100644 index 49e02da7e..000000000 --- a/man/do_detectFeatures_massifquant.Rd +++ /dev/null @@ -1,185 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/do_detectFeatures-functions.R -\name{do_detectFeatures_massifquant} -\alias{do_detectFeatures_massifquant} -\title{Core API function for massifquant feature detection} -\usage{ -do_detectFeatures_massifquant(mz, int, scantime, valsPerSpect, ppm = 10, - peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), - mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, - fitgauss = FALSE, noise = 0, verboseColumns = FALSE, - criticalValue = 1.125, consecMissedLimit = 2, unions = 1, - checkBack = 0, withWave = FALSE) -} -\arguments{ -\item{mz}{Numeric vector with the individual m/z values from all scans/ -spectra of one file/sample.} - -\item{int}{Numeric vector with the individual intensity values from all -scans/spectra of one file/sample.} - -\item{scantime}{Numeric vector of length equal to the number of -spectra/scans of the data representing the retention time of each scan.} - 
-\item{valsPerSpect}{Numeric vector with the number of values for each -spectrum.} - -\item{ppm}{Maximal tolerated m/z deviation in consecutive scans in parts -per million (ppm).} - -\item{peakwidth}{numeric(2) with the expected approximate -feature/peak width in chromatographic space. Given as a range (min, max) -in seconds.} - -\item{snthresh}{numeric(1) defining the signal to noise ratio cutoff.} - -\item{prefilter}{numeric(2): \code{c(k, I)} specifying the prefilter -step for the first analysis step (ROI detection). Mass traces are only -retained if they contain at least \code{k} peaks with intensity \code{>= I}.} - -\item{mzCenterFun}{Name of the function to calculate the m/z center of the -feature. Allowed are: \code{"wMean"}: intensity weighted mean of the feature's -m/z values, \code{"mean"}: mean of the feature's m/z values, \code{"apex"}: -use the m/z value at the peak apex, \code{"wMeanApex3"}: intensity weighted -mean of the m/z value at the peak apex and the m/z values left and right of -it and \code{"meanApex3"}: mean of the m/z value of the peak apex and the -m/z values left and right of it.} - -\item{integrate}{Integration method. For \code{integrate = 1} peak limits -are found through descent on the mexican hat filtered data, for -\code{integrate = 2} the descent is done on the real data. 
The latter method -is more accurate but prone to noise, while the former is more robust, but -less exact.} - -\item{mzdiff}{Numeric representing the minimum difference in m/z dimension -for peaks with overlapping retention times; can be negatove to allow overlap.} - -\item{fitgauss}{Logical whether or not a Gaussian should be fitted to each -peak.} - -\item{noise}{numeric(1) allowing to set a minimum intensity required -for centroids to be considered in the first analysis step (centroids with -intensity \code{< noise} are omitted from ROI detection).} - -\item{verboseColumns}{Logical whether additional feature meta data columns -should be returned.} - -\item{criticalValue}{numeric(1). Suggested values: -(\code{0.1-3.0}). This setting helps determine the the Kalman Filter -prediciton margin of error. A real centroid belonging to a bonafide -feature must fall within the KF prediction margin of error. Much like -in the construction of a confidence interval, \code{criticalVal} loosely -translates to be a multiplier of the standard error of the prediction -reported by the Kalman Filter. If the features in the XC-MS sample have -a small mass deviance in ppm error, a smaller critical value might be -better and vice versa.} - -\item{consecMissedLimit}{Integer: Suggested values: (\code{1,2,3}). While -a feature is in the proces of being detected by a Kalman Filter, the -Kalman Filter may not find a predicted centroid in every scan. After 1 -or more consecutive failed predictions, this setting informs Massifquant -when to stop a Kalman Filter from following a candidate feature.} - -\item{unions}{Integer: set to \code{1} if apply t-test union on -segmentation; set to \code{0} if no t-test to be applied on -chromatographically continous features sharing same m/z range. -Explanation: With very few data points, sometimes a Kalman Filter stops -tracking a feature prematurely. Another Kalman Filter is instantiated -and begins following the rest of the signal. 
Because tracking is done -backwards to forwards, this algorithmic defect leaves a real feature -divided into two segments or more. With this option turned on, the -program identifies segmented features and combines them (merges them) -into one with a two sample t-test. The potential danger of this option -is that some truly distinct features may be merged.} - -\item{checkBack}{Integer: set to \code{1} if turned on; set to \code{0} -if turned off. The convergence of a Kalman Filter to a feature's precise -m/z mapping is very fast, but sometimes it incorporates erroneous centroids -as part of a feature (especially early on). The \code{scanBack} option is an -attempt to remove the occasional outlier that lies beyond the converged -bounds of the Kalman Filter. The option does not directly affect -identification of a feature because it is a postprocessing measure; it -has not shown to be a extremely useful thus far and the default is set -to being turned off.} - -\item{withWave}{Logical: if \code{TRUE}, the features identified first -with Massifquant are subsequently filtered with the second step of the -centWave algorithm, which includes wavelet estimation.} -} -\value{ -A matrix, each row representing an identified feature, with columns: -\describe{ -\item{mz}{Intensity weighted mean of m/z values of the features across -scans.} -\item{mzmin}{Minumum m/z of the feature.} -\item{mzmax}{Maximum m/z of the feature.} -\item{rtmin}{Minimum retention time of the feature.} -\item{rtmax}{Maximum retention time of the feature.} -\item{rt}{Retention time of the feature's midpoint.} -\item{into}{Integrated (original) intensity of the feature.} -\item{maxo}{Maximum intensity of the feature.} -} -If \code{withWave} is set to \code{TRUE}, the result is the same as -returned by the \code{\link{do_detectFeatures_centWave}} method. -} -\description{ -Massifquant is a Kalman filter (KF)-based feature -detection for XC-MS data in centroid mode. 
The identified features -can be further refined with the \emph{centWave} method (see -\code{\link{do_detectFeatures_centWave}} for details on centWave) -by specifying \code{withWave = TRUE}. -} -\details{ -This algorithm's performance has been tested rigorously -on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. -Simultaneous kalman filters identify features and calculate their -area under the curve. The default parameters are set to operate on -a complex LC-MS Orbitrap sample. Users will find it useful to do some -simple exploratory data analysis to find out where to set a minimum -intensity, and identify how many scans an average feature spans. The -\code{consecMissedLimit} parameter has yielded good performance on -Orbitrap data when set to (\code{2}) and on TOF data it was found best -to be at (\code{1}). This may change as the algorithm has yet to be -tested on many samples. The \code{criticalValue} parameter is perhaps -most dificult to dial in appropriately and visual inspection of peak -identification is the best suggested tool for quick optimization. -The \code{ppm} and \code{checkBack} parameters have shown less influence -than the other parameters and exist to give users flexibility and -better accuracy. 
-} -\examples{ -library(faahKO) -library(xcms) -cdfpath <- system.file("cdf", package = "faahKO") -cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) - -## Read the first file -xraw <- xcmsRaw(cdffiles[1]) -## Extract the required data -mzVals <- xraw@env$mz -intVals <- xraw@env$intensity -## Define the values per spectrum: -valsPerSpect <- diff(c(xraw@scanindex, length(mzVals))) - -## Perform the feature detection using massifquant -res <- do_detectFeatures_massifquant(mz = mzVals, int = intVals, -scantime = xraw@scantime, valsPerSpect = valsPerSpect) -head(res) -} -\author{ -Christopher Conley -} -\references{ -Conley CJ, Smith R, Torgrip RJ, Taylor RM, Tautenhahn R and Prince JT -"Massifquant: open-source Kalman filter-based XC-MS isotope trace feature -detection" \emph{Bioinformatics} 2014, 30(18):2636-43. -} -\seealso{ -\code{\link{massifquant}} for the standard user interface method. - -Other core feature detection functions: \code{\link{do_detectFeatures_MSW}}, - \code{\link{do_detectFeatures_centWaveWithPredIsoROIs}}, - \code{\link{do_detectFeatures_centWave}}, - \code{\link{do_detectFeatures_matchedFilter}} -} - diff --git a/man/do_detectFeatures_centWave.Rd b/man/do_findChromPeaks_centWave.Rd similarity index 56% rename from man/do_detectFeatures_centWave.Rd rename to man/do_findChromPeaks_centWave.Rd index 954f5c7c9..165cda5e3 100644 --- a/man/do_detectFeatures_centWave.Rd +++ b/man/do_findChromPeaks_centWave.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/do_detectFeatures-functions.R -\name{do_detectFeatures_centWave} -\alias{do_detectFeatures_centWave} -\title{Core API function for centWave feature detection} +% Please edit documentation in R/do_findChromPeaks-functions.R +\name{do_findChromPeaks_centWave} +\alias{do_findChromPeaks_centWave} +\title{Core API function for centWave peak detection} \usage{ -do_detectFeatures_centWave(mz, int, scantime, valsPerSpect, ppm = 25, 
+do_findChromPeaks_centWave(mz, int, scantime, valsPerSpect, ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, fitgauss = FALSE, noise = 0, verboseColumns = FALSE, roiList = list(), @@ -23,74 +23,80 @@ spectra/scans of the data representing the retention time of each scan.} \item{valsPerSpect}{Numeric vector with the number of values for each spectrum.} -\item{ppm}{Maximal tolerated m/z deviation in consecutive scans in parts -per million (ppm).} +\item{ppm}{\code{numeric(1)} defining the maximal tolerated m/z deviation in +consecutive scans in parts per million (ppm) for the initial ROI +definition.} -\item{peakwidth}{numeric(2) with the expected approximate -feature/peak width in chromatographic space. Given as a range (min, max) +\item{peakwidth}{\code{numeric(2)} with the expected approximate +peak width in chromatographic space. Given as a range (min, max) in seconds.} -\item{snthresh}{numeric(1) defining the signal to noise ratio cutoff.} +\item{snthresh}{\code{numeric(1)} defining the signal to noise ratio cutoff.} -\item{prefilter}{numeric(2): \code{c(k, I)} specifying the prefilter +\item{prefilter}{\code{numeric(2)}: \code{c(k, I)} specifying the prefilter step for the first analysis step (ROI detection). Mass traces are only -retained if they contain at least \code{k} peaks with intensity \code{>= I}.} +retained if they contain at least \code{k} peaks with intensity +\code{>= I}.} \item{mzCenterFun}{Name of the function to calculate the m/z center of the -feature. 
Allowed are: \code{"wMean"}: intensity weighted mean of the feature's -m/z values, \code{"mean"}: mean of the feature's m/z values, \code{"apex"}: -use the m/z value at the peak apex, \code{"wMeanApex3"}: intensity weighted -mean of the m/z value at the peak apex and the m/z values left and right of -it and \code{"meanApex3"}: mean of the m/z value of the peak apex and the -m/z values left and right of it.} +chromatographic peak. Allowed are: \code{"wMean"}: intensity weighted +mean of the peak's m/z values, \code{"mean"}: mean of the peak's m/z +values, \code{"apex"}: use the m/z value at the peak apex, +\code{"wMeanApex3"}: intensity weighted mean of the m/z value at the +peak apex and the m/z values left and right of it and \code{"meanApex3"}: +mean of the m/z value of the peak apex and the m/z values left and right +of it.} \item{integrate}{Integration method. For \code{integrate = 1} peak limits are found through descent on the mexican hat filtered data, for -\code{integrate = 2} the descent is done on the real data. The latter method -is more accurate but prone to noise, while the former is more robust, but -less exact.} +\code{integrate = 2} the descent is done on the real data. 
The latter +method is more accurate but prone to noise, while the former is more +robust, but less exact.} -\item{mzdiff}{Numeric representing the minimum difference in m/z dimension -for peaks with overlapping retention times; can be negatove to allow overlap.} +\item{mzdiff}{\code{numeric(1)} representing the minimum difference in m/z +dimension for peaks with overlapping retention times; can be negative to +allow overlap.} -\item{fitgauss}{Logical whether or not a Gaussian should be fitted to each -peak.} +\item{fitgauss}{\code{logical(1)} whether or not a Gaussian should be fitted +to each peak.} -\item{noise}{numeric(1) allowing to set a minimum intensity required +\item{noise}{\code{numeric(1)} allowing to set a minimum intensity required for centroids to be considered in the first analysis step (centroids with intensity \code{< noise} are omitted from ROI detection).} -\item{verboseColumns}{Logical whether additional feature meta data columns -should be returned.} +\item{verboseColumns}{\code{logical(1)} whether additional peak meta data +columns should be returned.} \item{roiList}{An optional list of regions-of-interest (ROI) representing detected mass traces. If ROIs are submitted the first analysis step is -omitted and feature detection is performed on the submitted ROIs. Each -ROI is expected to have the following elements specified: +omitted and chromatographic peak detection is performed on the submitted +ROIs. Each ROI is expected to have the following elements specified: \code{scmin} (start scan index), \code{scmax} (end scan index), \code{mzmin} (minimum m/z), \code{mzmax} (maximum m/z), \code{length} -(number of scans), \code{intensity} (summed intensity). Each ROI should be -represented by a \code{list} of elements or a single row \code{data.frame}.} +(number of scans), \code{intensity} (summed intensity). Each ROI should +be represented by a \code{list} of elements or a single row +\code{data.frame}.} -\item{firstBaselineCheck}{logical(1).
If \code{TRUE} continuous +\item{firstBaselineCheck}{\code{logical(1)}. If \code{TRUE} continuous data within regions of interest is checked to be above the first baseline.} \item{roiScales}{Optional numeric vector with length equal to \code{roiList} -defining the scale for each region of interest in \code{roiList} that should -be used for the centWave-wavelets.} +defining the scale for each region of interest in \code{roiList} that +should be used for the centWave-wavelets.} } \value{ -A matrix, each row representing an identified feature, with columns: +A matrix, each row representing an identified chromatographic peak, +with columns: \describe{ -\item{mz}{Intensity weighted mean of m/z values of the feature across scans.} -\item{mzmin}{Minimum m/z of the feature.} -\item{mzmax}{Maximum m/z of the feature.} -\item{rt}{Retention time of the feature's midpoint.} -\item{rtmin}{Minimum retention time of the feature.} -\item{rtmax}{Maximum retention time of the feature.} -\item{into}{Integrated (original) intensity of the feature.} -\item{intb}{Per-feature baseline corrected integrated feature intensity.} -\item{maxo}{Maximum intensity of the feature.} +\item{mz}{Intensity weighted mean of m/z values of the peak across scans.} +\item{mzmin}{Minimum m/z of the peak.} +\item{mzmax}{Maximum m/z of the peak.} +\item{rt}{Retention time of the peak's midpoint.} +\item{rtmin}{Minimum retention time of the peak.} +\item{rtmax}{Maximum retention time of the peak.} +\item{into}{Integrated (original) intensity of the peak.} +\item{intb}{Per-peak baseline corrected integrated peak intensity.} +\item{maxo}{Maximum intensity of the peak.} \item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, \code{sd} being the standard deviation of local chromatographic noise.} \item{egauss}{RMSE of Gaussian fit.} @@ -102,15 +108,16 @@ Additional columns for \code{verboseColumns = TRUE}: \item{h}{Gaussian parameter h.} \item{f}{Region number of the m/z ROI where the peak was 
localized.} \item{dppm}{m/z deviation of mass trace across scanns in ppk.} -\item{scale}{Scale on which the feature was localized.} +\item{scale}{Scale on which the peak was localized.} \item{scpos}{Peak position found by wavelet analysis (scan number).} \item{scmin}{Left peak limit found by wavelet analysis (scan number).} \item{scmax}{Right peak limit found by wavelet analysis (scan numer).} } } \description{ -This function performs peak density and wavelet based feature -detection for high resolution LC/MS data in centroid mode [Tautenhahn 2008]. +This function performs peak density and wavelet based +chromatographic peak detection for high resolution LC/MS data in centroid +mode [Tautenhahn 2008]. } \details{ This algorithm is most suitable for high resolution @@ -126,7 +133,7 @@ of interest are passed with the \code{roiList} parameter. The \emph{centWave} was designed to work on centroided mode, thus it is expected that such data is presented to the function. -This function exposes core feature detection functionality of +This function exposes core chromatographic peak detection functionality of the \emph{centWave} method. While this function can be called directly, users will generally call the corresponding method for the data object instead. @@ -137,7 +144,7 @@ library(faahKO) fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") xr <- xcmsRaw(fs, profstep = 0) -## Extracting the data from the xcmsRaw for do_detectFeatures_centWave +## Extracting the data from the xcmsRaw for do_findChromPeaks_centWave mzVals <- xr@env$mz intVals <- xr@env$intensity ## Define the values per spectrum: @@ -146,13 +153,10 @@ valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) ## Calling the function. We're using a large value for noise to speed up ## the call in the example performance - in a real use case we would either ## set the value to a reasonable value or use the default value. 
-res <- do_detectFeatures_centWave(mz = mzVals, int = intVals, +res <- do_findChromPeaks_centWave(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect = valsPerSpect, noise = 10000) head(res) } -\author{ -Ralf Tautenhahn, Johannes Rainer -} \references{ Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics} @@ -161,9 +165,11 @@ sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics} \seealso{ \code{\link{centWave}} for the standard user interface method. -Other core feature detection functions: \code{\link{do_detectFeatures_MSW}}, - \code{\link{do_detectFeatures_centWaveWithPredIsoROIs}}, - \code{\link{do_detectFeatures_massifquant}}, - \code{\link{do_detectFeatures_matchedFilter}} +Other core peak detection functions: \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}}, + \code{\link{do_findChromPeaks_massifquant}}, + \code{\link{do_findChromPeaks_matchedFilter}}, + \code{\link{do_findPeaks_MSW}} +} +\author{ +Ralf Tautenhahn, Johannes Rainer } - diff --git a/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd b/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd new file mode 100644 index 000000000..eee8fa2d6 --- /dev/null +++ b/man/do_findChromPeaks_centWaveWithPredIsoROIs.Rd @@ -0,0 +1,185 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/do_findChromPeaks-functions.R +\name{do_findChromPeaks_centWaveWithPredIsoROIs} +\alias{do_findChromPeaks_centWaveWithPredIsoROIs} +\alias{do_findChromPeaks_addPredIsoROIs} +\title{Core API function for two-step centWave peak detection with isotopes} +\usage{ +do_findChromPeaks_centWaveWithPredIsoROIs(mz, int, scantime, valsPerSpect, + ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), + mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, + fitgauss = FALSE, noise = 0, verboseColumns = FALSE, roiList = list(), + firstBaselineCheck = TRUE, 
roiScales = NULL, snthreshIsoROIs = 6.25, + maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE, + polarity = "unknown") + +do_findChromPeaks_addPredIsoROIs(mz, int, scantime, valsPerSpect, ppm = 25, + peakwidth = c(20, 50), snthresh = 6.25, prefilter = c(3, 100), + mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, + fitgauss = FALSE, noise = 0, verboseColumns = FALSE, peaks. = NULL, + maxCharge = 3, maxIso = 5, mzIntervalExtension = TRUE, + polarity = "unknown") +} +\arguments{ +\item{mz}{Numeric vector with the individual m/z values from all scans/ +spectra of one file/sample.} + +\item{int}{Numeric vector with the individual intensity values from all +scans/spectra of one file/sample.} + +\item{scantime}{Numeric vector of length equal to the number of +spectra/scans of the data representing the retention time of each scan.} + +\item{valsPerSpect}{Numeric vector with the number of values for each +spectrum.} + +\item{ppm}{\code{numeric(1)} defining the maximal tolerated m/z deviation in +consecutive scans in parts per million (ppm) for the initial ROI +definition.} + +\item{peakwidth}{\code{numeric(2)} with the expected approximate +peak width in chromatographic space. Given as a range (min, max) +in seconds.} + +\item{snthresh}{For \code{do_findChromPeaks_addPredIsoROIs}: +numeric(1) defining the signal to noise threshold for the centWave algorithm. +For \code{do_findChromPeaks_centWaveWithPredIsoROIs}: numeric(1) defining the +signal to noise threshold for the initial (first) centWave run.} + +\item{prefilter}{\code{numeric(2)}: \code{c(k, I)} specifying the prefilter +step for the first analysis step (ROI detection). Mass traces are only +retained if they contain at least \code{k} peaks with intensity +\code{>= I}.} + +\item{mzCenterFun}{Name of the function to calculate the m/z center of the +chromatographic peak. 
Allowed are: \code{"wMean"}: intensity weighted +mean of the peak's m/z values, \code{"mean"}: mean of the peak's m/z +values, \code{"apex"}: use the m/z value at the peak apex, +\code{"wMeanApex3"}: intensity weighted mean of the m/z value at the +peak apex and the m/z values left and right of it and \code{"meanApex3"}: +mean of the m/z value of the peak apex and the m/z values left and right +of it.} + +\item{integrate}{Integration method. For \code{integrate = 1} peak limits +are found through descent on the mexican hat filtered data, for +\code{integrate = 2} the descent is done on the real data. The latter +method is more accurate but prone to noise, while the former is more +robust, but less exact.} + +\item{mzdiff}{\code{numeric(1)} representing the minimum difference in m/z +dimension for peaks with overlapping retention times; can be negative to +allow overlap.} + +\item{fitgauss}{\code{logical(1)} whether or not a Gaussian should be fitted +to each peak.} + +\item{noise}{\code{numeric(1)} allowing to set a minimum intensity required +for centroids to be considered in the first analysis step (centroids with +intensity \code{< noise} are omitted from ROI detection).} + +\item{verboseColumns}{\code{logical(1)} whether additional peak meta data +columns should be returned.} + +\item{roiList}{An optional list of regions-of-interest (ROI) representing +detected mass traces. If ROIs are submitted the first analysis step is +omitted and chromatographic peak detection is performed on the submitted +ROIs. Each ROI is expected to have the following elements specified: +\code{scmin} (start scan index), \code{scmax} (end scan index), +\code{mzmin} (minimum m/z), \code{mzmax} (maximum m/z), \code{length} +(number of scans), \code{intensity} (summed intensity). Each ROI should +be represented by a \code{list} of elements or a single row +\code{data.frame}.} + +\item{firstBaselineCheck}{\code{logical(1)}.
If \code{TRUE} continuous +data within regions of interest is checked to be above the first baseline.} + +\item{roiScales}{Optional numeric vector with length equal to \code{roiList} +defining the scale for each region of interest in \code{roiList} that +should be used for the centWave-wavelets.} + +\item{snthreshIsoROIs}{\code{numeric(1)} defining the signal to noise ratio +cutoff to be used in the second centWave run to identify peaks for +predicted isotope ROIs.} + +\item{maxCharge}{\code{integer(1)} defining the maximal isotope charge. +Isotopes will be defined for charges \code{1:maxCharge}.} + +\item{maxIso}{\code{integer(1)} defining the number of isotope peaks that +should be predicted for each peak identified in the first centWave run.} + +\item{mzIntervalExtension}{\code{logical(1)} whether the mz range for the +predicted isotope ROIs should be extended to increase detection of low +intensity peaks.} + +\item{polarity}{\code{character(1)} specifying the polarity of the data. +Currently not used, but has to be \code{"positive"}, \code{"negative"} or +\code{"unknown"} if provided.} + +\item{peaks.}{A matrix or \code{xcmsPeaks} object such as one returned by +a call to \code{\link{do_findChromPeaks_centWave}} or +\code{\link{findPeaks.centWave}} (both with \code{verboseColumns = TRUE}) +with the peaks for which isotopes should be predicted and used for an +additional peak detection using the centWave method. Required columns are: +\code{"mz"}, \code{"mzmin"}, \code{"mzmax"}, \code{"scmin"}, \code{"scmax"}, +\code{"scale"} and \code{"into"}.} +} +\value{ +A matrix, each row representing an identified chromatographic peak. All +non-overlapping peaks identified in both centWave runs are reported.
+The matrix columns are: +\describe{ +\item{mz}{Intensity weighted mean of m/z values of the peaks across scans.} +\item{mzmin}{Minimum m/z of the peaks.} +\item{mzmax}{Maximum m/z of the peaks.} +\item{rt}{Retention time of the peak's midpoint.} +\item{rtmin}{Minimum retention time of the peak.} +\item{rtmax}{Maximum retention time of the peak.} +\item{into}{Integrated (original) intensity of the peak.} +\item{intb}{Per-peak baseline corrected integrated peak intensity.} +\item{maxo}{Maximum intensity of the peak.} +\item{sn}{Signal to noise ratio, defined as \code{(maxo - baseline)/sd}, +\code{sd} being the standard deviation of local chromatographic noise.} +\item{egauss}{RMSE of Gaussian fit.} +} +Additional columns for \code{verboseColumns = TRUE}: +\describe{ +\item{mu}{Gaussian parameter mu.} +\item{sigma}{Gaussian parameter sigma.} +\item{h}{Gaussian parameter h.} +\item{f}{Region number of the m/z ROI where the peak was localized.} +\item{dppm}{m/z deviation of mass trace across scans in ppm.} +\item{scale}{Scale on which the peak was localized.} +\item{scpos}{Peak position found by wavelet analysis (scan number).} +\item{scmin}{Left peak limit found by wavelet analysis (scan number).} +\item{scmax}{Right peak limit found by wavelet analysis (scan number).} +} +} +\description{ +The \code{do_findChromPeaks_centWaveWithPredIsoROIs} performs a +two-step centWave based peak detection: chromatographic peaks are identified +using centWave followed by a prediction of the location of the identified +peaks' isotopes in the mz-retention time space. These locations are fed as +\emph{regions of interest} (ROIs) to a subsequent centWave run. All non +overlapping peaks from these two peak detection runs are reported as +the final list of identified peaks. + +The \code{do_findChromPeaks_addPredIsoROIs} performs +centWave based peak detection based on regions of interest (ROIs) +representing predicted isotopes for the peaks submitted with argument +\code{peaks.}.
The function returns a matrix with the identified peaks +consisting of all input peaks and peaks representing predicted isotopes +of these (if found by the centWave algorithm). +} +\details{ +For more details on the centWave algorithm see +\code{\link{centWave}}. +} +\seealso{ +Other core peak detection functions: \code{\link{do_findChromPeaks_centWave}}, + \code{\link{do_findChromPeaks_massifquant}}, + \code{\link{do_findChromPeaks_matchedFilter}}, + \code{\link{do_findPeaks_MSW}} +} +\author{ +Hendrik Treutler, Johannes Rainer +} diff --git a/man/do_findChromPeaks_massifquant.Rd b/man/do_findChromPeaks_massifquant.Rd new file mode 100644 index 000000000..a28658c86 --- /dev/null +++ b/man/do_findChromPeaks_massifquant.Rd @@ -0,0 +1,189 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/do_findChromPeaks-functions.R +\name{do_findChromPeaks_massifquant} +\alias{do_findChromPeaks_massifquant} +\title{Core API function for massifquant peak detection} +\usage{ +do_findChromPeaks_massifquant(mz, int, scantime, valsPerSpect, ppm = 10, + peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), + mzCenterFun = "wMean", integrate = 1, mzdiff = -0.001, + fitgauss = FALSE, noise = 0, verboseColumns = FALSE, + criticalValue = 1.125, consecMissedLimit = 2, unions = 1, + checkBack = 0, withWave = FALSE) +} +\arguments{ +\item{mz}{Numeric vector with the individual m/z values from all scans/ +spectra of one file/sample.} + +\item{int}{Numeric vector with the individual intensity values from all +scans/spectra of one file/sample.} + +\item{scantime}{Numeric vector of length equal to the number of +spectra/scans of the data representing the retention time of each scan.} + +\item{valsPerSpect}{Numeric vector with the number of values for each +spectrum.} + +\item{ppm}{\code{numeric(1)} defining the maximal tolerated m/z deviation in +consecutive scans in parts per million (ppm) for the initial ROI +definition.} + 
+\item{peakwidth}{\code{numeric(2)} with the expected approximate +peak width in chromatographic space. Given as a range (min, max) +in seconds.} + +\item{snthresh}{\code{numeric(1)} defining the signal to noise ratio cutoff.} + +\item{prefilter}{\code{numeric(2)}: \code{c(k, I)} specifying the prefilter +step for the first analysis step (ROI detection). Mass traces are only +retained if they contain at least \code{k} peaks with intensity +\code{>= I}.} + +\item{mzCenterFun}{Name of the function to calculate the m/z center of the +chromatographic peak. Allowed are: \code{"wMean"}: intensity weighted +mean of the peak's m/z values, \code{"mean"}: mean of the peak's m/z +values, \code{"apex"}: use the m/z value at the peak apex, +\code{"wMeanApex3"}: intensity weighted mean of the m/z value at the +peak apex and the m/z values left and right of it and \code{"meanApex3"}: +mean of the m/z value of the peak apex and the m/z values left and right +of it.} + +\item{integrate}{Integration method. For \code{integrate = 1} peak limits +are found through descent on the mexican hat filtered data, for +\code{integrate = 2} the descent is done on the real data. The latter +method is more accurate but prone to noise, while the former is more +robust, but less exact.} + +\item{mzdiff}{\code{numeric(1)} representing the minimum difference in m/z +dimension for peaks with overlapping retention times; can be negatove to +allow overlap.} + +\item{fitgauss}{\code{logical(1)} whether or not a Gaussian should be fitted +to each peak.} + +\item{noise}{\code{numeric(1)} allowing to set a minimum intensity required +for centroids to be considered in the first analysis step (centroids with +intensity \code{< noise} are omitted from ROI detection).} + +\item{verboseColumns}{\code{logical(1)} whether additional peak meta data +columns should be returned.} + +\item{criticalValue}{\code{numeric(1)}. Suggested values: +(\code{0.1-3.0}). 
This setting helps determine the Kalman Filter +prediction margin of error. A real centroid belonging to a bonafide +peak must fall within the KF prediction margin of error. Much like +in the construction of a confidence interval, \code{criticalVal} loosely +translates to be a multiplier of the standard error of the prediction +reported by the Kalman Filter. If the peaks in the XC-MS sample have +a small mass deviance in ppm error, a smaller critical value might be +better and vice versa.} + +\item{consecMissedLimit}{\code{integer(1)} Suggested values: (\code{1,2,3}). +While a peak is in the process of being detected by a Kalman Filter, the +Kalman Filter may not find a predicted centroid in every scan. After 1 +or more consecutive failed predictions, this setting informs Massifquant +when to stop a Kalman Filter from following a candidate peak.} + +\item{unions}{\code{integer(1)} set to \code{1} if apply t-test union on +segmentation; set to \code{0} if no t-test to be applied on +chromatographically continuous peaks sharing same m/z range. +Explanation: With very few data points, sometimes a Kalman Filter stops +tracking a peak prematurely. Another Kalman Filter is instantiated +and begins following the rest of the signal. Because tracking is done +backwards to forwards, this algorithmic defect leaves a real peak +divided into two segments or more. With this option turned on, the +program identifies segmented peaks and combines them (merges them) +into one with a two sample t-test. The potential danger of this option +is that some truly distinct peaks may be merged.} + +\item{checkBack}{\code{integer(1)} set to \code{1} if turned on; set to +\code{0} if turned off. The convergence of a Kalman Filter to a peak's +precise m/z mapping is very fast, but sometimes it incorporates erroneous +centroids as part of a peak (especially early on).
The \code{scanBack} +option is an attempt to remove the occasional outlier that lies beyond +the converged bounds of the Kalman Filter. The option does not directly +affect identification of a peak because it is a postprocessing measure; +it has not shown to be extremely useful thus far and the default is set +to being turned off.} + +\item{withWave}{\code{logical(1)} if \code{TRUE}, the peaks identified first +with Massifquant are subsequently filtered with the second step of the +centWave algorithm, which includes wavelet estimation.} +} +\value{ +A matrix, each row representing an identified chromatographic peak, +with columns: +\describe{ +\item{mz}{Intensity weighted mean of m/z values of the peaks across +scans.} +\item{mzmin}{Minimum m/z of the peak.} +\item{mzmax}{Maximum m/z of the peak.} +\item{rtmin}{Minimum retention time of the peak.} +\item{rtmax}{Maximum retention time of the peak.} +\item{rt}{Retention time of the peak's midpoint.} +\item{into}{Integrated (original) intensity of the peak.} +\item{maxo}{Maximum intensity of the peak.} +} +If \code{withWave} is set to \code{TRUE}, the result is the same as +returned by the \code{\link{do_findChromPeaks_centWave}} method. +} +\description{ +Massifquant is a Kalman filter (KF)-based chromatographic peak +detection for XC-MS data in centroid mode. The identified peaks +can be further refined with the \emph{centWave} method (see +\code{\link{do_findChromPeaks_centWave}} for details on centWave) +by specifying \code{withWave = TRUE}. +} +\details{ +This algorithm's performance has been tested rigorously +on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. +Simultaneous kalman filters identify peaks and calculate their +area under the curve. The default parameters are set to operate on +a complex LC-MS Orbitrap sample. Users will find it useful to do some +simple exploratory data analysis to find out where to set a minimum +intensity, and identify how many scans an average peak spans.
The +\code{consecMissedLimit} parameter has yielded good performance on +Orbitrap data when set to (\code{2}) and on TOF data it was found best +to be at (\code{1}). This may change as the algorithm has yet to be +tested on many samples. The \code{criticalValue} parameter is perhaps +most difficult to dial in appropriately and visual inspection of peak +identification is the best suggested tool for quick optimization. +The \code{ppm} and \code{checkBack} parameters have shown less influence +than the other parameters and exist to give users flexibility and +better accuracy. +} +\examples{ +library(faahKO) +library(xcms) +cdfpath <- system.file("cdf", package = "faahKO") +cdffiles <- list.files(cdfpath, recursive = TRUE, full.names = TRUE) + +## Read the first file +xraw <- xcmsRaw(cdffiles[1]) +## Extract the required data +mzVals <- xraw@env$mz +intVals <- xraw@env$intensity +## Define the values per spectrum: +valsPerSpect <- diff(c(xraw@scanindex, length(mzVals))) + +## Perform the peak detection using massifquant +res <- do_findChromPeaks_massifquant(mz = mzVals, int = intVals, +scantime = xraw@scantime, valsPerSpect = valsPerSpect) +head(res) +} +\references{ +Conley CJ, Smith R, Torgrip RJ, Taylor RM, Tautenhahn R and Prince JT +"Massifquant: open-source Kalman filter-based XC-MS isotope trace feature +detection" \emph{Bioinformatics} 2014, 30(18):2636-43. +} +\seealso{ +\code{\link{massifquant}} for the standard user interface method. 
+ +Other core peak detection functions: \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}}, + \code{\link{do_findChromPeaks_centWave}}, + \code{\link{do_findChromPeaks_matchedFilter}}, + \code{\link{do_findPeaks_MSW}} +} +\author{ +Christopher Conley +} diff --git a/man/do_detectFeatures_matchedFilter.Rd b/man/do_findChromPeaks_matchedFilter.Rd similarity index 57% rename from man/do_detectFeatures_matchedFilter.Rd rename to man/do_findChromPeaks_matchedFilter.Rd index a0651d746..c3e7d58b9 100644 --- a/man/do_detectFeatures_matchedFilter.Rd +++ b/man/do_findChromPeaks_matchedFilter.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/do_detectFeatures-functions.R -\name{do_detectFeatures_matchedFilter} -\alias{do_detectFeatures_matchedFilter} -\title{Core API function for matchedFilter feature detection} +% Please edit documentation in R/do_findChromPeaks-functions.R +\name{do_findChromPeaks_matchedFilter} +\alias{do_findChromPeaks_matchedFilter} +\title{Core API function for matchedFilter peak detection} \usage{ -do_detectFeatures_matchedFilter(mz, int, scantime, valsPerSpect, +do_findChromPeaks_matchedFilter(mz, int, scantime, valsPerSpect, binSize = 0.1, impute = "none", baseValue, distance, fwhm = 30, sigma = fwhm/2.3548, max = 5, snthresh = 10, steps = 2, mzdiff = 0.8 - binSize * steps, index = FALSE) @@ -22,14 +22,14 @@ spectra/scans of the data representing the retention time of each scan.} \item{valsPerSpect}{Numeric vector with the number of values for each spectrum.} -\item{binSize}{numeric(1) specifying the width of the +\item{binSize}{\code{numeric(1)} specifying the width of the bins/slices in m/z dimension.} \item{impute}{Character string specifying the method to be used for missing -value imputation. Allowed values are \code{"none"} (no linear interpolation), -\code{"lin"} (linear interpolation), \code{"linbase"} (linear interpolation -within a certain bin-neighborhood) and \code{"intlin"}. 
See -\code{\link{imputeLinInterpol}} for more details.} +value imputation. Allowed values are \code{"none"} (no linear +interpolation), \code{"lin"} (linear interpolation), \code{"linbase"} +(linear interpolation within a certain bin-neighborhood) and +\code{"intlin"}. See \code{\link{imputeLinInterpol}} for more details.} \item{baseValue}{The base value to which empty elements should be set. This is only considered for \code{method = "linbase"} and corresponds to the @@ -39,57 +39,59 @@ is only considered for \code{method = "linbase"} and corresponds to the element of an empty element that should be considered for linear interpolation. See details section for more information.} -\item{fwhm}{numeric(1) specifying the full width at half maximum -of matched filtration gaussian model peak. Only used to calculate the actual -sigma, see below.} +\item{fwhm}{\code{numeric(1)} specifying the full width at half maximum +of matched filtration gaussian model peak. Only used to calculate the +actual sigma, see below.} -\item{sigma}{numeric(1) specifying the standard deviation (width) +\item{sigma}{\code{numeric(1)} specifying the standard deviation (width) of the matched filtration model peak.} -\item{max}{numeric(1) representing the maximum number of peaks +\item{max}{\code{numeric(1)} representing the maximum number of peaks that are expected/will be identified per slice.} -\item{snthresh}{numeric(1) defining the signal to noise ratio cutoff.} +\item{snthresh}{\code{numeric(1)} defining the signal to noise ratio cutoff.} -\item{steps}{numeric(1) defining the number of bins to be -merged before filtration (i.e. the number of neighboring bins that will be -joined to the slice in which filtration and peak detection will be +\item{steps}{\code{numeric(1)} defining the number of bins to be +merged before filtration (i.e. 
the number of neighboring bins that will +be joined to the slice in which filtration and peak detection will be performed).} -\item{mzdiff}{Numeric representing the minimum difference in m/z dimension -for peaks with overlapping retention times; can be negatove to allow overlap.} +\item{mzdiff}{\code{numeric(1)} representing the minimum difference in m/z +dimension for peaks with overlapping retention times; can be negative to +allow overlap.} -\item{index}{Logical specifying whether indicies should be returned instead -of values for m/z and retention times.} +\item{index}{\code{logical(1)} specifying whether indices should be +returned instead of values for m/z and retention times.} } \value{ -A matrix, each row representing an identified feature, with columns: +A matrix, each row representing an identified chromatographic peak, +with columns: \describe{ -\item{mz}{Intensity weighted mean of m/z values of the feature across scans.} -\item{mzmin}{Minimum m/z of the feature.} -\item{mzmax}{Maximum m/z of the feature.} -\item{rt}{Retention time of the feature's midpoint.} -\item{rtmin}{Minimum retention time of the feature.} -\item{rtmax}{Maximum retention time of the feature.} -\item{into}{Integrated (original) intensity of the feature.} +\item{mz}{Intensity weighted mean of m/z values of the peak across scans.} +\item{mzmin}{Minimum m/z of the peak.} +\item{mzmax}{Maximum m/z of the peak.} +\item{rt}{Retention time of the peak's midpoint.} +\item{rtmin}{Minimum retention time of the peak.} +\item{rtmax}{Maximum retention time of the peak.} +\item{into}{Integrated (original) intensity of the peak.} \item{intf}{Integrated intensity of the filtered peak.} -\item{maxo}{Maximum intensity of the feature.} +\item{maxo}{Maximum intensity of the peak.} \item{maxf}{Maximum intensity of the filtered peak.} -\item{i}{Rank of feature in merged EIC (\code{<= max}).} -\item{sn}{Signal to noise ratio of the feature} +\item{i}{Rank of peak in merged EIC (\code{<= max}).} 
+\item{sn}{Signal to noise ratio of the peak} } } \description{ -This function identifies features in the chromatographic +This function identifies peaks in the chromatographic time domain as described in [Smith 2006]. The intensity values are binned by cutting The LC/MS data into slices (bins) of a mass unit (\code{binSize} m/z) wide. Within each bin the maximal intensity is -selected. The feature detection is then performed in each bin by +selected. The peak detection is then performed in each bin by extending it based on the \code{steps} parameter to generate slices comprising bins \code{current_bin - steps +1} to \code{current_bin + steps - 1}. Each of these slices is then filtered with matched filtration using -a second-derative Gaussian as the model feature/peak shape. After filtration -features are detected using a signal-to-ration cut-off. For more details +a second-derivative Gaussian as the model peak shape. After filtration +peaks are detected using a signal-to-noise ratio cut-off. For more details and illustrations see [Smith 2006]. } \details{ @@ -102,7 +104,7 @@ For more details on binning and missing value imputation see \code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods. } \note{ -This function exposes core feature detection functionality of +This function exposes core peak detection functionality of the \emph{matchedFilter} method. While this function can be called directly, users will generally call the corresponding method for the data object instead (e.g. the \code{link{findPeaks.matchedFilter}} method). 
@@ -113,19 +115,16 @@ library(faahKO) fs <- system.file('cdf/KO/ko15.CDF', package = "faahKO") xr <- xcmsRaw(fs) -## Extracting the data from the xcmsRaw for do_detectFeatures_centWave +## Extracting the data from the xcmsRaw for do_findChromPeaks_centWave mzVals <- xr@env$mz intVals <- xr@env$intensity ## Define the values per spectrum: valsPerSpect <- diff(c(xr@scanindex, length(mzVals))) -res <- do_detectFeatures_matchedFilter(mz = mzVals, int = intVals, +res <- do_findChromPeaks_matchedFilter(mz = mzVals, int = intVals, scantime = xr@scantime, valsPerSpect = valsPerSpect) head(res) } -\author{ -Colin A Smith, Johannes Rainer -} \references{ Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite @@ -137,9 +136,11 @@ Profiling Using Nonlinear Peak Alignment, Matching, and Identification" \code{\link{imputeLinInterpol}} for the interpolation of missing values. \code{\link{matchedFilter}} for the standard user interface method. 
-Other core feature detection functions: \code{\link{do_detectFeatures_MSW}}, - \code{\link{do_detectFeatures_centWaveWithPredIsoROIs}}, - \code{\link{do_detectFeatures_centWave}}, - \code{\link{do_detectFeatures_massifquant}} +Other core peak detection functions: \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}}, + \code{\link{do_findChromPeaks_centWave}}, + \code{\link{do_findChromPeaks_massifquant}}, + \code{\link{do_findPeaks_MSW}} +} +\author{ +Colin A Smith, Johannes Rainer } - diff --git a/man/do_detectFeatures_MSW.Rd b/man/do_findPeaks_MSW.Rd similarity index 53% rename from man/do_detectFeatures_MSW.Rd rename to man/do_findPeaks_MSW.Rd index d7aa32038..601bf0c1e 100644 --- a/man/do_detectFeatures_MSW.Rd +++ b/man/do_findPeaks_MSW.Rd @@ -1,11 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/do_detectFeatures-functions.R -\name{do_detectFeatures_MSW} -\alias{do_detectFeatures_MSW} +% Please edit documentation in R/do_findChromPeaks-functions.R +\name{do_findPeaks_MSW} +\alias{do_findPeaks_MSW} \title{Core API function for single-spectrum non-chromatography MS data -feature detection} +peak detection} \usage{ -do_detectFeatures_MSW(mz, int, snthresh = 3, verboseColumns = FALSE, ...) +do_findPeaks_MSW(mz, int, snthresh = 3, verboseColumns = FALSE, ...) 
} \arguments{ \item{mz}{Numeric vector with the individual m/z values from all scans/ @@ -14,32 +14,32 @@ spectra of one file/sample.} \item{int}{Numeric vector with the individual intensity values from all scans/spectra of one file/sample.} -\item{snthresh}{numeric(1) defining the signal to noise ratio cutoff.} +\item{snthresh}{\code{numeric(1)} defining the signal to noise ratio cutoff.} -\item{verboseColumns}{Logical whether additional feature meta data columns -should be returned.} +\item{verboseColumns}{\code{logical(1)} whether additional peak meta data +columns should be returned.} \item{...}{Additional parameters to be passed to the \code{\link[MassSpecWavelet]{peakDetectionCWT}} function.} } \value{ -A matrix, each row representing an identified feature, with columns: +A matrix, each row representing an identified peak, with columns: \describe{ -\item{mz}{m/z value of the feature at the centroid position.} -\item{mzmin}{Minimum m/z of the feature.} -\item{mzmax}{Maximum m/z of the feature.} +\item{mz}{m/z value of the peak at the centroid position.} +\item{mzmin}{Minimum m/z of the peak.} +\item{mzmax}{Maximum m/z of the peak.} \item{rt}{Always \code{-1}.} \item{rtmin}{Always \code{-1}.} \item{rtmax}{Always \code{-1}.} -\item{into}{Integrated (original) intensity of the feature.} -\item{maxo}{Maximum intensity of the feature.} +\item{into}{Integrated (original) intensity of the peak.} +\item{maxo}{Maximum intensity of the peak.} \item{intf}{Always \code{NA}.} -\item{maxf}{Maximum MSW-filter response of the feature.} +\item{maxf}{Maximum MSW-filter response of the peak.} \item{sn}{Signal to noise ratio.} } } \description{ -This function performs feature detection in mass spectrometry +This function performs peak detection in mass spectrometry direct injection spectrum using a wavelet based algorithm. } \details{ @@ -49,17 +49,16 @@ This is a wrapper around the peak picker in Bioconductor's \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. 
See the \emph{xcmsDirect} vignette for more information. } -\author{ -Joachim Kutzera, Steffen Neumann, Johannes Rainer -} \seealso{ ##' \code{\link{MSW}} for the standard user interface method. \code{\link[MassSpecWavelet]{peakDetectionCWT}} from the \code{MassSpecWavelet} package. -Other core feature detection functions: \code{\link{do_detectFeatures_centWaveWithPredIsoROIs}}, - \code{\link{do_detectFeatures_centWave}}, - \code{\link{do_detectFeatures_massifquant}}, - \code{\link{do_detectFeatures_matchedFilter}} +Other core peak detection functions: \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}}, + \code{\link{do_findChromPeaks_centWave}}, + \code{\link{do_findChromPeaks_massifquant}}, + \code{\link{do_findChromPeaks_matchedFilter}} +} +\author{ +Joachim Kutzera, Steffen Neumann, Johannes Rainer } - diff --git a/man/do_groupChromPeaks_density.Rd b/man/do_groupChromPeaks_density.Rd new file mode 100644 index 000000000..df69aed12 --- /dev/null +++ b/man/do_groupChromPeaks_density.Rd @@ -0,0 +1,104 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/do_groupChromPeaks-functions.R +\name{do_groupChromPeaks_density} +\alias{do_groupChromPeaks_density} +\title{Core API function for peak density based chromatographic peak +grouping} +\usage{ +do_groupChromPeaks_density(peaks, sampleGroups, bw = 30, minFraction = 0.5, + minSamples = 1, binSize = 0.25, maxFeatures = 50) +} +\arguments{ +\item{peaks}{A \code{matrix} or \code{data.frame} with the mz values and +retention times of the identified chromatographic peaks in all samples of an +experiment. Required columns are \code{"mz"}, \code{"rt"} and +\code{"sample"}. The latter should contain \code{numeric} values representing +the index of the sample in which the peak was found.} + +\item{sampleGroups}{A vector of the same length than samples defining the +sample group assignments (i.e. 
which samples belong to which sample +group).} + +\item{bw}{\code{numeric(1)} defining the bandwidth (standard deviation of the +smoothing kernel) to be used. This argument is passed to the +\code{\link{density}} method.} + +\item{minFraction}{\code{numeric(1)} defining the minimum fraction of samples +in at least one sample group in which the peaks have to be present to be +considered as a peak group (feature).} + +\item{minSamples}{\code{numeric(1)} with the minimum number of samples in at +least one sample group in which the peaks have to be detected to be +considered a peak group (feature).} + +\item{binSize}{\code{numeric(1)} defining the size of the overlapping slices +in mz dimension.} + +\item{maxFeatures}{\code{numeric(1)} with the maximum number of peak groups +to be identified in a single mz slice.} +} +\value{ +A \code{list} with elements \code{"featureDefinitions"} and +\code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row +representing a (mz-rt) feature (i.e. a peak group) with columns: +\describe{ +\item{"mzmed"}{median of the peaks' apex mz values.} +\item{"mzmin"}{smallest mz value of all peaks' apex within the feature.} +\item{"mzmax"}{largest mz value of all peaks' apex within the feature.} +\item{"rtmed"}{the median of the peaks' retention times.} +\item{"rtmin"}{the smallest retention time of the peaks in the group.} +\item{"rtmax"}{the largest retention time of the peaks in the group.} +\item{"npeaks"}{the total number of peaks assigned to the feature. +Note that this number can be larger than the total number of samples, since +multiple peaks from the same sample could be assigned to a feature.} +} +\code{"peakIndex"} is a \code{list} with the indices of all peaks in a +feature in the \code{peaks} input matrix. 
+} +\description{ +The \code{do_groupChromPeaks_density} function performs +chromatographic peak grouping based on the density (distribution) of peaks, +found in different samples, along the retention time axis in slices of +overlapping mz ranges. +} +\details{ +For overlapping slices along the mz dimension, the function +calculates the density distribution of identified peaks along the +retention time axis and groups peaks from the same or different samples +that are close to each other. See [Smith 2006] for more details. +} +\note{ +The default settings might not be appropriate for all LC/GC-MS setups, +especially the \code{bw} and \code{binSize} parameter should be adjusted +accordingly. +} +\examples{ +## Load the test data set +library(faahKO) +data(faahko) + +## Extract the matrix with the identified peaks from the xcmsSet: +fts <- peaks(faahko) + +## Perform the peak grouping with default settings: +res <- do_groupChromPeaks_density(fts, sampleGroups = sampclass(faahko)) + +## The feature definitions: +head(res$featureDefinitions) + +## The assignment of peaks from the input matrix to the features +head(res$peakIndex) +} +\references{ +Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and +Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite +Profiling Using Nonlinear Peak Alignment, Matching, and Identification" +\emph{Anal. Chem.} 2006, 78:779-787. 
+} +\seealso{ +Other core peak grouping algorithms: \code{\link{do_groupChromPeaks_nearest}}, + \code{\link{do_groupPeaks_mzClust}} +} +\author{ +Colin Smith, Johannes Rainer +} diff --git a/man/do_groupChromPeaks_nearest.Rd b/man/do_groupChromPeaks_nearest.Rd new file mode 100644 index 000000000..5ab85fb6d --- /dev/null +++ b/man/do_groupChromPeaks_nearest.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/do_groupChromPeaks-functions.R +\name{do_groupChromPeaks_nearest} +\alias{do_groupChromPeaks_nearest} +\title{Core API function for chromatographic peak grouping using a nearest +neighbor approach} +\usage{ +do_groupChromPeaks_nearest(peaks, sampleGroups, mzVsRtBalance = 10, + absMz = 0.2, absRt = 15, kNN = 10) +} +\arguments{ +\item{peaks}{A \code{matrix} or \code{data.frame} with the mz values and +retention times of the identified chromatographic peaks in all samples of an +experiment. Required columns are \code{"mz"}, \code{"rt"} and +\code{"sample"}. The latter should contain \code{numeric} values representing +the index of the sample in which the peak was found.} + +\item{sampleGroups}{A vector of the same length as samples defining the +sample group assignments (i.e. which samples belong to which sample +group).} + +\item{mzVsRtBalance}{\code{numeric(1)} representing the factor by which mz +values are multiplied before calculating the (euclidean) distance between +two peaks.} + +\item{absMz}{\code{numeric(1)} maximum tolerated distance for mz values.} + +\item{absRt}{\code{numeric(1)} maximum tolerated distance for rt values.} + +\item{kNN}{\code{numeric(1)} representing the number of nearest neighbors +to check.} +} +\value{ +A \code{list} with elements \code{"featureDefinitions"} and +\code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row +representing an (mz-rt) feature (i.e. 
peak group) with columns: +\describe{ +\item{"mzmed"}{median of the peaks' apex mz values.} +\item{"mzmin"}{smallest mz value of all peaks' apex within the feature.} +\item{"mzmax"}{largest mz value of all peaks' apex within the feature.} +\item{"rtmed"}{the median of the peaks' retention times.} +\item{"rtmin"}{the smallest retention time of the peaks in the feature.} +\item{"rtmax"}{the largest retention time of the peaks in the feature.} +\item{"npeaks"}{the total number of peaks assigned to the feature.} +} +\code{"peakIndex"} is a \code{list} with the indices of all peaks in a +feature in the \code{peaks} input matrix. +} +\description{ +The \code{do_groupChromPeaks_nearest} function groups peaks +across samples by creating a master peak list and assigning corresponding +peaks from all samples to each peak group (i.e. feature). The method is +inspired by the correspondence algorithm of mzMine [Katajamaa 2006]. +} +\references{ +Katajamaa M, Miettinen J, Oresic M: MZmine: Toolbox for +processing and visualization of mass spectrometry based molecular profile +data. \emph{Bioinformatics} 2006, 22:634-636. +} +\seealso{ +Other core peak grouping algorithms: \code{\link{do_groupChromPeaks_density}}, + \code{\link{do_groupPeaks_mzClust}} +} diff --git a/man/do_groupPeaks_mzClust.Rd b/man/do_groupPeaks_mzClust.Rd new file mode 100644 index 000000000..6cef5b1fe --- /dev/null +++ b/man/do_groupPeaks_mzClust.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/do_groupChromPeaks-functions.R +\name{do_groupPeaks_mzClust} +\alias{do_groupPeaks_mzClust} +\title{Core API function for peak grouping using mzClust} +\usage{ +do_groupPeaks_mzClust(peaks, sampleGroups, ppm = 20, absMz = 0, + minFraction = 0.5, minSamples = 1) +} +\arguments{ +\item{peaks}{A \code{matrix} or \code{data.frame} with the mz values and +retention times of the identified chromatographic peaks in all samples of an +experiment. 
Required columns are \code{"mz"}, \code{"rt"} and +\code{"sample"}. The latter should contain \code{numeric} values representing +the index of the sample in which the peak was found.} + +\item{sampleGroups}{A vector of the same length than samples defining the +sample group assignments (i.e. which samples belong to which sample +group).} + +\item{ppm}{\code{numeric(1)} representing the relative mz error for the +clustering/grouping (in parts per million).} + +\item{absMz}{\code{numeric(1)} representing the absolute mz error for the +clustering.} + +\item{minFraction}{\code{numeric(1)} defining the minimum fraction of samples +in at least one sample group in which the peaks have to be present to be +considered as a peak group (feature).} + +\item{minSamples}{\code{numeric(1)} with the minimum number of samples in at +least one sample group in which the peaks have to be detected to be +considered a peak group (feature).} +} +\value{ +A \code{list} with elements \code{"featureDefinitions"} and +\code{"peakIndex"}. \code{"featureDefinitions"} is a \code{matrix}, each row +representing an (mz-rt) feature (i.e. peak group) with columns: +\describe{ +\item{"mzmed"}{median of the peaks' apex mz values.} +\item{"mzmin"}{smallest mz value of all peaks' apex within the feature.} +\item{"mzmax"}{largest mz value of all peaks' apex within the feature.} +\item{"rtmed"}{always \code{-1}.} +\item{"rtmin"}{always \code{-1}.} +\item{"rtmax"}{always \code{-1}.} +\item{"npeaks"}{the total number of peaks assigned to the feature. +Note that this number can be larger than the total number of samples, since +multiple peaks from the same sample could be assigned to a group.} +} +\code{"peakIndex"} is a \code{list} with the indices of all peaks in a +peak group in the \code{peaks} input matrix. +} +\description{ +The \code{do_groupPeaks_mzClust} function performs high +resolution correspondence on single spectra samples. +} +\references{ +Saira A. 
Kazmi, Samiran Ghosh, Dong-Guk Shin, Dennis W. Hill +and David F. Grant\cr \emph{Alignment of high resolution mass spectra: +development of a heuristic approach for metabolomics}.\cr Metabolomics, +Vol. 2, No. 2, 75-83 (2006) +} +\seealso{ +Other core peak grouping algorithms: \code{\link{do_groupChromPeaks_density}}, + \code{\link{do_groupChromPeaks_nearest}} +} diff --git a/man/extractChromatograms-method.Rd b/man/extractChromatograms-method.Rd new file mode 100644 index 000000000..a95568246 --- /dev/null +++ b/man/extractChromatograms-method.Rd @@ -0,0 +1,152 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-OnDiskMSnExp.R, R/methods-XCMSnExp.R +\docType{methods} +\name{extractChromatograms,OnDiskMSnExp-method} +\alias{extractChromatograms,OnDiskMSnExp-method} +\alias{extractChromatograms,XCMSnExp-method} +\alias{extractChromatograms} +\title{Extracting chromatograms} +\usage{ +\S4method{extractChromatograms}{OnDiskMSnExp}(object, rt, mz, + aggregationFun = "sum", missing = NA_real_) + +\S4method{extractChromatograms}{XCMSnExp}(object, rt, mz, + adjustedRtime = hasAdjustedRtime(object), aggregationFun = "sum", + missing = NA_real_) +} +\arguments{ +\item{object}{Either a \code{\link[MSnbase]{OnDiskMSnExp}} or +\code{\link{XCMSnExp}} object from which the chromatograms should be +extracted.} + +\item{rt}{\code{numeric(2)} or two-column \code{matrix} defining the lower +and upper boundary for the retention time range(s). If not specified, +the full retention time range of the original data will be used. +It is also possible to submit a \code{numeric(1)} in which case +\code{range} is called on it to transform it to a \code{numeric(2)}.} + +\item{mz}{\code{numeric(2)} or two-column \code{matrix} defining the lower +and upper mz value for the MS data slice(s). If not specified, the +chromatograms will be calculated on the full mz range. 
+It is also possible to submit a \code{numeric(1)} in which case +\code{range} is called on it to transform it to a \code{numeric(2)}.} + +\item{aggregationFun}{\code{character} specifying the function to be used to +aggregate intensity values across the mz value range for the same +retention time. Allowed values are \code{"sum"}, \code{"max"}, +\code{"mean"} and \code{"min"}.} + +\item{missing}{\code{numeric(1)} allowing to specify the intensity value to +be used if for a given retention time no signal was measured within the +mz range of the corresponding scan. Defaults to \code{NA_real_} (see also +Details and Notes sections below). Use \code{missing = 0} to resemble the +behaviour of the \code{getEIC} from the \code{old} user interface.} + +\item{adjustedRtime}{For \code{extractChromatograms,XCMSnExp}: whether the +adjusted (\code{adjustedRtime = TRUE}) or raw retention times +(\code{adjustedRtime = FALSE}) should be used for filtering and returned +in the resulting \code{\link{Chromatogram}} object. Adjusted retention +times are used by default if available.} +} +\value{ +If a single \code{rt} and \code{mz} range was specified, + \code{extractChromatograms} returns a \code{list} of + \code{\link{Chromatogram}} classes each element being the chromatogram + for one of the samples for the specified range. + If multiple \code{rt} and \code{mz} ranges were provided (i.e. by passing + a multi-row \code{matrix} to parameters \code{rt} or \code{mz}), the + function returns a \code{list} of \code{list}s. The outer list + representing results for the various ranges, the inner the result across + files. In other words, \code{result[[1]]} returns a \code{list} with + \code{Chromatogram} classes length equal to the number of files, each + element representing the \code{Chromatogram} for the first rt/mz range + for one file. 
+ An empty \code{list} is returned if no MS1 data is present in + \code{object} or if not a single spectrum is available for any of the + provided retention time ranges in \code{rt}. An empty \code{Chromatogram} + object is returned at the corresponding position in the result \code{list} + if for the specific file no scan/spectrum was measured in the provided + rt window. In all other cases, a \code{Chromatogram} with length equal + to the number of scans/spectra in the provided rt range is returned. +} +\description{ +\code{extractChromatograms}: the method allows to extract + chromatograms from \code{\link[MSnbase]{OnDiskMSnExp}} and + \code{\link{XCMSnExp}} objects. +} +\details{ +Arguments \code{rt} and \code{mz} allow to specify the MS + data slice from which the chromatogram should be extracted. + The parameter \code{aggregationFun} allows to specify the function to be + used to aggregate the intensities across the mz range for the same + retention time. Setting \code{aggregationFun = "sum"} would e.g. allow + to calculate the \emph{total ion chromatogram} (TIC), + \code{aggregationFun = "max"} the \emph{base peak chromatogram} (BPC). + The length of the extracted \code{Chromatogram} object, i.e. the number + of available data points, corresponds to the number of scans/spectra + measured in the specified retention time range. If in a specific scan + (for a given retention time) no signal was measured in the specified mz + range, a \code{NA_real_} is reported as intensity for the retention time + (see Notes for more information). This can be changed using the + \code{missing} parameter. +} +\note{ +\code{Chromatogram} objects extracted with \code{extractChromatogram} + contain \code{NA_real_} values if, for a given retention time, no + signal was measured in the specified mz range. If no spectrum/scan is + present in the defined retention time window a \code{Chromatogram} object + of length 0 is returned. 
+ + For \code{\link{XCMSnExp}} objects, if adjusted retention times are + available, the \code{extractChromatograms} method will by default report + and use these (for the subsetting based on the provided parameter + \code{rt}). This can be overwritten with the parameter + \code{adjustedRtime}. +} +\examples{ +## Read some files from the faahKO package. +library(xcms) +library(faahKO) +faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), + system.file('cdf/KO/ko16.CDF', package = "faahKO"), + system.file('cdf/KO/ko18.CDF', package = "faahKO")) + +od <- readMSData2(faahko_3_files) + +## Extract the ion chromatogram for one chromatographic peak in the data. +chrs <- extractChromatograms(od, rt = c(2700, 2900), mz = 335) + +## plot the data +plot(rtime(chrs[[2]]), intensity(chrs[[2]]), type = "l", xlab = "rtime", + ylab = "intensity", col = "000080") +for(i in c(1, 3)) { + points(rtime(chrs[[i]]), intensity(chrs[[i]]), type = "l", col = "00000080") +} + +## Plot the chromatogram using plotChromatogram +plotChromatogram(chrs) + +## Extract chromatograms for multiple ranges. +mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE) +rtr <- matrix(c(2700, 2900, 2600, 2750), ncol = 2, byrow = TRUE) +chrs <- extractChromatograms(od, mz = mzr, rt = rtr) + +## Plot the extracted chromatograms +par(mfrow = c(1, 2)) +plotChromatogram(chrs[[1]]) +plotChromatogram(chrs[[2]]) +} +\seealso{ +\code{\link{XCMSnExp}} for the data object. + \code{\link{Chromatogram}} for the object representing chromatographic + data. + + \code{\link{plotChromatogram}} to plot a \code{Chromatogram} or + \code{list} of such objects. + + \code{\link{extractMsData}} for a method to extract the MS data as + \code{data.frame}. 
+} +\author{ +Johannes Rainer +} diff --git a/man/extractMsData-method.Rd b/man/extractMsData-method.Rd new file mode 100644 index 000000000..ac527ff96 --- /dev/null +++ b/man/extractMsData-method.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods-OnDiskMSnExp.R, R/methods-XCMSnExp.R +\docType{methods} +\name{extractMsData,OnDiskMSnExp-method} +\alias{extractMsData,OnDiskMSnExp-method} +\alias{extractMsData,XCMSnExp-method} +\alias{extractMsData} +\title{Extract a \code{data.frame} containing MS data} +\usage{ +\S4method{extractMsData}{OnDiskMSnExp}(object, rt, mz) + +\S4method{extractMsData}{XCMSnExp}(object, rt, mz, + adjustedRtime = hasAdjustedRtime(object)) +} +\arguments{ +\item{object}{A \code{XCMSnExp} or \code{OnDiskMSnExp} object.} + +\item{rt}{\code{numeric(2)} with the retention time range from which the +data should be extracted.} + +\item{mz}{\code{numeric(2)} with the mz range.} + +\item{adjustedRtime}{(for \code{extractMsData,XCMSnExp}): \code{logical(1)} +specifying if adjusted or raw retention times should be reported. +Defaults to adjusted retention times, if these are present in +\code{object}.} +} +\value{ +A \code{list} of length equal to the number of samples/files in + \code{object}. Each element being a \code{data.frame} with columns + \code{"rt"}, \code{"mz"} and \code{"i"} with the retention time, mz and + intensity tuples of a file. If no data is available for the mz-rt range + in a file a \code{data.frame} with 0 rows is returned for that file. +} +\description{ +Extract a \code{data.frame} of retention time, mz and intensity + values from each file/sample in the provided rt-mz range (or for the full + data range if \code{rt} and \code{mz} are not defined). +} +\examples{ +## Read some files from the test data package. 
+library(faahKO) +library(xcms) +fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, + full.names = TRUE) +raw_data <- readMSData2(fls[1:2]) + +## Read the full MS data for a defined mz-rt region. +res <- extractMsData(raw_data, mz = c(300, 320), rt = c(2700, 2900)) + +## We've got one data.frame per file +length(res) + +## With number of rows: +nrow(res[[1]]) + +head(res[[1]]) +} +\seealso{ +\code{\link{XCMSnExp}} for the data object. +} +\author{ +Johannes Rainer +} diff --git a/man/featureDetection-centWaveWithPredIsoROIs.Rd b/man/featureDetection-centWaveWithPredIsoROIs.Rd deleted file mode 100644 index d1ce06883..000000000 --- a/man/featureDetection-centWaveWithPredIsoROIs.Rd +++ /dev/null @@ -1,269 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DataClasses.R, R/functions-Params.R, R/methods-OnDiskMSnExp.R, R/methods-Params.R -\docType{class} -\name{featureDetection-centWaveWithPredIsoROIs} -\alias{CentWavePredIsoParam} -\alias{CentWavePredIsoParam-class} -\alias{centWaveWithPredIsoROIs} -\alias{detectFeatures,MSnExp,CentWavePredIsoParam-method} -\alias{detectFeatures,OnDiskMSnExp,CentWavePredIsoParam-method} -\alias{featureDetection-centWaveWithPredIsoROIs} -\alias{maxCharge} -\alias{maxCharge,CentWavePredIsoParam-method} -\alias{maxCharge<-} -\alias{maxCharge<-,CentWavePredIsoParam-method} -\alias{maxIso} -\alias{maxIso,CentWavePredIsoParam-method} -\alias{maxIso<-} -\alias{maxIso<-,CentWavePredIsoParam-method} -\alias{mzIntervalExtension} -\alias{mzIntervalExtension,CentWavePredIsoParam-method} -\alias{mzIntervalExtension<-} -\alias{mzIntervalExtension<-,CentWavePredIsoParam-method} -\alias{polarity,CentWavePredIsoParam-method} -\alias{polarity<-} -\alias{polarity<-,CentWavePredIsoParam-method} -\alias{show,CentWavePredIsoParam-method} -\alias{snthreshIsoROIs} -\alias{snthreshIsoROIs,CentWavePredIsoParam-method} -\alias{snthreshIsoROIs<-} -\alias{snthreshIsoROIs<-,CentWavePredIsoParam-method} 
-\title{Two-step centWave feature detection considering also feature isotopes} -\usage{ -CentWavePredIsoParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, - prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, - mzdiff = -0.001, fitgauss = FALSE, noise = 0, verboseColumns = FALSE, - roiList = list(), firstBaselineCheck = TRUE, roiScales = numeric(), - snthreshIsoROIs = 6.25, maxCharge = 3, maxIso = 5, - mzIntervalExtension = TRUE, polarity = "unknown") - -\S4method{detectFeatures}{OnDiskMSnExp,CentWavePredIsoParam}(object, param, - BPPARAM = bpparam(), return.type = "XCMSnExp") - -\S4method{detectFeatures}{MSnExp,CentWavePredIsoParam}(object, param, - BPPARAM = bpparam(), return.type = "list") - -\S4method{show}{CentWavePredIsoParam}(object) - -\S4method{snthreshIsoROIs}{CentWavePredIsoParam}(object) - -\S4method{snthreshIsoROIs}{CentWavePredIsoParam}(object) <- value - -\S4method{maxCharge}{CentWavePredIsoParam}(object) - -\S4method{maxCharge}{CentWavePredIsoParam}(object) <- value - -\S4method{maxIso}{CentWavePredIsoParam}(object) - -\S4method{maxIso}{CentWavePredIsoParam}(object) <- value - -\S4method{mzIntervalExtension}{CentWavePredIsoParam}(object) - -\S4method{mzIntervalExtension}{CentWavePredIsoParam}(object) <- value - -\S4method{polarity}{CentWavePredIsoParam}(object) - -\S4method{polarity}{CentWavePredIsoParam}(object) <- value -} -\arguments{ -\item{ppm}{Maximal tolerated m/z deviation in consecutive scans in parts -per million (ppm).} - -\item{peakwidth}{numeric(2) with the expected approximate -feature/peak width in chromatographic space. Given as a range (min, max) -in seconds.} - -\item{snthresh}{numeric(1) defining the signal to noise ratio cutoff.} - -\item{prefilter}{numeric(2): \code{c(k, I)} specifying the prefilter -step for the first analysis step (ROI detection). 
Mass traces are only -retained if they contain at least \code{k} peaks with intensity \code{>= I}.} - -\item{mzCenterFun}{Name of the function to calculate the m/z center of the -feature. Allowed are: \code{"wMean"}: intensity weighted mean of the feature's -m/z values, \code{"mean"}: mean of the feature's m/z values, \code{"apex"}: -use the m/z value at the peak apex, \code{"wMeanApex3"}: intensity weighted -mean of the m/z value at the peak apex and the m/z values left and right of -it and \code{"meanApex3"}: mean of the m/z value of the peak apex and the -m/z values left and right of it.} - -\item{integrate}{Integration method. For \code{integrate = 1} peak limits -are found through descent on the mexican hat filtered data, for -\code{integrate = 2} the descent is done on the real data. The latter method -is more accurate but prone to noise, while the former is more robust, but -less exact.} - -\item{mzdiff}{Numeric representing the minimum difference in m/z dimension -for peaks with overlapping retention times; can be negatove to allow overlap.} - -\item{fitgauss}{Logical whether or not a Gaussian should be fitted to each -peak.} - -\item{noise}{numeric(1) allowing to set a minimum intensity required -for centroids to be considered in the first analysis step (centroids with -intensity \code{< noise} are omitted from ROI detection).} - -\item{verboseColumns}{Logical whether additional feature meta data columns -should be returned.} - -\item{roiList}{An optional list of regions-of-interest (ROI) representing -detected mass traces. If ROIs are submitted the first analysis step is -omitted and feature detection is performed on the submitted ROIs. Each -ROI is expected to have the following elements specified: -\code{scmin} (start scan index), \code{scmax} (end scan index), -\code{mzmin} (minimum m/z), \code{mzmax} (maximum m/z), \code{length} -(number of scans), \code{intensity} (summed intensity). 
Each ROI should be -represented by a \code{list} of elements or a single row \code{data.frame}.} - -\item{firstBaselineCheck}{logical(1). If \code{TRUE} continuous -data within regions of interest is checked to be above the first baseline.} - -\item{roiScales}{Optional numeric vector with length equal to \code{roiList} -defining the scale for each region of interest in \code{roiList} that should -be used for the centWave-wavelets.} - -\item{snthreshIsoROIs}{numeric(1) defining the signal to noise ratio cutoff -to be used in the second centWave run to identify features for predicted -isotope ROIs.} - -\item{maxCharge}{integer(1) defining the maximal isotope charge. Isotopes -will be defined for charges \code{1:maxCharge}.} - -\item{maxIso}{integer(1) defining the number of isotope peaks that should be -predicted for each feature identified in the first centWave run.} - -\item{mzIntervalExtension}{logical(1) whether the mz range for the predicted -isotope ROIs should be extended to increase detection of low intensity peaks.} - -\item{polarity}{character(1) specifying the polarity of the data. Currently -not used, but has to be \code{"positive"}, \code{"negative"} or -\code{"unknown"} if provided.} - -\item{object}{For \code{detectFeatures}: Either an -\code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -object containing the MS- and all other experiment-relevant data. - -For all other methods: a parameter object.} - -\item{param}{An \code{CentWaveParam} object containing all settings for the -centWave algorithm.} - -\item{BPPARAM}{A parameter class specifying if and how parallel processing -should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. -See documentation of the \code{BiocParallel} for more details. If parallel -processing is enables, feature detection is performed in parallel on several -of the input samples.} - -\item{return.type}{Character specifying what type of object the method should -return. 
Can be either \code{"XCMSnExp"} (code), \code{"list"} or -\code{"xcmsSet"}.} - -\item{value}{The value for the slot.} -} -\value{ -The \code{CentWavePredIsoParam} function returns a -\code{CentWavePredIsoParam} class instance with all of the settings -specified for the two-step centWave-based feature detection considering also -feature isotopes. - -For \code{detectFeatures}: if \code{return.type = "XCMSnExp"} an -\code{\link{XCMSnExp}} object with the results of the feature detection. -If \code{return.type = "list"} a list of length equal to the number of -samples with matrices specifying the identified features/peaks. -If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -with the results of the feature detection. -} -\description{ -This method performs a two-step centWave-based feature -detection: in a first centWave run features are identified for which then -the location of their potential isotopes in the mz-retention time is -predicted. A second centWave run is then performed on these -\emph{regions of interest} (ROIs). The final list of features comprises all -non-overlapping features from both centWave runs. - -The \code{CentWavePredIsoParam} class allows to specify all -settings for the two-step centWave-based feature detection considering also -predicted isotopes of features identified in the first centWave run. -Instances should be created with the \code{CentWavePredIsoParam} constructor. -See also the documentation of the \code{\link{CentWaveParam}} for all methods -and arguments this class inherits. - -The \code{detectFeatures,OnDiskMSnExp,CentWavePredIsoParam} method -performs a two-step centWave-based feature detection on all samples from an -\code{\link[MSnbase]{OnDiskMSnExp}} object. \code{\link[MSnbase]{OnDiskMSnExp}} -objects encapsule all experiment specific data and load the spectra data -(mz and intensity values) on the fly from the original files applying also -all eventual data manipulations. 
- -The \code{detectFeatures,MSnExp,CentWavePredIsoParam} method -performs a two-step centWave-based feature detection on all samples from -an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and -intensity values of all spectra hence no additional data input from the -original files is required. - -\code{snthreshIsoROIs},\code{snthreshIsoROIs<-}: getter and -setter for the \code{snthreshIsoROIs} slot of the object. - -\code{maxCharge},\code{maxCharge<-}: getter and -setter for the \code{maxCharge} slot of the object. - -\code{maxIso},\code{maxIso<-}: getter and -setter for the \code{maxIso} slot of the object. - -\code{mzIntervalExtension},\code{mzIntervalExtension<-}: getter -and setter for the \code{mzIntervalExtension} slot of the object. - -\code{polarity},\code{polarity<-}: getter and -setter for the \code{polarity} slot of the object. -} -\details{ -See \code{\link{centWave}} for details on the centWave method. - -Parallel processing (one process per sample) is supported and can -be configured either by the \code{BPPARAM} parameter or by globally defining -the parallel processing mode using the \code{\link[BiocParallel]{register}} -method from the \code{BiocParallel} package. -} -\section{Slots}{ - -\describe{ -\item{\code{.__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,snthreshIsoROIs,maxCharge,maxIso,mzIntervalExtension,polarity}}{See corresponding parameter above. \code{.__classVersion__} stores -the version from the class. Slots values should exclusively be accessed -\emph{via} the corresponding getter and setter methods listed above.} -}} -\note{ -These methods and classes are part of the updated and modernized -\code{xcms} user interface which will eventually replace the -\code{\link{findPeaks}} methods. 
It supports feature detection on -\code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} -objects (both defined in the \code{MSnbase} package). All of the settings -to the centWave algorithm can be passed with a \code{CentWaveParam} object. -} -\examples{ - -## Create a CentWaveParam object -p <- CentWavePredIsoParam(maxCharge = 4) -## Change snthresh parameter -snthresh(p) <- 25 -p - -} -\author{ -Hendrik Treutler, Johannes Rainer -} -\seealso{ -The \code{\link{do_detectFeatures_centWaveWithPredIsoROIs}} core -API function and \code{\link{findPeaks.centWave}} for the old user interface. -\code{\link{CentWaveParam}} for the class the \code{CentWavePredIsoParam} -extends. - -\code{\link{XCMSnExp}} for the object containing the results of -the feature detection. - -Other feature detection methods: \code{\link{detectFeatures}}, - \code{\link{featureDetection-MSW}}, - \code{\link{featureDetection-centWave}}, - \code{\link{featureDetection-massifquant}}, - \code{\link{featureDetection-matchedFilter}} -} - diff --git a/man/fillChromPeaks.Rd b/man/fillChromPeaks.Rd new file mode 100644 index 000000000..6299d1b08 --- /dev/null +++ b/man/fillChromPeaks.Rd @@ -0,0 +1,209 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, +% R/methods-Params.R, R/methods-XCMSnExp.R +\docType{class} +\name{FillChromPeaksParam-class} +\alias{FillChromPeaksParam-class} +\alias{FillChromPeaksParam} +\alias{show,FillChromPeaksParam-method} +\alias{expandMz,FillChromPeaksParam-method} +\alias{expandMz} +\alias{expandMz<-,FillChromPeaksParam-method} +\alias{expandMz<-} +\alias{expandRt,FillChromPeaksParam-method} +\alias{expandRt} +\alias{expandRt<-,FillChromPeaksParam-method} +\alias{expandRt<-} +\alias{ppm,FillChromPeaksParam-method} +\alias{ppm<-,FillChromPeaksParam-method} +\alias{fillChromPeaks,XCMSnExp,FillChromPeaksParam-method} +\alias{fillChromPeaks} +\alias{fillChromPeaks,XCMSnExp,missing-method} 
+\title{Integrate areas of missing peaks}
+\usage{
+FillChromPeaksParam(expandMz = 0, expandRt = 0, ppm = 0)
+
+\S4method{show}{FillChromPeaksParam}(object)
+
+\S4method{expandMz}{FillChromPeaksParam}(object)
+
+\S4method{expandMz}{FillChromPeaksParam}(object) <- value
+
+\S4method{expandRt}{FillChromPeaksParam}(object)
+
+\S4method{expandRt}{FillChromPeaksParam}(object) <- value
+
+\S4method{ppm}{FillChromPeaksParam}(object)
+
+\S4method{ppm}{FillChromPeaksParam}(object) <- value
+
+\S4method{fillChromPeaks}{XCMSnExp,FillChromPeaksParam}(object, param,
+  BPPARAM = bpparam())
+
+\S4method{fillChromPeaks}{XCMSnExp,missing}(object, param,
+  BPPARAM = bpparam())
+}
+\arguments{
+\item{expandMz}{\code{numeric(1)} defining the value by which the mz width of
+peaks should be expanded. Each peak is expanded in mz direction by
+\code{expandMz *} their original mz width. A value of \code{0} means no
+expansion, a value of \code{1} grows each peak by 1 * the mz width of
+the peak resulting in peaks with twice their original size in mz
+direction (expansion by half mz width to both sides).}
+
+\item{expandRt}{\code{numeric(1)}, same as \code{expandMz} but for the
+retention time width.}
+
+\item{ppm}{\code{numeric(1)} optionally specifying a \emph{ppm} by which the
+mz width of the peak region should be expanded. For peaks with an mz
+width smaller than \code{mean(c(mzmin, mzmax)) * ppm / 1e6}, the
+\code{mzmin} will be replaced by
+\code{mean(c(mzmin, mzmax)) - (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)}
+and \code{mzmax} by
+\code{mean(c(mzmin, mzmax)) + (mean(c(mzmin, mzmax)) * ppm / 2 / 1e6)}.
+This is applied before eventually expanding the mz width using the +\code{expandMz} parameter.} + +\item{object}{\code{XCMSnExp} object with identified and grouped +chromatographic peaks.} + +\item{value}{The value for the slot.} + +\item{param}{A \code{FillChromPeaksParam} object with all settings.} + +\item{BPPARAM}{Parallel processing settings.} +} +\value{ +The \code{FillChromPeaksParam} function returns a +\code{FillChromPeaksParam} object. + +A \code{\link{XCMSnExp}} object with previously missing + chromatographic peaks for features filled into its \code{chromPeaks} + matrix. +} +\description{ +The \code{FillChromPeaksParam} object encapsules all settings for +the signal integration for missing peaks. + +\code{expandMz},\code{expandMz<-}: getter and setter + for the \code{expandMz} slot of the object. + +\code{expandRt},\code{expandRt<-}: getter and setter + for the \code{expandRt} slot of the object. + +\code{ppm},\code{ppm<-}: getter and setter + for the \code{ppm} slot of the object. + +Integrate signal in the mz-rt area of a feature (chromatographic + peak group) for samples in which no chromatographic peak for this + feature was identified and add it to the \code{chromPeaks}. Such peaks + will have a value of \code{1} in the \code{"is_filled"} column of the + \code{\link{chromPeaks}} matrix of the object. +} +\details{ +After correspondence (i.e. grouping of chromatographic peaks across + samples) there will always be features (peak groups) that do not include + peaks from every sample. The \code{fillChromPeaks} method defines + intensity values for such features in the missing samples by integrating + the signal in the mz-rt region of the feature. The mz-rt area is defined + by the median mz and rt start and end points of the other detected + chromatographic peaks for a given feature. + + Adjusted retention times will be used if available. 
+ + Based on the peak finding algorithm that was used to identify the + (chromatographic) peaks different internal functions are employed to + guarantee that the integrated peak signal matches as much as possible + the peak signal integration used during the peak detection. For peaks + identified with the \code{\link{matchedFilter}} method, signal + integration is performed on the \emph{profile matrix} generated with + the same settings used also during peak finding (using the same + \code{bin} size for example). For direct injection data and peaks + identified with the \code{\link{MSW}} algorithm signal is integrated + only along the mz dimension. For all other methods the complete (raw) + signal within the area defined by \code{"mzmin"}, \code{"mzmax"}, + \code{"rtmin"} and \code{"rtmax"} is used. +} +\section{Slots}{ + +\describe{ +\item{\code{.__classVersion__,expandMz,expandRt,ppm}}{See corresponding parameter above. \code{.__classVersion__} stores the version of the class.} +}} + +\note{ +The reported \code{"mzmin"}, \code{"mzmax"}, \code{"rtmin"} and + \code{"rtmax"} for the filled peaks represents the actual MS area from + which the signal was integrated. + Note that no peak is filled in if no signal was present in a file/sample + in the respective mz-rt area. These samples will still show a \code{NA} + in the matrix returned by the \code{\link{featureValues}} method. This + is in contrast to the \code{\link{fillPeaks.chrom}} method that returned + an \code{"into"} and \code{"maxo"} of \code{0} for such peak areas. + Growing the mz-rt area using the \code{expandMz} and \code{expandRt} + might help to reduce the number of missing peak signals after filling. +} +\examples{ + +## Perform the peak detection using centWave on some of the files from the +## faahKO package. 
Files are read using the readMSData2 from the MSnbase +## package +library(faahKO) +library(xcms) +fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, + full.names = TRUE) +raw_data <- readMSData2(fls[1:2]) + +## Create a CentWaveParam object. Note that the noise is set to 10000 to +## speed up the execution of the example - in a real use case the default +## value should be used, or it should be set to a reasonable value. +cwp <- CentWaveParam(ppm = 20, noise = 10000, snthresh = 25) + +res <- findChromPeaks(raw_data, param = cwp) + +## Perform the correspondence. +res <- groupChromPeaks(res, param = PeakDensityParam()) + +## For how many features do we lack an integrated peak signal? +sum(is.na(featureValues(res))) + +## Filling missing peak data using default settings. +res <- fillChromPeaks(res) + +## Get the peaks that have been filled in: +fp <- chromPeaks(res)[chromPeaks(res)[, "is_filled"] == 1, ] +head(fp) + +## Did we get a signal for all missing peaks? +sum(is.na(featureValues(res))) + +## No. + +## Get the process history step along with the parameters used to perform +## The peak filling: +ph <- processHistory(res, type = "Missing peak filling")[[1]] +ph + +## The parameter class: +ph@param + +## Drop the filled in peaks: +res <- dropFilledChromPeaks(res) + +## Perform the peak filling with modified settings: allow expansion of the +## mz range by a specified ppm and expanding the mz range by mz width/2 +prm <- FillChromPeaksParam(ppm = 40, expandMz = 0.5) +res <- fillChromPeaks(res, param = prm) + +## Did we get a signal for all missing peaks? +sum(is.na(featureValues(res))) + +## Still the same missing peaks. +} +\seealso{ +\code{\link{groupChromPeaks}} for methods to perform the + correspondence. + \code{\link{dropFilledChromPeaks}} for the method to remove filled in peaks. 
+} +\author{ +Johannes Rainer +} diff --git a/man/fillPeaks.MSW-methods.Rd b/man/fillPeaks.MSW-methods.Rd index 18521b5d6..2e6d6eca6 100644 --- a/man/fillPeaks.MSW-methods.Rd +++ b/man/fillPeaks.MSW-methods.Rd @@ -27,6 +27,13 @@ end points of the other detected peaks. } +\note{ + In contrast to the \code{\link{fillPeaks.chrom}} method the maximum + intensity reported in column \code{"maxo"} is not the maximum + intensity measured in the expected peak area (defined by columns + \code{"mzmin"} and \code{"mzmax"}), but the largest intensity of mz + value(s) closest to the \code{"mzmed"} of the feature. +} \value{ A \code{xcmsSet} objects with filled in peak groups. } diff --git a/man/fillPeaks.chrom-methods.Rd b/man/fillPeaks.chrom-methods.Rd index 6ae615dd1..76d7ac4c9 100644 --- a/man/fillPeaks.chrom-methods.Rd +++ b/man/fillPeaks.chrom-methods.Rd @@ -12,19 +12,27 @@ \section{Methods}{ \describe{ \item{object = "xcmsSet"}{ - \code{fillPeaks.chrom(object, nSlaves=0,expand.mz=1,expand.rt=1)} + \code{fillPeaks.chrom(object, nSlaves=0,expand.mz=1,expand.rt=1, + BPPARAM = bpparam())} } }} \arguments{ \item{object}{the \code{xcmsSet} object} - \item{nSlaves}{number of slaves/cores to be used for parallel peak filling. + \item{nSlaves}{(DEPRECATED): number of slaves/cores to be used for + parallel peak filling. MPI is used if installed, otherwise the snow package is employed for multicore support. If none of the two packages is available it uses the parallel package for parallel processing on multiple CPUs of the - current machine.} + current machine. 
Users are advised to use the \code{BPPARAM}
+    parameter instead.}
   \item{expand.mz}{Expansion factor for the m/z range used for integration.}
-  \item{expand.rt}{Expansion factor for the rentention time range used for integration.}
+  \item{expand.rt}{Expansion factor for the retention time range used
+    for integration.}
+  \item{BPPARAM}{allows to define a specific parallel processing setup
+    for the current task (see \code{\link[BiocParallel]{bpparam}} from the
+    \code{BiocParallel} package help for more information). The default uses
+    the globally defined parallel setup.}
 }
 \details{
   After peak grouping, there will always be peak groups that do not
diff --git a/man/featureDetection-centWave.Rd b/man/findChromPeaks-centWave.Rd
similarity index 59%
rename from man/featureDetection-centWave.Rd
rename to man/findChromPeaks-centWave.Rd
index 3c1cf44e6..9a9bacfb1 100644
--- a/man/featureDetection-centWave.Rd
+++ b/man/findChromPeaks-centWave.Rd
@@ -1,78 +1,75 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/DataClasses.R, R/functions-Params.R, R/methods-OnDiskMSnExp.R, R/methods-Params.R
+% Please edit documentation in R/DataClasses.R, R/functions-Params.R,
+% R/methods-OnDiskMSnExp.R, R/methods-Params.R
 \docType{class}
-\name{featureDetection-centWave}
-\alias{CentWaveParam}
-\alias{CentWaveParam-class}
 \alias{centWave}
-\alias{detectFeatures,MSnExp,CentWaveParam-method}
-\alias{detectFeatures,OnDiskMSnExp,CentWaveParam-method}
-\alias{featureDetection-centWave}
-\alias{firstBaselineCheck}
-\alias{firstBaselineCheck,CentWaveParam-method}
-\alias{firstBaselineCheck<-}
-\alias{firstBaselineCheck<-,CentWaveParam-method}
-\alias{fitgauss}
-\alias{fitgauss,CentWaveParam-method}
-\alias{fitgauss<-}
-\alias{fitgauss<-,CentWaveParam-method}
-\alias{integrate,CentWaveParam-method}
-\alias{integrate<-}
-\alias{integrate<-,CentWaveParam-method}
-\alias{mzCenterFun}
+\alias{CentWaveParam-class} +\alias{CentWaveParam} +\alias{findChromPeaks,OnDiskMSnExp,CentWaveParam-method} +\alias{show,CentWaveParam-method} +\alias{ppm,CentWaveParam-method} +\alias{ppm} +\alias{ppm<-,CentWaveParam-method} +\alias{ppm<-} +\alias{peakwidth,CentWaveParam-method} +\alias{peakwidth} +\alias{peakwidth<-,CentWaveParam-method} +\alias{peakwidth<-} +\alias{snthresh,CentWaveParam-method} +\alias{snthresh} +\alias{snthresh<-,CentWaveParam-method} +\alias{snthresh<-} +\alias{prefilter,CentWaveParam-method} +\alias{prefilter} +\alias{prefilter<-,CentWaveParam-method} +\alias{prefilter<-} \alias{mzCenterFun,CentWaveParam-method} -\alias{mzCenterFun<-} +\alias{mzCenterFun} \alias{mzCenterFun<-,CentWaveParam-method} -\alias{mzdiff} +\alias{mzCenterFun<-} +\alias{integrate,CentWaveParam-method} +\alias{integrate<-,CentWaveParam-method} +\alias{integrate<-} \alias{mzdiff,CentWaveParam-method} -\alias{mzdiff<-} +\alias{mzdiff} \alias{mzdiff<-,CentWaveParam-method} -\alias{noise} +\alias{mzdiff<-} +\alias{fitgauss,CentWaveParam-method} +\alias{fitgauss} +\alias{fitgauss<-,CentWaveParam-method} +\alias{fitgauss<-} \alias{noise,CentWaveParam-method} -\alias{noise<-} +\alias{noise} \alias{noise<-,CentWaveParam-method} -\alias{peakwidth} -\alias{peakwidth,CentWaveParam-method} -\alias{peakwidth<-} -\alias{peakwidth<-,CentWaveParam-method} -\alias{ppm} -\alias{ppm,CentWaveParam-method} -\alias{ppm<-} -\alias{ppm<-,CentWaveParam-method} -\alias{prefilter} -\alias{prefilter,CentWaveParam-method} -\alias{prefilter<-} -\alias{prefilter<-,CentWaveParam-method} -\alias{roiList} +\alias{noise<-} +\alias{verboseColumns,CentWaveParam-method} +\alias{verboseColumns} +\alias{verboseColumns<-,CentWaveParam-method} +\alias{verboseColumns<-} \alias{roiList,CentWaveParam-method} -\alias{roiList<-} +\alias{roiList} \alias{roiList<-,CentWaveParam-method} -\alias{roiScales} +\alias{roiList<-} +\alias{firstBaselineCheck,CentWaveParam-method} +\alias{firstBaselineCheck} 
+\alias{firstBaselineCheck<-,CentWaveParam-method} +\alias{firstBaselineCheck<-} \alias{roiScales,CentWaveParam-method} -\alias{roiScales<-} +\alias{roiScales} \alias{roiScales<-,CentWaveParam-method} -\alias{show,CentWaveParam-method} -\alias{snthresh} -\alias{snthresh,CentWaveParam-method} -\alias{snthresh<-} -\alias{snthresh<-,CentWaveParam-method} -\alias{verboseColumns} -\alias{verboseColumns,CentWaveParam-method} -\alias{verboseColumns<-} -\alias{verboseColumns<-,CentWaveParam-method} -\title{Feature detection using the centWave method} +\alias{roiScales<-} +\title{Chromatographic peak detection using the centWave method} \usage{ CentWaveParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, mzdiff = -0.001, fitgauss = FALSE, noise = 0, verboseColumns = FALSE, roiList = list(), firstBaselineCheck = TRUE, roiScales = numeric()) -\S4method{detectFeatures}{OnDiskMSnExp,CentWaveParam}(object, param, +\S4method{findChromPeaks}{OnDiskMSnExp,CentWaveParam}(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") -\S4method{detectFeatures}{MSnExp,CentWaveParam}(object, param, - BPPARAM = bpparam(), return.type = "list") - \S4method{show}{CentWaveParam}(object) \S4method{ppm}{CentWaveParam}(object) @@ -128,65 +125,70 @@ CentWaveParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, \S4method{roiScales}{CentWaveParam}(object) <- value } \arguments{ -\item{ppm}{Maximal tolerated m/z deviation in consecutive scans in parts -per million (ppm).} +\item{ppm}{\code{numeric(1)} defining the maximal tolerated m/z deviation in +consecutive scans in parts per million (ppm) for the initial ROI +definition.} -\item{peakwidth}{numeric(2) with the expected approximate -feature/peak width in chromatographic space. Given as a range (min, max) +\item{peakwidth}{\code{numeric(2)} with the expected approximate +peak width in chromatographic space. 
Given as a range (min, max) in seconds.} -\item{snthresh}{numeric(1) defining the signal to noise ratio cutoff.} +\item{snthresh}{\code{numeric(1)} defining the signal to noise ratio cutoff.} -\item{prefilter}{numeric(2): \code{c(k, I)} specifying the prefilter +\item{prefilter}{\code{numeric(2)}: \code{c(k, I)} specifying the prefilter step for the first analysis step (ROI detection). Mass traces are only -retained if they contain at least \code{k} peaks with intensity \code{>= I}.} +retained if they contain at least \code{k} peaks with intensity +\code{>= I}.} \item{mzCenterFun}{Name of the function to calculate the m/z center of the -feature. Allowed are: \code{"wMean"}: intensity weighted mean of the feature's -m/z values, \code{"mean"}: mean of the feature's m/z values, \code{"apex"}: -use the m/z value at the peak apex, \code{"wMeanApex3"}: intensity weighted -mean of the m/z value at the peak apex and the m/z values left and right of -it and \code{"meanApex3"}: mean of the m/z value of the peak apex and the -m/z values left and right of it.} +chromatographic peak. Allowed are: \code{"wMean"}: intensity weighted +mean of the peak's m/z values, \code{"mean"}: mean of the peak's m/z +values, \code{"apex"}: use the m/z value at the peak apex, +\code{"wMeanApex3"}: intensity weighted mean of the m/z value at the +peak apex and the m/z values left and right of it and \code{"meanApex3"}: +mean of the m/z value of the peak apex and the m/z values left and right +of it.} \item{integrate}{Integration method. For \code{integrate = 1} peak limits are found through descent on the mexican hat filtered data, for -\code{integrate = 2} the descent is done on the real data. The latter method -is more accurate but prone to noise, while the former is more robust, but -less exact.} +\code{integrate = 2} the descent is done on the real data. 
The latter
+method is more accurate but prone to noise, while the former is more
+robust, but less exact.}
 
-\item{mzdiff}{Numeric representing the minimum difference in m/z dimension
-for peaks with overlapping retention times; can be negatove to allow overlap.}
+\item{mzdiff}{\code{numeric(1)} representing the minimum difference in m/z
+dimension for peaks with overlapping retention times; can be negative to
+allow overlap.}
 
-\item{fitgauss}{Logical whether or not a Gaussian should be fitted to each
-peak.}
+\item{fitgauss}{\code{logical(1)} whether or not a Gaussian should be fitted
+to each peak.}
 
-\item{noise}{numeric(1) allowing to set a minimum intensity required
+\item{noise}{\code{numeric(1)} allowing to set a minimum intensity required
 for centroids to be considered in the first analysis step (centroids with
 intensity \code{< noise} are omitted from ROI detection).}
 
-\item{verboseColumns}{Logical whether additional feature meta data columns
-should be returned.}
+\item{verboseColumns}{\code{logical(1)} whether additional peak meta data
+columns should be returned.}
 
 \item{roiList}{An optional list of regions-of-interest (ROI) representing
 detected mass traces. If ROIs are submitted the first analysis step is
-omitted and feature detection is performed on the submitted ROIs. Each
-ROI is expected to have the following elements specified:
+omitted and chromatographic peak detection is performed on the submitted
+ROIs. Each ROI is expected to have the following elements specified:
 \code{scmin} (start scan index), \code{scmax} (end scan index),
 \code{mzmin} (minimum m/z), \code{mzmax} (maximum m/z), \code{length}
-(number of scans), \code{intensity} (summed intensity). Each ROI should be
-represented by a \code{list} of elements or a single row \code{data.frame}.}
+(number of scans), \code{intensity} (summed intensity). Each ROI should
+be represented by a \code{list} of elements or a single row
+\code{data.frame}.}
 
-\item{firstBaselineCheck}{logical(1). 
If \code{TRUE} continuous +\item{firstBaselineCheck}{\code{logical(1)}. If \code{TRUE} continuous data within regions of interest is checked to be above the first baseline.} \item{roiScales}{Optional numeric vector with length equal to \code{roiList} -defining the scale for each region of interest in \code{roiList} that should -be used for the centWave-wavelets.} +defining the scale for each region of interest in \code{roiList} that +should be used for the centWave-wavelets.} -\item{object}{For \code{detectFeatures}: Either an -\code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -object containing the MS- and all other experiment-relevant data. +\item{object}{For \code{findChromPeaks}: an +\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +experiment-relevant data. For all other methods: a parameter object.} @@ -196,11 +198,11 @@ centWave algorithm.} \item{BPPARAM}{A parameter class specifying if and how parallel processing should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. See documentation of the \code{BiocParallel} for more details. If parallel -processing is enables, feature detection is performed in parallel on several +processing is enables, peak detection is performed in parallel on several of the input samples.} \item{return.type}{Character specifying what type of object the method should -return. Can be either \code{"XCMSnExp"} (code), \code{"list"} or +return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or \code{"xcmsSet"}.} \item{value}{The value for the slot.} @@ -209,86 +211,81 @@ return. Can be either \code{"XCMSnExp"} (code), \code{"list"} or } \value{ The \code{CentWaveParam} function returns a \code{CentWaveParam} -class instance with all of the settings specified for feature detection by -the centWave method. +class instance with all of the settings specified for chromatographic peak +detection by the centWave method. 
-For \code{detectFeatures}: if \code{return.type = "XCMSnExp"} an -\code{\link{XCMSnExp}} object with the results of the feature detection. +For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +\code{\link{XCMSnExp}} object with the results of the peak detection. If \code{return.type = "list"} a list of length equal to the number of -samples with matrices specifying the identified features/peaks. +samples with matrices specifying the identified peaks. If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -with the results of the feature detection. +with the results of the peak detection. } \description{ The centWave algorithm perform peak density and wavelet based -feature detection for high resolution LC/MS data in centroid -mode [Tautenhahn 2008]. + chromatographic peak detection for high resolution LC/MS data in centroid + mode [Tautenhahn 2008]. -The \code{CentWaveParam} class allows to specify all settings for -a feature detection using the centWave method. Instances should be created -with the \code{CentWaveParam} constructor. +The \code{CentWaveParam} class allows to specify all settings + for a chromatographic peak detection using the centWave method. Instances + should be created with the \code{CentWaveParam} constructor. -The \code{detectFeatures,OnDiskMSnExp,CentWaveParam} method -performs feature detection using the \emph{centWave} algorithm on all -samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. +The \code{detectChromPeaks,OnDiskMSnExp,CentWaveParam} method +performs chromatographic peak detection using the \emph{centWave} algorithm +on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific data and load the spectra data (mz and intensity values) on the fly from the original files applying also all eventual data manipulations. 
-The \code{detectFeatures,MSnExp,CentWaveParam} method performs -feature detection using the \emph{centWave} algorithm on all samples from -an \code{\link[MSnbase]{MSnExp}} object. These objects contain mz and -intensity values of all spectra hence no additional data input from the -original files is required. - \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} -slot of the object. + slot of the object. \code{peakwidth},\code{peakwidth<-}: getter and setter for the -\code{peakwidth} slot of the object. + \code{peakwidth} slot of the object. \code{snthresh},\code{snthresh<-}: getter and setter for the -\code{snthresh} slot of the object. + \code{snthresh} slot of the object. \code{prefilter},\code{prefilter<-}: getter and setter for the -\code{prefilter} slot of the object. + \code{prefilter} slot of the object. \code{mzCenterFun},\code{mzCenterFun<-}: getter and setter for the -\code{mzCenterFun} slot of the object. + \code{mzCenterFun} slot of the object. \code{integrate},\code{integrate<-}: getter and setter for the -\code{integrate} slot of the object. + \code{integrate} slot of the object. \code{mzdiff},\code{mzdiff<-}: getter and setter for the -\code{mzdiff} slot of the object. + \code{mzdiff} slot of the object. \code{fitgauss},\code{fitgauss<-}: getter and setter for the -\code{fitgauss} slot of the object. + \code{fitgauss} slot of the object. \code{noise},\code{noise<-}: getter and setter for the -\code{noise} slot of the object. + \code{noise} slot of the object. \code{verboseColumns},\code{verboseColumns<-}: getter and -setter for the \code{verboseColumns} slot of the object. + setter for the \code{verboseColumns} slot of the object. \code{roiList},\code{roiList<-}: getter and setter for the -\code{roiList} slot of the object. + \code{roiList} slot of the object. \code{fistBaselineCheck},\code{firstBaselineCheck<-}: getter -and setter for the \code{firstBaselineCheck} slot of the object. 
+ and setter for the \code{firstBaselineCheck} slot of the object. \code{roiScales},\code{roiScales<-}: getter and setter for the -\code{roiScales} slot of the object. + \code{roiScales} slot of the object. } \details{ The centWave algorithm is most suitable for high resolution -LC/\{TOF,OrbiTrap,FTICR\}-MS data in centroid mode. In the first phase the -method identifies \emph{regions of interest} (ROIs) representing mass traces -that are characterized as regions with less than \code{ppm} m/z deviation in -consecutive scans in the LC/MS map. These ROIs are then subsequently -analyzed using continuous wavelet transform (CWT) to locate chromatographic -peaks on different scales. The first analysis step is skipped, if regions -of interest are passed \emph{via} the \code{param} parameter. + LC/\{TOF,OrbiTrap,FTICR\}-MS data in centroid mode. In the first phase + the method identifies \emph{regions of interest} (ROIs) representing + mass traces that are characterized as regions with less than \code{ppm} + m/z deviation in consecutive scans in the LC/MS map. These ROIs are + then subsequently analyzed using continuous wavelet transform (CWT) + to locate chromatographic peaks on different scales. The first analysis + step is skipped, if regions of interest are passed \emph{via} the + \code{param} parameter. Parallel processing (one process per sample) is supported and can be configured either by the \code{BPPARAM} parameter or by globally defining @@ -302,13 +299,15 @@ method from the \code{BiocParallel} package. the version from the class. Slots values should exclusively be accessed \emph{via} the corresponding getter and setter methods listed above.} }} + \note{ These methods and classes are part of the updated and modernized -\code{xcms} user interface which will eventually replace the -\code{\link{findPeaks}} methods. 
It supports feature detection on -\code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} -objects (both defined in the \code{MSnbase} package). All of the settings -to the centWave algorithm can be passed with a \code{CentWaveParam} object. + \code{xcms} user interface which will eventually replace the + \code{\link{findPeaks}} methods. It supports peak detection on + \code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} + objects (both defined in the \code{MSnbase} package). All of the settings + to the centWave algorithm can be passed with a \code{CentWaveParam} + object. } \examples{ @@ -320,22 +319,18 @@ cwp <- CentWaveParam(ppm = 20, noise = 10000) snthresh(cwp) <- 25 cwp -## Perform the feature detection using centWave on some of the files from the +## Perform the peak detection using centWave on some of the files from the ## faahKO package. Files are read using the readMSData2 from the MSnbase ## package library(faahKO) -library(MSnbase) +library(xcms) fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, full.names = TRUE) raw_data <- readMSData2(fls[1:2]) -## Perform the feature detection using the settings defined above. We're -## returning the results as an xcmsSet object. -res <- detectFeatures(raw_data, param = cwp, return.type = "xcmsSet") -head(peaks(res)) -} -\author{ -Ralf Tautenhahn, Johannes Rainer +## Perform the peak detection using the settings defined above. +res <- findChromPeaks(raw_data, param = cwp) +head(chromPeaks(res)) } \references{ Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly @@ -343,16 +338,18 @@ sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics} 2008, 9:504 } \seealso{ -The \code{\link{do_detectFeatures_centWave}} core API function and -\code{\link{findPeaks.centWave}} for the old user interface. +The \code{\link{do_findChromPeaks_centWave}} core API function and + \code{\link{findPeaks.centWave}} for the old user interface. 
\code{\link{XCMSnExp}} for the object containing the results of -the feature detection. +the peak detection. -Other feature detection methods: \code{\link{detectFeatures}}, - \code{\link{featureDetection-MSW}}, - \code{\link{featureDetection-centWaveWithPredIsoROIs}}, - \code{\link{featureDetection-massifquant}}, - \code{\link{featureDetection-matchedFilter}} +Other peak detection methods: \code{\link{chromatographic-peak-detection}}, + \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, + \code{\link{findChromPeaks-massifquant}}, + \code{\link{findChromPeaks-matchedFilter}}, + \code{\link{findPeaks-MSW}} +} +\author{ +Ralf Tautenhahn, Johannes Rainer } - diff --git a/man/findChromPeaks-centWaveWithPredIsoROIs.Rd b/man/findChromPeaks-centWaveWithPredIsoROIs.Rd new file mode 100644 index 000000000..a86b35f24 --- /dev/null +++ b/man/findChromPeaks-centWaveWithPredIsoROIs.Rd @@ -0,0 +1,268 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, +% R/methods-OnDiskMSnExp.R, R/methods-Params.R +\docType{class} +\name{findChromPeaks-centWaveWithPredIsoROIs} +\alias{findChromPeaks-centWaveWithPredIsoROIs} +\alias{centWaveWithPredIsoROIs} +\alias{CentWavePredIsoParam-class} +\alias{CentWavePredIsoParam} +\alias{findChromPeaks,OnDiskMSnExp,CentWavePredIsoParam-method} +\alias{show,CentWavePredIsoParam-method} +\alias{snthreshIsoROIs,CentWavePredIsoParam-method} +\alias{snthreshIsoROIs} +\alias{snthreshIsoROIs<-,CentWavePredIsoParam-method} +\alias{snthreshIsoROIs<-} +\alias{maxCharge,CentWavePredIsoParam-method} +\alias{maxCharge} +\alias{maxCharge<-,CentWavePredIsoParam-method} +\alias{maxCharge<-} +\alias{maxIso,CentWavePredIsoParam-method} +\alias{maxIso} +\alias{maxIso<-,CentWavePredIsoParam-method} +\alias{maxIso<-} +\alias{mzIntervalExtension,CentWavePredIsoParam-method} +\alias{mzIntervalExtension} +\alias{mzIntervalExtension<-,CentWavePredIsoParam-method} +\alias{mzIntervalExtension<-} 
+\alias{polarity,CentWavePredIsoParam-method} +\alias{polarity<-,CentWavePredIsoParam-method} +\alias{polarity<-} +\title{Two-step centWave peak detection considering also isotopes} +\usage{ +CentWavePredIsoParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, + prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, + mzdiff = -0.001, fitgauss = FALSE, noise = 0, verboseColumns = FALSE, + roiList = list(), firstBaselineCheck = TRUE, roiScales = numeric(), + snthreshIsoROIs = 6.25, maxCharge = 3, maxIso = 5, + mzIntervalExtension = TRUE, polarity = "unknown") + +\S4method{findChromPeaks}{OnDiskMSnExp,CentWavePredIsoParam}(object, param, + BPPARAM = bpparam(), return.type = "XCMSnExp") + +\S4method{show}{CentWavePredIsoParam}(object) + +\S4method{snthreshIsoROIs}{CentWavePredIsoParam}(object) + +\S4method{snthreshIsoROIs}{CentWavePredIsoParam}(object) <- value + +\S4method{maxCharge}{CentWavePredIsoParam}(object) + +\S4method{maxCharge}{CentWavePredIsoParam}(object) <- value + +\S4method{maxIso}{CentWavePredIsoParam}(object) + +\S4method{maxIso}{CentWavePredIsoParam}(object) <- value + +\S4method{mzIntervalExtension}{CentWavePredIsoParam}(object) + +\S4method{mzIntervalExtension}{CentWavePredIsoParam}(object) <- value + +\S4method{polarity}{CentWavePredIsoParam}(object) + +\S4method{polarity}{CentWavePredIsoParam}(object) <- value +} +\arguments{ +\item{ppm}{\code{numeric(1)} defining the maximal tolerated m/z deviation in +consecutive scans in parts per million (ppm) for the initial ROI +definition.} + +\item{peakwidth}{\code{numeric(2)} with the expected approximate +peak width in chromatographic space. Given as a range (min, max) +in seconds.} + +\item{snthresh}{\code{numeric(1)} defining the signal to noise ratio cutoff.} + +\item{prefilter}{\code{numeric(2)}: \code{c(k, I)} specifying the prefilter +step for the first analysis step (ROI detection). 
Mass traces are only +retained if they contain at least \code{k} peaks with intensity +\code{>= I}.} + +\item{mzCenterFun}{Name of the function to calculate the m/z center of the +chromatographic peak. Allowed are: \code{"wMean"}: intensity weighted +mean of the peak's m/z values, \code{"mean"}: mean of the peak's m/z +values, \code{"apex"}: use the m/z value at the peak apex, +\code{"wMeanApex3"}: intensity weighted mean of the m/z value at the +peak apex and the m/z values left and right of it and \code{"meanApex3"}: +mean of the m/z value of the peak apex and the m/z values left and right +of it.} + +\item{integrate}{Integration method. For \code{integrate = 1} peak limits +are found through descent on the mexican hat filtered data, for +\code{integrate = 2} the descent is done on the real data. The latter +method is more accurate but prone to noise, while the former is more +robust, but less exact.} + +\item{mzdiff}{\code{numeric(1)} representing the minimum difference in m/z +dimension for peaks with overlapping retention times; can be negative to +allow overlap.} + +\item{fitgauss}{\code{logical(1)} whether or not a Gaussian should be fitted +to each peak.} + +\item{noise}{\code{numeric(1)} allowing to set a minimum intensity required +for centroids to be considered in the first analysis step (centroids with +intensity \code{< noise} are omitted from ROI detection).} + +\item{verboseColumns}{\code{logical(1)} whether additional peak meta data +columns should be returned.} + +\item{roiList}{An optional list of regions-of-interest (ROI) representing +detected mass traces. If ROIs are submitted the first analysis step is +omitted and chromatographic peak detection is performed on the submitted +ROIs. Each ROI is expected to have the following elements specified: +\code{scmin} (start scan index), \code{scmax} (end scan index), +\code{mzmin} (minimum m/z), \code{mzmax} (maximum m/z), \code{length} +(number of scans), \code{intensity} (summed intensity). 
Each ROI should +be represented by a \code{list} of elements or a single row +\code{data.frame}.} + +\item{firstBaselineCheck}{\code{logical(1)}. If \code{TRUE} continuous +data within regions of interest is checked to be above the first baseline.} + +\item{roiScales}{Optional numeric vector with length equal to \code{roiList} +defining the scale for each region of interest in \code{roiList} that +should be used for the centWave-wavelets.} + +\item{snthreshIsoROIs}{\code{numeric(1)} defining the signal to noise ratio +cutoff to be used in the second centWave run to identify peaks for +predicted isotope ROIs.} + +\item{maxCharge}{\code{integer(1)} defining the maximal isotope charge. +Isotopes will be defined for charges \code{1:maxCharge}.} + +\item{maxIso}{\code{integer(1)} defining the number of isotope peaks that +should be predicted for each peak identified in the first centWave run.} + +\item{mzIntervalExtension}{\code{logical(1)} whether the mz range for the +predicted isotope ROIs should be extended to increase detection of low +intensity peaks.} + +\item{polarity}{\code{character(1)} specifying the polarity of the data. +Currently not used, but has to be \code{"positive"}, \code{"negative"} or +\code{"unknown"} if provided.} + +\item{object}{For \code{findChromPeaks}: an +\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +experiment-relevant data. + +For all other methods: a parameter object.} + +\item{param}{A \code{CentWavePredIsoParam} object with the settings for the +chromatographic peak detection algorithm.} + +\item{BPPARAM}{A parameter class specifying if and how parallel processing +should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. +See documentation of the \code{BiocParallel} for more details. If parallel +processing is enabled, peak detection is performed in parallel on several +of the input samples.} + +\item{return.type}{Character specifying what type of object the method should +return. 
Can be either \code{"XCMSnExp"} (default), \code{"list"} or +\code{"xcmsSet"}.} + +\item{value}{The value for the slot.} +} +\value{ +The \code{CentWavePredIsoParam} function returns a +\code{CentWavePredIsoParam} class instance with all of the settings +specified for the two-step centWave-based peak detection considering also +isotopes. + +For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +\code{\link{XCMSnExp}} object with the results of the peak detection. +If \code{return.type = "list"} a list of length equal to the number of +samples with matrices specifying the identified peaks. +If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object +with the results of the peak detection. +} +\description{ +This method performs a two-step centWave-based chromatographic + peak detection: in a first centWave run peaks are identified for which + then the location of their potential isotopes in the mz-retention time is + predicted. A second centWave run is then performed on these + \emph{regions of interest} (ROIs). The final list of chromatographic + peaks comprises all non-overlapping peaks from both centWave runs. + +The \code{CentWavePredIsoParam} class allows to specify all + settings for the two-step centWave-based peak detection considering also + predicted isotopes of peaks identified in the first centWave run. + Instances should be created with the \code{CentWavePredIsoParam} + constructor. See also the documentation of the + \code{\link{CentWaveParam}} for all methods and arguments this class + inherits. + +The \code{findChromPeaks,OnDiskMSnExp,CentWavePredIsoParam} method +performs a two-step centWave-based chromatographic peak detection on all +samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. +\code{\link[MSnbase]{OnDiskMSnExp}} objects encapsulate all experiment specific +data and load the spectra data (mz and intensity values) on the fly from +the original files applying also all eventual data manipulations. 
+ +\code{snthreshIsoROIs},\code{snthreshIsoROIs<-}: getter and + setter for the \code{snthreshIsoROIs} slot of the object. + +\code{maxCharge},\code{maxCharge<-}: getter and + setter for the \code{maxCharge} slot of the object. + +\code{maxIso},\code{maxIso<-}: getter and + setter for the \code{maxIso} slot of the object. + +\code{mzIntervalExtension},\code{mzIntervalExtension<-}: getter + and setter for the \code{mzIntervalExtension} slot of the object. + +\code{polarity},\code{polarity<-}: getter and + setter for the \code{polarity} slot of the object. +} +\details{ +See \code{\link{centWave}} for details on the centWave method. + +Parallel processing (one process per sample) is supported and can +be configured either by the \code{BPPARAM} parameter or by globally defining +the parallel processing mode using the \code{\link[BiocParallel]{register}} +method from the \code{BiocParallel} package. +} +\section{Slots}{ + +\describe{ +\item{\code{.__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,snthreshIsoROIs,maxCharge,maxIso,mzIntervalExtension,polarity}}{See corresponding parameter above. \code{.__classVersion__} stores +the version from the class. Slots values should exclusively be accessed +\emph{via} the corresponding getter and setter methods listed above.} +}} + +\note{ +These methods and classes are part of the updated and modernized + \code{xcms} user interface which will eventually replace the + \code{\link{findPeaks}} methods. It supports chromatographic peak + detection on \code{\link[MSnbase]{MSnExp}} and + \code{\link[MSnbase]{OnDiskMSnExp}} objects (both defined in the + \code{MSnbase} package). All of the settings to the algorithm can be + passed with a \code{CentWavePredIsoParam} object. 
+} +\examples{ + +## Create a param object +p <- CentWavePredIsoParam(maxCharge = 4) +## Change snthresh parameter +snthresh(p) <- 25 +p + +} +\seealso{ +The \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}} core + API function and \code{\link{findPeaks.centWave}} for the old user + interface. \code{\link{CentWaveParam}} for the class the + \code{CentWavePredIsoParam} extends. + +\code{\link{XCMSnExp}} for the object containing the results of +the peak detection. + +Other peak detection methods: \code{\link{chromatographic-peak-detection}}, + \code{\link{findChromPeaks-centWave}}, + \code{\link{findChromPeaks-massifquant}}, + \code{\link{findChromPeaks-matchedFilter}}, + \code{\link{findPeaks-MSW}} +} +\author{ +Hendrik Treutler, Johannes Rainer +} diff --git a/man/featureDetection-massifquant.Rd b/man/findChromPeaks-massifquant.Rd similarity index 53% rename from man/featureDetection-massifquant.Rd rename to man/findChromPeaks-massifquant.Rd index 590b9da1a..ee03f4bd2 100644 --- a/man/featureDetection-massifquant.Rd +++ b/man/findChromPeaks-massifquant.Rd @@ -1,55 +1,55 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DataClasses.R, R/functions-Params.R, R/methods-OnDiskMSnExp.R, R/methods-Params.R +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, +% R/methods-OnDiskMSnExp.R, R/methods-Params.R \docType{class} -\name{featureDetection-massifquant} -\alias{MassifquantParam} -\alias{MassifquantParam-class} -\alias{checkBack} -\alias{checkBack,MassifquantParam-method} -\alias{checkBack<-} -\alias{checkBack<-,MassifquantParam-method} -\alias{consecMissedLimit} -\alias{consecMissedLimit,MassifquantParam-method} -\alias{consecMissedLimit<-} -\alias{consecMissedLimit<-,MassifquantParam-method} -\alias{criticalValue} -\alias{criticalValue,MassifquantParam-method} -\alias{criticalValue<-} -\alias{criticalValue<-,MassifquantParam-method} -\alias{detectFeatures,MSnExp,MassifquantParam-method} 
-\alias{detectFeatures,OnDiskMSnExp,MassifquantParam-method} -\alias{featureDetection-massifquant} -\alias{fitgauss,MassifquantParam-method} -\alias{fitgauss<-,MassifquantParam-method} -\alias{integrate,MassifquantParam-method} -\alias{integrate<-,MassifquantParam-method} +\name{findChromPeaks-massifquant} +\alias{findChromPeaks-massifquant} \alias{massifquant} +\alias{MassifquantParam-class} +\alias{MassifquantParam} +\alias{findChromPeaks,OnDiskMSnExp,MassifquantParam-method} +\alias{show,MassifquantParam-method} +\alias{ppm,MassifquantParam-method} +\alias{ppm<-,MassifquantParam-method} +\alias{peakwidth,MassifquantParam-method} +\alias{peakwidth<-,MassifquantParam-method} +\alias{snthresh,MassifquantParam-method} +\alias{snthresh<-,MassifquantParam-method} +\alias{prefilter,MassifquantParam-method} +\alias{prefilter<-,MassifquantParam-method} \alias{mzCenterFun,MassifquantParam-method} \alias{mzCenterFun<-,MassifquantParam-method} +\alias{integrate,MassifquantParam-method} +\alias{integrate<-,MassifquantParam-method} \alias{mzdiff,MassifquantParam-method} \alias{mzdiff<-,MassifquantParam-method} +\alias{fitgauss,MassifquantParam-method} +\alias{fitgauss<-,MassifquantParam-method} \alias{noise,MassifquantParam-method} \alias{noise<-,MassifquantParam-method} -\alias{peakwidth,MassifquantParam-method} -\alias{peakwidth<-,MassifquantParam-method} -\alias{ppm,MassifquantParam-method} -\alias{ppm<-,MassifquantParam-method} -\alias{prefilter,MassifquantParam-method} -\alias{prefilter<-,MassifquantParam-method} -\alias{show,MassifquantParam-method} -\alias{snthresh,MassifquantParam-method} -\alias{snthresh<-,MassifquantParam-method} -\alias{unions} -\alias{unions,MassifquantParam-method} -\alias{unions<-} -\alias{unions<-,MassifquantParam-method} \alias{verboseColumns,MassifquantParam-method} \alias{verboseColumns<-,MassifquantParam-method} -\alias{withWave} +\alias{criticalValue,MassifquantParam-method} +\alias{criticalValue} 
+\alias{criticalValue<-,MassifquantParam-method} +\alias{criticalValue<-} +\alias{consecMissedLimit,MassifquantParam-method} +\alias{consecMissedLimit} +\alias{consecMissedLimit<-,MassifquantParam-method} +\alias{consecMissedLimit<-} +\alias{unions,MassifquantParam-method} +\alias{unions} +\alias{unions<-,MassifquantParam-method} +\alias{unions<-} +\alias{checkBack,MassifquantParam-method} +\alias{checkBack} +\alias{checkBack<-,MassifquantParam-method} +\alias{checkBack<-} \alias{withWave,MassifquantParam-method} -\alias{withWave<-} +\alias{withWave} \alias{withWave<-,MassifquantParam-method} -\title{Feature detection using the massifquant method} +\alias{withWave<-} +\title{Chromatographic peak detection using the massifquant method} \usage{ MassifquantParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L, @@ -57,12 +57,9 @@ MassifquantParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, criticalValue = 1.125, consecMissedLimit = 2, unions = 1, checkBack = 0, withWave = FALSE) -\S4method{detectFeatures}{OnDiskMSnExp,MassifquantParam}(object, param, +\S4method{findChromPeaks}{OnDiskMSnExp,MassifquantParam}(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") -\S4method{detectFeatures}{MSnExp,MassifquantParam}(object, param, - BPPARAM = bpparam(), return.type = "list") - \S4method{show}{MassifquantParam}(object) \S4method{ppm}{MassifquantParam}(object) @@ -126,94 +123,97 @@ MassifquantParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10, \S4method{withWave}{MassifquantParam}(object) <- value } \arguments{ -\item{ppm}{Maximal tolerated m/z deviation in consecutive scans in parts -per million (ppm).} +\item{ppm}{\code{numeric(1)} defining the maximal tolerated m/z deviation in +consecutive scans in parts per million (ppm) for the initial ROI +definition.} -\item{peakwidth}{numeric(2). Only the first element is used by -massifquant, which specifices the minimum feature length in time scans. 
+\item{peakwidth}{\code{numeric(2)}. Only the first element is used by +massifquant, which specifices the minimum peak length in time scans. For \code{withWave = TRUE} the second argument represents the maximum -feature length subject to being greater than the mininum feature length -(see also documentation of \code{\link{do_detectFeatures_centWave}}).} +peak length subject to being greater than the mininum peak length +(see also documentation of \code{\link{do_findChromPeaks_centWave}}).} -\item{snthresh}{numeric(1) defining the signal to noise ratio cutoff.} +\item{snthresh}{\code{numeric(1)} defining the signal to noise ratio cutoff.} -\item{prefilter}{numeric(2). The first argument is only used -if (\code{withWave = TRUE}); see \code{\link{do_detectFeatures_centWave}} +\item{prefilter}{\code{numeric(2)}. The first argument is only used +if (\code{withWave = TRUE}); see \code{\link{findChromPeaks-centWave}} for details. The second argument specifies the minimum threshold for the -maximum intensity of a feature that must be met.} +maximum intensity of a chromatographic peak that must be met.} \item{mzCenterFun}{Name of the function to calculate the m/z center of the -feature. Allowed are: \code{"wMean"}: intensity weighted mean of the feature's -m/z values, \code{"mean"}: mean of the feature's m/z values, \code{"apex"}: -use the m/z value at the peak apex, \code{"wMeanApex3"}: intensity weighted -mean of the m/z value at the peak apex and the m/z values left and right of -it and \code{"meanApex3"}: mean of the m/z value of the peak apex and the -m/z values left and right of it.} +chromatographic peak. 
Allowed are: \code{"wMean"}: intensity weighted +mean of the peak's m/z values, \code{"mean"}: mean of the peak's m/z +values, \code{"apex"}: use the m/z value at the peak apex, +\code{"wMeanApex3"}: intensity weighted mean of the m/z value at the +peak apex and the m/z values left and right of it and \code{"meanApex3"}: +mean of the m/z value of the peak apex and the m/z values left and right +of it.} \item{integrate}{Integration method. For \code{integrate = 1} peak limits are found through descent on the mexican hat filtered data, for -\code{integrate = 2} the descent is done on the real data. The latter method -is more accurate but prone to noise, while the former is more robust, but -less exact.} +\code{integrate = 2} the descent is done on the real data. The latter +method is more accurate but prone to noise, while the former is more +robust, but less exact.} -\item{mzdiff}{Numeric representing the minimum difference in m/z dimension -for peaks with overlapping retention times; can be negatove to allow overlap.} +\item{mzdiff}{\code{numeric(1)} representing the minimum difference in m/z +dimension for peaks with overlapping retention times; can be negatove to +allow overlap.} -\item{fitgauss}{Logical whether or not a Gaussian should be fitted to each -peak.} +\item{fitgauss}{\code{logical(1)} whether or not a Gaussian should be fitted +to each peak.} -\item{noise}{numeric(1) allowing to set a minimum intensity required +\item{noise}{\code{numeric(1)} allowing to set a minimum intensity required for centroids to be considered in the first analysis step (centroids with intensity \code{< noise} are omitted from ROI detection).} -\item{verboseColumns}{Logical whether additional feature meta data columns -should be returned.} +\item{verboseColumns}{\code{logical(1)} whether additional peak meta data +columns should be returned.} -\item{criticalValue}{numeric(1). Suggested values: +\item{criticalValue}{\code{numeric(1)}. Suggested values: (\code{0.1-3.0}). 
This setting helps determine the the Kalman Filter prediciton margin of error. A real centroid belonging to a bonafide -feature must fall within the KF prediction margin of error. Much like +peak must fall within the KF prediction margin of error. Much like in the construction of a confidence interval, \code{criticalVal} loosely translates to be a multiplier of the standard error of the prediction -reported by the Kalman Filter. If the features in the XC-MS sample have +reported by the Kalman Filter. If the peaks in the XC-MS sample have a small mass deviance in ppm error, a smaller critical value might be better and vice versa.} -\item{consecMissedLimit}{Integer: Suggested values: (\code{1,2,3}). While -a feature is in the proces of being detected by a Kalman Filter, the +\item{consecMissedLimit}{\code{integer(1)} Suggested values: (\code{1,2,3}). +While a peak is in the process of being detected by a Kalman Filter, the Kalman Filter may not find a predicted centroid in every scan. After 1 or more consecutive failed predictions, this setting informs Massifquant -when to stop a Kalman Filter from following a candidate feature.} +when to stop a Kalman Filter from following a candidate peak.} -\item{unions}{Integer: set to \code{1} if apply t-test union on +\item{unions}{\code{integer(1)} set to \code{1} if apply t-test union on segmentation; set to \code{0} if no t-test to be applied on -chromatographically continous features sharing same m/z range. +chromatographically continuous peaks sharing same m/z range. Explanation: With very few data points, sometimes a Kalman Filter stops -tracking a feature prematurely. Another Kalman Filter is instantiated +tracking a peak prematurely. Another Kalman Filter is instantiated and begins following the rest of the signal. Because tracking is done -backwards to forwards, this algorithmic defect leaves a real feature +backwards to forwards, this algorithmic defect leaves a real peak divided into two segments or more. 
With this option turned on, the -program identifies segmented features and combines them (merges them) +program identifies segmented peaks and combines them (merges them) into one with a two sample t-test. The potential danger of this option -is that some truly distinct features may be merged.} - -\item{checkBack}{Integer: set to \code{1} if turned on; set to \code{0} -if turned off. The convergence of a Kalman Filter to a feature's precise -m/z mapping is very fast, but sometimes it incorporates erroneous centroids -as part of a feature (especially early on). The \code{scanBack} option is an -attempt to remove the occasional outlier that lies beyond the converged -bounds of the Kalman Filter. The option does not directly affect -identification of a feature because it is a postprocessing measure; it -has not shown to be a extremely useful thus far and the default is set +is that some truly distinct peaks may be merged.} + +\item{checkBack}{\code{integer(1)} set to \code{1} if turned on; set to +\code{0} if turned off. The convergence of a Kalman Filter to a peak's +precise m/z mapping is very fast, but sometimes it incorporates erroneous +centroids as part of a peak (especially early on). The \code{scanBack} +option is an attempt to remove the occasional outlier that lies beyond +the converged bounds of the Kalman Filter. 
The option does not directly +affect identification of a peak because it is a postprocessing measure; +it has not shown to be extremely useful thus far and the default is set to being turned off.} -\item{withWave}{Logical: if \code{TRUE}, the features identified first +\item{withWave}{\code{logical(1)} if \code{TRUE}, the peaks identified first with Massifquant are subsequently filtered with the second step of the centWave algorithm, which includes wavelet estimation.} -\item{object}{For \code{detectFeatures}: Either an -\code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -object containing the MS- and all other experiment-relevant data. +\item{object}{For \code{findChromPeaks}: an +\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +experiment-relevant data. For all other methods: a parameter object.} @@ -223,11 +223,11 @@ the massifquant algorithm.} \item{BPPARAM}{A parameter class specifying if and how parallel processing should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. See documentation of the \code{BiocParallel} for more details. If parallel -processing is enables, feature detection is performed in parallel on several +processing is enabled, peak detection is performed in parallel on several of the input samples.} \item{return.type}{Character specifying what type of object the method should -return. Can be either \code{"XCMSnExp"} (code), \code{"list"} or +return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or \code{"xcmsSet"}.} \item{value}{The value for the slot.} @@ -236,103 +236,97 @@ return. Can be either \code{"XCMSnExp"} (code), \code{"list"} or } \value{ The \code{MassifquantParam} function returns a \code{MassifquantParam} -class instance with all of the settings specified for feature detection by -the centWave method. +class instance with all of the settings specified for chromatographic peak +detection by the \emph{massifquant} method. 
-For \code{detectFeatures}: if \code{return.type = "XCMSnExp"} an -\code{\link{XCMSnExp}} object with the results of the feature detection. +For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +\code{\link{XCMSnExp}} object with the results of the peak detection. If \code{return.type = "list"} a list of length equal to the number of -samples with matrices specifying the identified features/peaks. +samples with matrices specifying the identified peaks. If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -with the results of the feature detection. +with the results of the peak detection. } \description{ -Massifquant is a Kalman filter (KF)-based feature -detection for XC-MS data in centroid mode. The identified features -can be further refined with the \emph{centWave} method (see -\code{\link{do_detectFeatures_centWave}} for details on centWave) -by specifying \code{withWave = TRUE}. +Massifquant is a Kalman filter (KF)-based chromatographic peak + detection for XC-MS data in centroid mode. The identified peaks + can be further refined with the \emph{centWave} method (see + \code{\link{findChromPeaks-centWave}} for details on centWave) + by specifying \code{withWave = TRUE}. The \code{MassifquantParam} class allows to specify all -settings for a feature detection using the massifquant method eventually in -combination with the centWave algorithm. Instances should be created with -the \code{MassifquantParam} constructor. + settings for a chromatographic peak detection using the massifquant + method eventually in combination with the centWave algorithm. Instances + should be created with the \code{MassifquantParam} constructor. -The \code{detectFeatures,OnDiskMSnExp,MassifquantParam} -method performs feature detection using the \emph{massifquant} algorithm -on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. 
+The \code{findChromPeaks,OnDiskMSnExp,MassifquantParam} +method performs chromatographic peak detection using the \emph{massifquant} +algorithm on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific data and load the spectra data (mz and intensity values) on the fly from the original files applying also all eventual data manipulations. -The \code{detectFeatures,MSnExp,MassifquantParam} method -performs feature detection using the \emph{massifquant} method on all -samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain -mz and intensity values of all spectra hence no additional -data input from the original files is required. - \code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} -slot of the object. + slot of the object. \code{peakwidth},\code{peakwidth<-}: getter and setter for the -\code{peakwidth} slot of the object. + \code{peakwidth} slot of the object. \code{snthresh},\code{snthresh<-}: getter and setter for the -\code{snthresh} slot of the object. + \code{snthresh} slot of the object. \code{prefilter},\code{prefilter<-}: getter and setter for the -\code{prefilter} slot of the object. + \code{prefilter} slot of the object. \code{mzCenterFun},\code{mzCenterFun<-}: getter and setter for the -\code{mzCenterFun} slot of the object. + \code{mzCenterFun} slot of the object. \code{integrate},\code{integrate<-}: getter and setter for the -\code{integrate} slot of the object. + \code{integrate} slot of the object. \code{mzdiff},\code{mzdiff<-}: getter and setter for the -\code{mzdiff} slot of the object. + \code{mzdiff} slot of the object. \code{fitgauss},\code{fitgauss<-}: getter and setter for the -\code{fitgauss} slot of the object. + \code{fitgauss} slot of the object. \code{noise},\code{noise<-}: getter and setter for the -\code{noise} slot of the object. + \code{noise} slot of the object. 
\code{verboseColumns},\code{verboseColumns<-}: getter and -setter for the \code{verboseColumns} slot of the object. + setter for the \code{verboseColumns} slot of the object. \code{criticalValue},\code{criticalValue<-}: getter and -setter for the \code{criticalValue} slot of the object. + setter for the \code{criticalValue} slot of the object. \code{consecMissedLimit},\code{consecMissedLimit<-}: getter and -setter for the \code{consecMissedLimit} slot of the object. + setter for the \code{consecMissedLimit} slot of the object. \code{unions},\code{unions<-}: getter and -setter for the \code{unions} slot of the object. + setter for the \code{unions} slot of the object. \code{checkBack},\code{checkBack<-}: getter and -setter for the \code{checkBack} slot of the object. + setter for the \code{checkBack} slot of the object. \code{withWave},\code{withWave<-}: getter and -setter for the \code{withWave} slot of the object. + setter for the \code{withWave} slot of the object. } \details{ This algorithm's performance has been tested rigorously -on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. -Simultaneous kalman filters identify features and calculate their -area under the curve. The default parameters are set to operate on -a complex LC-MS Orbitrap sample. Users will find it useful to do some -simple exploratory data analysis to find out where to set a minimum -intensity, and identify how many scans an average feature spans. The -\code{consecMissedLimit} parameter has yielded good performance on -Orbitrap data when set to (\code{2}) and on TOF data it was found best -to be at (\code{1}). This may change as the algorithm has yet to be -tested on many samples. The \code{criticalValue} parameter is perhaps -most dificult to dial in appropriately and visual inspection of peak -identification is the best suggested tool for quick optimization. 
-The \code{ppm} and \code{checkBack} parameters have shown less influence -than the other parameters and exist to give users flexibility and -better accuracy. + on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode. + Simultaneous Kalman filters identify chromatographic peaks and calculate + their area under the curve. The default parameters are set to operate on + a complex LC-MS Orbitrap sample. Users will find it useful to do some + simple exploratory data analysis to find out where to set a minimum + intensity, and identify how many scans an average peak spans. The + \code{consecMissedLimit} parameter has yielded good performance on + Orbitrap data when set to (\code{2}) and on TOF data it was found best + to be at (\code{1}). This may change as the algorithm has yet to be + tested on many samples. The \code{criticalValue} parameter is perhaps + most difficult to dial in appropriately and visual inspection of peak + identification is the best suggested tool for quick optimization. + The \code{ppm} and \code{checkBack} parameters have shown less influence + than the other parameters and exist to give users flexibility and + better accuracy. Parallel processing (one process per sample) is supported and can be configured either by the \code{BPPARAM} parameter or by globally defining @@ -346,14 +340,15 @@ method from the \code{BiocParallel} package. the version from the class. Slots values should exclusively be accessed \emph{via} the corresponding getter and setter methods listed above.} }} + \note{ These methods and classes are part of the updated and modernized -\code{xcms} user interface which will eventually replace the -\code{\link{findPeaks}} methods. It supports feature detection on -\code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} -objects (both defined in the \code{MSnbase} package). All of the settings -to the massifquant and centWave algorithm can be passed with a -\code{MassifquantParam} object. 
+ \code{xcms} user interface which will eventually replace the + \code{\link{findPeaks}} methods. It supports chromatographic peak + detection on \code{\link[MSnbase]{MSnExp}} and + \code{\link[MSnbase]{OnDiskMSnExp}} objects (both defined in the + \code{MSnbase} package). All of the settings to the massifquant and + centWave algorithm can be passed with a \code{MassifquantParam} object. } \examples{ @@ -363,7 +358,7 @@ mqp <- MassifquantParam() snthresh(mqp) <- 30 mqp -## Perform the feature detection using massifquant on the files from the +## Perform the peak detection using massifquant on the files from the ## faahKO package. Files are read using the readMSData2 from the MSnbase ## package library(faahKO) @@ -371,13 +366,9 @@ library(MSnbase) fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, full.names = TRUE) raw_data <- readMSData2(fls[1:2]) -## Perform the feature detection using the settings defined above. We're -## returning the results as an xcmsSet object. -res <- detectFeatures(raw_data, param = mqp, return.type = "xcmsSet") -head(peaks(res)) -} -\author{ -Christopher Conley, Johannes Rainer +## Perform the peak detection using the settings defined above. +res <- findChromPeaks(raw_data, param = mqp) +head(chromPeaks(res)) } \references{ Conley CJ, Smith R, Torgrip RJ, Taylor RM, Tautenhahn R and Prince JT @@ -385,16 +376,18 @@ Conley CJ, Smith R, Torgrip RJ, Taylor RM, Tautenhahn R and Prince JT detection" \emph{Bioinformatics} 2014, 30(18):2636-43. } \seealso{ -The \code{\link{do_detectFeatures_massifquant}} core API function -and \code{\link{findPeaks.massifquant}} for the old user interface. +The \code{\link{do_findChromPeaks_massifquant}} core API function + and \code{\link{findPeaks.massifquant}} for the old user interface. \code{\link{XCMSnExp}} for the object containing the results of -the feature detection. +the peak detection. 
-Other feature detection methods: \code{\link{detectFeatures}}, - \code{\link{featureDetection-MSW}}, - \code{\link{featureDetection-centWaveWithPredIsoROIs}}, - \code{\link{featureDetection-centWave}}, - \code{\link{featureDetection-matchedFilter}} +Other peak detection methods: \code{\link{chromatographic-peak-detection}}, + \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, + \code{\link{findChromPeaks-centWave}}, + \code{\link{findChromPeaks-matchedFilter}}, + \code{\link{findPeaks-MSW}} +} +\author{ +Christopher Conley, Johannes Rainer } - diff --git a/man/featureDetection-matchedFilter.Rd b/man/findChromPeaks-matchedFilter.Rd similarity index 58% rename from man/featureDetection-matchedFilter.Rd rename to man/findChromPeaks-matchedFilter.Rd index dcdf15f48..9bac5e9ef 100644 --- a/man/featureDetection-matchedFilter.Rd +++ b/man/findChromPeaks-matchedFilter.Rd @@ -1,52 +1,52 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DataClasses.R, R/functions-Params.R, R/methods-OnDiskMSnExp.R, R/methods-Params.R +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, +% R/methods-OnDiskMSnExp.R, R/methods-Params.R \docType{class} -\name{featureDetection-matchedFilter} -\alias{MatchedFilterParam} +\name{findChromPeaks-matchedFilter} +\alias{findChromPeaks-matchedFilter} +\alias{matchedFilter} \alias{MatchedFilterParam-class} -\alias{baseValue} -\alias{baseValue,MatchedFilterParam-method} -\alias{baseValue<-} -\alias{baseValue<-,MatchedFilterParam-method} -\alias{binSize} +\alias{MatchedFilterParam} +\alias{findChromPeaks,OnDiskMSnExp,MatchedFilterParam-method} +\alias{show,MatchedFilterParam-method} \alias{binSize,MatchedFilterParam-method} -\alias{binSize<-} +\alias{binSize} \alias{binSize<-,MatchedFilterParam-method} -\alias{detectFeatures,MSnExp,MatchedFilterParam-method} -\alias{detectFeatures,OnDiskMSnExp,MatchedFilterParam-method} -\alias{distance} +\alias{binSize<-} +\alias{impute,MatchedFilterParam-method} 
+\alias{impute<-,MatchedFilterParam-method} +\alias{impute<-} +\alias{baseValue,MatchedFilterParam-method} +\alias{baseValue} +\alias{baseValue<-,MatchedFilterParam-method} +\alias{baseValue<-} \alias{distance,MatchedFilterParam-method} -\alias{distance<-} +\alias{distance} \alias{distance<-,MatchedFilterParam-method} -\alias{featureDetection-matchedFilter} -\alias{fwhm} +\alias{distance<-} \alias{fwhm,MatchedFilterParam-method} -\alias{fwhm<-} +\alias{fwhm} \alias{fwhm<-,MatchedFilterParam-method} -\alias{impute,MatchedFilterParam-method} -\alias{impute<-} -\alias{impute<-,MatchedFilterParam-method} -\alias{index} -\alias{index,MatchedFilterParam-method} -\alias{index<-} -\alias{index<-,MatchedFilterParam-method} -\alias{matchedFilter} -\alias{max,MatchedFilterParam-method} -\alias{max<-} -\alias{max<-,MatchedFilterParam-method} -\alias{mzdiff,MatchedFilterParam-method} -\alias{mzdiff<-,MatchedFilterParam-method} -\alias{show,MatchedFilterParam-method} -\alias{sigma} +\alias{fwhm<-} \alias{sigma,MatchedFilterParam-method} -\alias{sigma<-} +\alias{sigma} \alias{sigma<-,MatchedFilterParam-method} +\alias{sigma<-} +\alias{max,MatchedFilterParam-method} +\alias{max<-,MatchedFilterParam-method} +\alias{max<-} \alias{snthresh,MatchedFilterParam-method} \alias{snthresh<-,MatchedFilterParam-method} -\alias{steps} \alias{steps,MatchedFilterParam-method} -\alias{steps<-} +\alias{steps} \alias{steps<-,MatchedFilterParam-method} +\alias{steps<-} +\alias{mzdiff,MatchedFilterParam-method} +\alias{mzdiff<-,MatchedFilterParam-method} +\alias{index,MatchedFilterParam-method} +\alias{index} +\alias{index<-,MatchedFilterParam-method} +\alias{index<-} \title{Peak detection in the chromatographic time domain} \usage{ MatchedFilterParam(binSize = 0.1, impute = "none", baseValue = numeric(), @@ -54,12 +54,9 @@ MatchedFilterParam(binSize = 0.1, impute = "none", baseValue = numeric(), snthresh = 10, steps = 2, mzdiff = 0.8 - binSize * steps, index = FALSE) 
-\S4method{detectFeatures}{OnDiskMSnExp,MatchedFilterParam}(object, param, +\S4method{findChromPeaks}{OnDiskMSnExp,MatchedFilterParam}(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") -\S4method{detectFeatures}{MSnExp,MatchedFilterParam}(object, param, - BPPARAM = bpparam(), return.type = "list") - \S4method{show}{MatchedFilterParam}(object) \S4method{binSize}{MatchedFilterParam}(object) @@ -107,14 +104,14 @@ MatchedFilterParam(binSize = 0.1, impute = "none", baseValue = numeric(), \S4method{index}{MatchedFilterParam}(object) <- value } \arguments{ -\item{binSize}{numeric(1) specifying the width of the +\item{binSize}{\code{numeric(1)} specifying the width of the bins/slices in m/z dimension.} \item{impute}{Character string specifying the method to be used for missing -value imputation. Allowed values are \code{"none"} (no linear interpolation), -\code{"lin"} (linear interpolation), \code{"linbase"} (linear interpolation -within a certain bin-neighborhood) and \code{"intlin"}. See -\code{\link{imputeLinInterpol}} for more details.} +value imputation. Allowed values are \code{"none"} (no linear +interpolation), \code{"lin"} (linear interpolation), \code{"linbase"} +(linear interpolation within a certain bin-neighborhood) and +\code{"intlin"}. See \code{\link{imputeLinInterpol}} for more details.} \item{baseValue}{The base value to which empty elements should be set. This is only considered for \code{method = "linbase"} and corresponds to the @@ -124,33 +121,33 @@ is only considered for \code{method = "linbase"} and corresponds to the element of an empty element that should be considered for linear interpolation. See details section for more information.} -\item{fwhm}{numeric(1) specifying the full width at half maximum -of matched filtration gaussian model peak. Only used to calculate the actual -sigma, see below.} +\item{fwhm}{\code{numeric(1)} specifying the full width at half maximum +of matched filtration gaussian model peak. 
Only used to calculate the +actual sigma, see below.} -\item{sigma}{numeric(1) specifying the standard deviation (width) +\item{sigma}{\code{numeric(1)} specifying the standard deviation (width) of the matched filtration model peak.} -\item{max}{numeric(1) representing the maximum number of peaks +\item{max}{\code{numeric(1)} representing the maximum number of peaks that are expected/will be identified per slice.} -\item{snthresh}{numeric(1) defining the signal to noise cutoff -to be used in the feature detection step.} +\item{snthresh}{\code{numeric(1)} defining the signal to noise cutoff +to be used in the chromatographic peak detection step.} -\item{steps}{numeric(1) defining the number of bins to be -merged before filtration (i.e. the number of neighboring bins that will be -joined to the slice in which filtration and peak detection will be +\item{steps}{\code{numeric(1)} defining the number of bins to be +merged before filtration (i.e. the number of neighboring bins that will +be joined to the slice in which filtration and peak detection will be performed).} -\item{mzdiff}{numeric(1) defining the minimum difference +\item{mzdiff}{\code{numeric(1)} defining the minimum difference in m/z for peaks with overlapping retention times} -\item{index}{Logical specifying whether indicies should be returned instead -of values for m/z and retention times.} +\item{index}{\code{logical(1)} specifying whether indices should be +returned instead of values for m/z and retention times.} -\item{object}{For \code{detectFeatures}: Either an -\code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -object containing the MS- and all other experiment-relevant data. +\item{object}{For \code{findChromPeaks}: an +\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +experiment-relevant data. 
For all other methods: a parameter object.} @@ -160,11 +157,11 @@ the matchedFilter algorithm.} \item{BPPARAM}{A parameter class specifying if and how parallel processing should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. See documentation of the \code{BiocParallel} for more details. If parallel -processing is enables, feature detection is performed in parallel on several +processing is enables, peak detection is performed in parallel on several of the input samples.} \item{return.type}{Character specifying what type of object the method should -return. Can be either \code{"XCMSnExp"} (code), \code{"list"} or +return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or \code{"xcmsSet"}.} \item{value}{The value for the slot.} @@ -174,86 +171,81 @@ return. Can be either \code{"XCMSnExp"} (code), \code{"list"} or \value{ The \code{MatchedFilterParam} function returns a \code{MatchedFilterParam} class instance with all of the settings specified -for feature detection by the centWave method. +for chromatographic detection by the \emph{matchedFilter} method. -For \code{detectFeatures}: if \code{return.type = "XCMSnExp"} an -\code{\link{XCMSnExp}} object with the results of the feature detection. +For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +\code{\link{XCMSnExp}} object with the results of the peak detection. If \code{return.type = "list"} a list of length equal to the number of -samples with matrices specifying the identified features/peaks. +samples with matrices specifying the identified peaks. If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -with the results of the feature detection. +with the results of the peak detection. } \description{ -The \emph{matchedFilter} algorithm identifies features in the -chromatographic time domain as described in [Smith 2006]. The intensity -values are binned by cutting The LC/MS data into slices (bins) of a mass unit -(\code{binSize} m/z) wide. 
Within each bin the maximal intensity is selected. -The feature detection is then performed in each bin by extending it based on -the \code{steps} parameter to generate slices comprising bins -\code{current_bin - steps +1} to \code{current_bin + steps - 1}. Each of -these slices is then filtered with matched filtration using a second-derative -Gaussian as the model feature/peak shape. After filtration features are -detected using a signal-to-ration cut-off. For more details and -illustrations see [Smith 2006]. +The \emph{matchedFilter} algorithm identifies peaks in the + chromatographic time domain as described in [Smith 2006]. The intensity + values are binned by cutting the LC/MS data into slices (bins) of a mass + unit (\code{binSize} m/z) wide. Within each bin the maximal intensity is + selected. The chromatographic peak detection is then performed in each + bin by extending it based on the \code{steps} parameter to generate + slices comprising bins \code{current_bin - steps +1} to + \code{current_bin + steps - 1}. Each of these slices is then filtered + with matched filtration using a second-derivative Gaussian as the model + peak shape. After filtration peaks are detected using a signal-to-noise ratio + cut-off. For more details and illustrations see [Smith 2006]. The \code{MatchedFilterParam} class allows to specify all -settings for a feature detection using the matchedFilter method. Instances -should be created with the \code{MatchedFilterParam} constructor. + settings for a chromatographic peak detection using the matchedFilter + method. Instances should be created with the \code{MatchedFilterParam} + constructor. -The \code{detectFeatures,OnDiskMSnExp,MatchedFilterParam} -method performs feature detection using the \emph{matchedFilter} algorithm +The \code{findChromPeaks,OnDiskMSnExp,MatchedFilterParam} +method performs peak detection using the \emph{matchedFilter} algorithm on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. 
\code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific data and load the spectra data (mz and intensity values) on the fly from the original files applying also all eventual data manipulations. -The \code{detectFeatures,MSnExp,MatchedFilterParam} method -performs feature detection using the \emph{matchedFilter} method on all -samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain -mz and intensity values of all spectra hence no additional -data input from the original files is required. - \code{binSize},\code{binSize<-}: getter and setter for the -\code{binSize} slot of the object. + \code{binSize} slot of the object. \code{impute},\code{impute<-}: getter and setter for the -\code{impute} slot of the object. + \code{impute} slot of the object. \code{baseValue},\code{baseValue<-}: getter and setter for the -\code{baseValue} slot of the object. + \code{baseValue} slot of the object. \code{distance},\code{distance<-}: getter and setter for the -\code{distance} slot of the object. + \code{distance} slot of the object. \code{fwhm},\code{fwhm<-}: getter and setter for the -\code{fwhm} slot of the object. + \code{fwhm} slot of the object. \code{sigma},\code{sigma<-}: getter and setter for the -\code{sigma} slot of the object. + \code{sigma} slot of the object. \code{max},\code{max<-}: getter and setter for the -\code{max} slot of the object. + \code{max} slot of the object. \code{snthresh},\code{snthresh<-}: getter and setter for the -\code{snthresh} slot of the object. + \code{snthresh} slot of the object. \code{steps},\code{steps<-}: getter and setter for the -\code{steps} slot of the object. + \code{steps} slot of the object. \code{mzdiff},\code{mzdiff<-}: getter and setter for the -\code{mzdiff} slot of the object. + \code{mzdiff} slot of the object. \code{index},\code{index<-}: getter and setter for the -\code{index} slot of the object. + \code{index} slot of the object. 
} \details{ The intensities are binned by the provided m/z values within each -spectrum (scan). Binning is performed such that the bins are centered around -the m/z values (i.e. the first bin includes all m/z values between -\code{min(mz) - bin_size/2} and \code{min(mz) + bin_size/2}). + spectrum (scan). Binning is performed such that the bins are centered + around the m/z values (i.e. the first bin includes all m/z values between + \code{min(mz) - bin_size/2} and \code{min(mz) + bin_size/2}). -For more details on binning and missing value imputation see -\code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods. + For more details on binning and missing value imputation see + \code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods. Parallel processing (one process per sample) is supported and can be configured either by the \code{BPPARAM} parameter or by globally defining @@ -267,14 +259,15 @@ method from the \code{BiocParallel} package. the version from the class. Slots values should exclusively be accessed \emph{via} the corresponding getter and setter methods listed above.} }} + \note{ These methods and classes are part of the updated and modernized -\code{xcms} user interface which will eventually replace the -\code{\link{findPeaks}} methods. It supports feature detection on -\code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} -objects (both defined in the \code{MSnbase} package). All of the settings -to the matchedFilter algorithm can be passed with a -\code{MatchedFilterParam} object. + \code{xcms} user interface which will eventually replace the + \code{\link{findPeaks}} methods. It supports chromatographic peak + detection on \code{\link[MSnbase]{MSnExp}} and + \code{\link[MSnbase]{OnDiskMSnExp}} objects (both defined in the + \code{MSnbase} package). All of the settings to the matchedFilter + algorithm can be passed with a \code{MatchedFilterParam} object. 
} \examples{ @@ -284,7 +277,7 @@ mfp <- MatchedFilterParam(binSize = 0.5) snthresh(mfp) <- 15 mfp -## Perform the feature detection using matchecFilter on the files from the +## Perform the peak detection using matchedFilter on the files from the ## faahKO package. Files are read using the readMSData2 from the MSnbase ## package library(faahKO) @@ -292,15 +285,12 @@ library(MSnbase) fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, full.names = TRUE) raw_data <- readMSData2(fls) -## Perform the feature detection using the settings defined above. We're -## returning the results as an xcmsSet object. Note that we are also -## disabling parallel processing in this example by registering a "SerialParam" +## Perform the chromatographic peak detection using the settings defined +## above. Note that we are also disabling parallel processing in this +## example by registering a "SerialParam" register(SerialParam()) -res <- detectFeatures(raw_data, param = mfp, return.type = "xcmsSet") -head(peaks(res)) -} -\author{ -Colin A Smith, Johannes Rainer +res <- findChromPeaks(raw_data, param = mfp) +head(chromPeaks(res)) } \references{ Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and @@ -309,16 +299,18 @@ Profiling Using Nonlinear Peak Alignment, Matching, and Identification" \emph{Anal. Chem.} 2006, 78:779-787. } \seealso{ -The \code{\link{do_detectFeatures_matchedFilter}} core API function -and \code{\link{findPeaks.matchedFilter}} for the old user interface. +The \code{\link{do_findChromPeaks_matchedFilter}} core API function + and \code{\link{findPeaks.matchedFilter}} for the old user interface. \code{\link{XCMSnExp}} for the object containing the results of -the feature detection. +the chromatographic peak detection. 
-Other feature detection methods: \code{\link{detectFeatures}}, - \code{\link{featureDetection-MSW}}, - \code{\link{featureDetection-centWaveWithPredIsoROIs}}, - \code{\link{featureDetection-centWave}}, - \code{\link{featureDetection-massifquant}} +Other peak detection methods: \code{\link{chromatographic-peak-detection}}, + \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, + \code{\link{findChromPeaks-centWave}}, + \code{\link{findChromPeaks-massifquant}}, + \code{\link{findPeaks-MSW}} +} +\author{ +Colin A Smith, Johannes Rainer } - diff --git a/man/featureDetection-MSW.Rd b/man/findPeaks-MSW.Rd similarity index 64% rename from man/featureDetection-MSW.Rd rename to man/findPeaks-MSW.Rd index 04fc4e963..a992c2dba 100644 --- a/man/featureDetection-MSW.Rd +++ b/man/findPeaks-MSW.Rd @@ -1,67 +1,64 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/DataClasses.R, R/functions-Params.R, R/methods-OnDiskMSnExp.R, R/methods-Params.R +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, +% R/methods-OnDiskMSnExp.R, R/methods-Params.R \docType{class} -\name{featureDetection-MSW} +\name{findPeaks-MSW} +\alias{findPeaks-MSW} \alias{MSW} -\alias{MSWParam} \alias{MSWParam-class} -\alias{addParams} -\alias{addParams,MSWParam-method} -\alias{addParams<-} -\alias{addParams<-,MSWParam-method} -\alias{ampTh} -\alias{ampTh,MSWParam-method} -\alias{ampTh<-} -\alias{ampTh<-,MSWParam-method} -\alias{detectFeatures,MSnExp,MSWParam-method} -\alias{detectFeatures,OnDiskMSnExp,MSWParam-method} -\alias{featureDetection-MSW} -\alias{minNoiseLevel} -\alias{minNoiseLevel,MSWParam-method} -\alias{minNoiseLevel<-} -\alias{minNoiseLevel<-,MSWParam-method} -\alias{nearbyPeak} +\alias{MSWParam} +\alias{findChromPeaks,OnDiskMSnExp,MSWParam-method} +\alias{show,MSWParam-method} +\alias{snthresh,MSWParam-method} +\alias{snthresh<-,MSWParam-method} +\alias{verboseColumns,MSWParam-method} +\alias{verboseColumns<-,MSWParam-method} 
+\alias{scales,MSWParam-method} +\alias{scales} +\alias{scales<-,MSWParam-method} +\alias{scales<-} \alias{nearbyPeak,MSWParam-method} -\alias{nearbyPeak<-} +\alias{nearbyPeak} \alias{nearbyPeak<-,MSWParam-method} -\alias{peakScaleRange} +\alias{nearbyPeak<-} \alias{peakScaleRange,MSWParam-method} -\alias{peakScaleRange<-} +\alias{peakScaleRange} \alias{peakScaleRange<-,MSWParam-method} -\alias{peakThr} -\alias{peakThr,MSWParam-method} -\alias{peakThr<-} -\alias{peakThr<-,MSWParam-method} -\alias{ridgeLength} +\alias{peakScaleRange<-} +\alias{ampTh,MSWParam-method} +\alias{ampTh} +\alias{ampTh<-,MSWParam-method} +\alias{ampTh<-} +\alias{minNoiseLevel,MSWParam-method} +\alias{minNoiseLevel} +\alias{minNoiseLevel<-,MSWParam-method} +\alias{minNoiseLevel<-} \alias{ridgeLength,MSWParam-method} -\alias{ridgeLength<-} +\alias{ridgeLength} \alias{ridgeLength<-,MSWParam-method} -\alias{scales} -\alias{scales,MSWParam-method} -\alias{scales<-} -\alias{scales<-,MSWParam-method} -\alias{show,MSWParam-method} -\alias{snthresh,MSWParam-method} -\alias{snthresh<-,MSWParam-method} -\alias{tuneIn} +\alias{ridgeLength<-} +\alias{peakThr,MSWParam-method} +\alias{peakThr} +\alias{peakThr<-,MSWParam-method} +\alias{peakThr<-} \alias{tuneIn,MSWParam-method} -\alias{tuneIn<-} +\alias{tuneIn} \alias{tuneIn<-,MSWParam-method} -\alias{verboseColumns,MSWParam-method} -\alias{verboseColumns<-,MSWParam-method} -\title{Single-spectrum non-chromatography MS data feature detection} +\alias{tuneIn<-} +\alias{addParams,MSWParam-method} +\alias{addParams} +\alias{addParams<-,MSWParam-method} +\alias{addParams<-} +\title{Single-spectrum non-chromatography MS data peak detection} \usage{ MSWParam(snthresh = 3, verboseColumns = FALSE, scales = c(1, seq(2, 30, 2), seq(32, 64, 4)), nearbyPeak = TRUE, peakScaleRange = 5, ampTh = 0.01, minNoiseLevel = ampTh/snthresh, ridgeLength = 24, peakThr = NULL, tuneIn = FALSE, ...) 
-\S4method{detectFeatures}{OnDiskMSnExp,MSWParam}(object, param, +\S4method{findChromPeaks}{OnDiskMSnExp,MSWParam}(object, param, BPPARAM = bpparam(), return.type = "XCMSnExp") -\S4method{detectFeatures}{MSnExp,MSWParam}(object, param, BPPARAM = bpparam(), - return.type = "list") - \S4method{show}{MSWParam}(object) \S4method{snthresh}{MSWParam}(object) @@ -109,10 +106,10 @@ MSWParam(snthresh = 3, verboseColumns = FALSE, scales = c(1, seq(2, 30, \S4method{addParams}{MSWParam}(object) <- value } \arguments{ -\item{snthresh}{numeric(1) defining the signal to noise ratio cutoff.} +\item{snthresh}{\code{numeric(1)} defining the signal to noise ratio cutoff.} -\item{verboseColumns}{Logical whether additional feature meta data columns -should be returned.} +\item{verboseColumns}{\code{logical(1)} whether additional peak meta data +columns should be returned.} \item{scales}{Numeric defining the scales of the continuous wavelet transform (CWT).} @@ -145,9 +142,9 @@ estimation of the detected peaks.} \code{\link[MassSpecWavelet]{sav.gol}} functions from the \code{MassSpecWavelet} package.} -\item{object}{For \code{detectFeatures}: Either an -\code{\link[MSnbase]{OnDiskMSnExp}} or a \code{\link[MSnbase]{MSnExp}} -object containing the MS- and all other experiment-relevant data. +\item{object}{For \code{findChromPeaks}: an +\code{\link[MSnbase]{OnDiskMSnExp}} object containing the MS- and all other +experiment-relevant data. For all other methods: a parameter object.} @@ -157,93 +154,86 @@ the algorithm.} \item{BPPARAM}{A parameter class specifying if and how parallel processing should be performed. It defaults to \code{\link[BiocParallel]{bpparam}}. See documentation of the \code{BiocParallel} for more details. 
If parallel -processing is enables, feature detection is performed in parallel on several +processing is enabled, peak detection is performed in parallel on several of the input samples.} \item{return.type}{Character specifying what type of object the method should -return. Can be either \code{"XCMSnExp"} (code), \code{"list"} or +return. Can be either \code{"XCMSnExp"} (default), \code{"list"} or \code{"xcmsSet"}.} \item{value}{The value for the slot.} } \value{ The \code{MSWParam} function returns a \code{MSWParam} -class instance with all of the settings specified for feature detection by -the centWave method. +class instance with all of the settings specified for peak detection by +the \emph{MSW} method. -For \code{detectFeatures}: if \code{return.type = "XCMSnExp"} an -\code{\link{XCMSnExp}} object with the results of the feature detection. +For \code{findChromPeaks}: if \code{return.type = "XCMSnExp"} an +\code{\link{XCMSnExp}} object with the results of the peak detection. If \code{return.type = "list"} a list of length equal to the number of -samples with matrices specifying the identified features/peaks. +samples with matrices specifying the identified peaks. If \code{return.type = "xcmsSet"} an \code{\linkS4class{xcmsSet}} object -with the results of the feature detection. +with the results of the detection. } \description{ -Perform feature detection in mass spectrometry -direct injection spectrum using a wavelet based algorithm. +Perform peak detection in mass spectrometry + direct injection spectrum using a wavelet based algorithm. The \code{MSWParam} class allows to specify all -settings for a feature detection using the MSW method. Instances should be -created with the \code{MSWParam} constructor. + settings for a peak detection using the MSW method. Instances should be + created with the \code{MSWParam} constructor. 
-The \code{detectFeatures,OnDiskMSnExp,MSWParam} -method performs feature detection in single-spectrum non-chromatography MS +The \code{findChromPeaks,OnDiskMSnExp,MSWParam} +method performs peak detection in single-spectrum non-chromatography MS data using functionality from the \code{MassSpecWavelet} package on all samples from an \code{\link[MSnbase]{OnDiskMSnExp}} object. \code{\link[MSnbase]{OnDiskMSnExp}} objects encapsule all experiment specific data and load the spectra data (mz and intensity values) on the fly from the original files applying also all eventual data manipulations. -The \code{detectFeatures,MSnExp,MSWParam} method -performs feature detection in single-spectrum non-chromatography MS -data using functionality from the \code{MassSpecWavelet} package on all -samples from an \code{\link[MSnbase]{MSnExp}} object. These objects contain -mz and intensity values of all spectra hence no additional -data input from the original files is required. - \code{snthresh},\code{snthresh<-}: getter and setter for the -\code{snthresh} slot of the object. + \code{snthresh} slot of the object. \code{verboseColumns},\code{verboseColumns<-}: getter and setter -for the \code{verboseColumns} slot of the object. + for the \code{verboseColumns} slot of the object. \code{scales},\code{scales<-}: getter and setter for the -\code{scales} slot of the object. + \code{scales} slot of the object. \code{nearbyPeak},\code{nearbyPeak<-}: getter and setter for the -\code{nearbyPeak} slot of the object. + \code{nearbyPeak} slot of the object. \code{peakScaleRange},\code{peakScaleRange<-}: getter and setter -for the \code{peakScaleRange} slot of the object. + for the \code{peakScaleRange} slot of the object. \code{ampTh},\code{ampTh<-}: getter and setter for the -\code{ampTh} slot of the object. + \code{ampTh} slot of the object. \code{minNoiseLevel},\code{minNoiseLevel<-}: getter and setter -for the \code{minNoiseLevel} slot of the object. 
+ for the \code{minNoiseLevel} slot of the object. \code{ridgeLength},\code{ridgeLength<-}: getter and setter for -the \code{ridgeLength} slot of the object. + the \code{ridgeLength} slot of the object. \code{peakThr},\code{peakThr<-}: getter and setter for the -\code{peakThr} slot of the object. + \code{peakThr} slot of the object. \code{tuneIn},\code{tuneIn<-}: getter and setter for the -\code{tuneIn} slot of the object. + \code{tuneIn} slot of the object. \code{addParams},\code{addParams<-}: getter and setter for the -\code{addParams} slot of the object. This slot stores optional additional -parameters to be passed to the -\code{\link[MassSpecWavelet]{identifyMajorPeaks}} and -\code{\link[MassSpecWavelet]{sav.gol}} functions from the -\code{MassSpecWavelet} package. + \code{addParams} slot of the object. This slot stores optional additional + parameters to be passed to the + \code{\link[MassSpecWavelet]{identifyMajorPeaks}} and + \code{\link[MassSpecWavelet]{sav.gol}} functions from the + \code{MassSpecWavelet} package. } \details{ This is a wrapper for the peak picker in Bioconductor's -\code{MassSpecWavelet} package calling -\code{\link[MassSpecWavelet]{peakDetectionCWT}} and -\code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. See the -\emph{xcmsDirect} vignette for more information. + \code{MassSpecWavelet} package calling + \code{\link[MassSpecWavelet]{peakDetectionCWT}} and + \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. See the + \emph{xcmsDirect} vignette for more information. Parallel processing (one process per sample) is supported and can be configured either by the \code{BPPARAM} parameter or by globally defining @@ -257,38 +247,49 @@ method from the \code{BiocParallel} package. 
should exclusively be accessed \emph{via} the corresponding getter and setter methods listed above.} }} + \note{ These methods and classes are part of the updated and modernized -\code{xcms} user interface which will eventually replace the -\code{\link{findPeaks}} methods. It supports feature detection on -\code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} -objects (both defined in the \code{MSnbase} package). All of the settings -to the massifquant and centWave algorithm can be passed with a -\code{MassifquantParam} object. + \code{xcms} user interface which will eventually replace the + \code{\link{findPeaks}} methods. It supports peak detection on + \code{\link[MSnbase]{MSnExp}} and \code{\link[MSnbase]{OnDiskMSnExp}} + objects (both defined in the \code{MSnbase} package). All of the settings + to the algorithm can be passed with a \code{MSWParam} object. } \examples{ -## Create a MassifquantParam object +## Create a MSWParam object mp <- MSWParam() ## Change snthresh parameter snthresh(mp) <- 15 mp -} -\author{ -Joachim Kutzera, Steffen Neumann, Johannes Rainer +## Loading a small subset of direct injection, single spectrum files +library(msdata) +fticrf <- list.files(system.file("fticr", package = "msdata"), + recursive = TRUE, full.names = TRUE) +fticr <- readMSData2(fticrf[1:2], msLevel. = 1) + +## Perform the MSW peak detection on these: +p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005, + SNR.method = "data.mean", winSize.noise = 500) +fticr <- findChromPeaks(fticr, param = p) + +head(chromPeaks(fticr)) } \seealso{ -The \code{\link{do_detectFeatures_MSW}} core API function -and \code{\link{findPeaks.MSW}} for the old user interface. +The \code{\link{do_findPeaks_MSW}} core API function + and \code{\link{findPeaks.MSW}} for the old user interface. \code{\link{XCMSnExp}} for the object containing the results of -the feature detection. +the peak detection. 
-Other feature detection methods: \code{\link{detectFeatures}}, - \code{\link{featureDetection-centWaveWithPredIsoROIs}}, - \code{\link{featureDetection-centWave}}, - \code{\link{featureDetection-massifquant}}, - \code{\link{featureDetection-matchedFilter}} +Other peak detection methods: \code{\link{chromatographic-peak-detection}}, + \code{\link{findChromPeaks-centWaveWithPredIsoROIs}}, + \code{\link{findChromPeaks-centWave}}, + \code{\link{findChromPeaks-massifquant}}, + \code{\link{findChromPeaks-matchedFilter}} +} +\author{ +Joachim Kutzera, Steffen Neumann, Johannes Rainer } - diff --git a/man/findPeaks.MSW-xcmsRaw-method.Rd b/man/findPeaks.MSW-xcmsRaw-method.Rd index 4cd9d39af..f8bf1c692 100644 --- a/man/findPeaks.MSW-xcmsRaw-method.Rd +++ b/man/findPeaks.MSW-xcmsRaw-method.Rd @@ -2,20 +2,20 @@ % Please edit documentation in R/methods-xcmsRaw.R \docType{methods} \name{findPeaks.MSW,xcmsRaw-method} -\alias{findPeaks.MSW} \alias{findPeaks.MSW,xcmsRaw-method} -\title{Feature detection for single-spectrum non-chromatography MS data} +\alias{findPeaks.MSW} +\title{Peak detection for single-spectrum non-chromatography MS data} \usage{ \S4method{findPeaks.MSW}{xcmsRaw}(object, snthresh = 3, verbose.columns = FALSE, ...) 
} \arguments{ -\item{object}{The \code{\linkS4class{xcmsRaw}} object on which feature +\item{object}{The \code{\linkS4class{xcmsRaw}} object on which peak detection should be performed.} -\item{snthresh}{numeric(1) defining the signal to noise ratio cutoff.} +\item{snthresh}{\code{numeric(1)} defining the signal to noise ratio cutoff.} -\item{verbose.columns}{Logical whether additional feature meta data columns +\item{verbose.columns}{Logical whether additional peak meta data columns should be returned.} \item{...}{Additional parameters to be passed to the @@ -24,23 +24,23 @@ should be returned.} \code{MassSpecWavelet} package.} } \value{ -A matrix, each row representing an intentified feature, with columns: +A matrix, each row representing an intentified peak, with columns: \describe{ -\item{mz}{m/z value of the feature at the centroid position.} -\item{mzmin}{Minimum m/z of the feature.} -\item{mzmax}{Maximum m/z of the feature.} +\item{mz}{m/z value of the peak at the centroid position.} +\item{mzmin}{Minimum m/z of the peak.} +\item{mzmax}{Maximum m/z of the peak.} \item{rt}{Always \code{-1}.} \item{rtmin}{Always \code{-1}.} \item{rtmax}{Always \code{-1}.} -\item{into}{Integrated (original) intensity of the feature.} -\item{maxo}{Maximum intensity of the feature.} +\item{into}{Integrated (original) intensity of the peak.} +\item{maxo}{Maximum intensity of the peak.} \item{intf}{Always \code{NA}.} -\item{maxf}{Maximum MSW-filter response of the feature.} +\item{maxf}{Maximum MSW-filter response of the peak.} \item{sn}{Signal to noise ratio.} } } \description{ -This method performs feature detection in mass spectrometry +This method performs peak detection in mass spectrometry direct injection spectrum using a wavelet based algorithm. } \details{ @@ -49,14 +49,13 @@ This is a wrapper around the peak picker in Bioconductor's \code{\link[MassSpecWavelet]{peakDetectionCWT}} and \code{\link[MassSpecWavelet]{tuneInPeakInfo}} functions. 
} -\author{ -Joachim Kutzera, Steffen Neumann, Johannes Rainer -} \seealso{ \code{\link{MSW}} for the new user interface, -\code{\link{do_detectFeatures_MSW}} for the downstream analysis +\code{\link{do_findPeaks_MSW}} for the downstream analysis function or \code{\link[MassSpecWavelet]{peakDetectionCWT}} from the \code{MassSpecWavelet} for details on the algorithm and additionally supported parameters. } - +\author{ +Joachim Kutzera, Steffen Neumann, Johannes Rainer +} diff --git a/man/findPeaks.centWaveWithPredictedIsotopeROIs-methods.Rd b/man/findPeaks.centWaveWithPredictedIsotopeROIs-methods.Rd index 7347c0fb0..9c921205d 100644 --- a/man/findPeaks.centWaveWithPredictedIsotopeROIs-methods.Rd +++ b/man/findPeaks.centWaveWithPredictedIsotopeROIs-methods.Rd @@ -133,7 +133,7 @@ Submitted to Metabolites 2016, Special Issue "Bioinformatics and Data Analysis" } \seealso{ - \code{\link{do_detectFeatures_centWaveWithPredIsoROIs}} for the + \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}} for the corresponding core API function. 
\code{\link{findPeaks.addPredictedIsotopeFeatures}} \code{\link{findPeaks.centWave}} diff --git a/man/findPeaks.matchedFilter-xcmsRaw-method.Rd b/man/findPeaks.matchedFilter-xcmsRaw-method.Rd index 109024290..8d0d228dd 100644 --- a/man/findPeaks.matchedFilter-xcmsRaw-method.Rd +++ b/man/findPeaks.matchedFilter-xcmsRaw-method.Rd @@ -2,9 +2,9 @@ % Please edit documentation in R/methods-xcmsRaw.R \docType{methods} \name{findPeaks.matchedFilter,xcmsRaw-method} -\alias{findPeaks.matchedFilter} \alias{findPeaks.matchedFilter,xcmsRaw-method} -\title{Feature detection in the chromatographic time domain} +\alias{findPeaks.matchedFilter} +\title{Peak detection in the chromatographic time domain} \usage{ \S4method{findPeaks.matchedFilter}{xcmsRaw}(object, fwhm = 30, sigma = fwhm/2.3548, max = 5, snthresh = 10, step = 0.1, steps = 2, @@ -12,65 +12,64 @@ scanrange = numeric()) } \arguments{ -\item{object}{The \code{\linkS4class{xcmsRaw}} object on which feature detection +\item{object}{The \code{\linkS4class{xcmsRaw}} object on which peak detection should be performed.} -\item{fwhm}{numeric(1) specifying the full width at half maximum -of matched filtration gaussian model peak. Only used to calculate the actual -sigma, see below.} +\item{fwhm}{\code{numeric(1)} specifying the full width at half maximum +of matched filtration gaussian model peak. 
Only used to calculate the +actual sigma, see below.} -\item{sigma}{numeric(1) specifying the standard deviation (width) +\item{sigma}{\code{numeric(1)} specifying the standard deviation (width) of the matched filtration model peak.} -\item{max}{numeric(1) representing the maximum number of peaks +\item{max}{\code{numeric(1)} representing the maximum number of peaks that are expected/will be identified per slice.} -\item{snthresh}{numeric(1) defining the signal to noise cutoff -to be used in the feature detection step.} +\item{snthresh}{\code{numeric(1)} defining the signal to noise cutoff +to be used in the chromatographic peak detection step.} \item{step}{numeric(1) specifying the width of the bins/slices in m/z dimension.} -\item{steps}{numeric(1) defining the number of bins to be -merged before filtration (i.e. the number of neighboring bins that will be -joined to the slice in which filtration and peak detection will be +\item{steps}{\code{numeric(1)} defining the number of bins to be +merged before filtration (i.e. the number of neighboring bins that will +be joined to the slice in which filtration and peak detection will be performed).} -\item{mzdiff}{numeric(1) defining the minimum difference +\item{mzdiff}{\code{numeric(1)} defining the minimum difference in m/z for peaks with overlapping retention times} -\item{index}{Logical specifying whether indicies should be returned instead -of values for m/z and retention times.} +\item{index}{\code{logical(1)} specifying whether indicies should be +returned instead of values for m/z and retention times.} \item{sleep}{(DEFUNCT). 
This parameter is no longer functional, as it would cause problems in parallel processing mode.} \item{scanrange}{Numeric vector defining the range of scans to which the original -\code{object} should be sub-setted before feature detection.} +\code{object} should be sub-setted before peak detection.} } \value{ -A matrix, each row representing an intentified feature, with columns: +A matrix, each row representing an identified chromatographic peak, +with columns: \describe{ -\item{mz}{Intensity weighted mean of m/z values of the feature across scans.} -\item{mzmin}{Minimum m/z of the feature.} -\item{mzmax}{Maximum m/z of the feature.} -\item{rt}{Retention time of the feature's midpoint.} -\item{rtmin}{Minimum retention time of the feature.} -\item{rtmax}{Maximum retention time of the feature.} -\item{into}{Integrated (original) intensity of the feature.} +\item{mz}{Intensity weighted mean of m/z values of the peak across scans.} +\item{mzmin}{Minimum m/z of the peak.} +\item{mzmax}{Maximum m/z of the peak.} +\item{rt}{Retention time of the peak's midpoint.} +\item{rtmin}{Minimum retention time of the peak.} +\item{rtmax}{Maximum retention time of the peak.} +\item{into}{Integrated (original) intensity of the peak.} \item{intf}{Integrated intensity of the filtered peak.} -\item{maxo}{Maximum intensity of the feature.} +\item{maxo}{Maximum intensity of the peak.} \item{maxf}{Maximum intensity of the filtered peak.} -\item{i}{Rank of feature in merged EIC (\code{<= max}).} -\item{sn}{Signal to noise ratio of the feature} +\item{i}{Rank of peak in merged EIC (\code{<= max}).} +\item{sn}{Signal to noise ratio of the peak.} } } \description{ -Find features (peaks) in the chromatographic time domain of the -profile matrix. For more details see \code{\link{do_detectFeatures_matchedFilter}}. -} -\author{ -Colin A. Smith +Find peaks in the chromatographic time domain of the +profile matrix. For more details see +\code{\link{do_findChromPeaks_matchedFilter}}. 
} \references{ Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and @@ -81,7 +80,9 @@ Profiling Using Nonlinear Peak Alignment, Matching, and Identification" \seealso{ \code{\link{matchedFilter}} for the new user interface. \code{\linkS4class{xcmsRaw}}, -\code{\link{do_detectFeatures_matchedFilter}} for the core function -performing the feature detection. +\code{\link{do_findChromPeaks_matchedFilter}} for the core function +performing the peak detection. +} +\author{ +Colin A. Smith } - diff --git a/man/group.density.Rd b/man/group.density.Rd index e1b3a818e..66969afc9 100644 --- a/man/group.density.Rd +++ b/man/group.density.Rd @@ -49,6 +49,8 @@ An \code{xcmsSet} object with peak group assignments and statistics. } \seealso{ + \code{\link{do_groupChromPeaks_density}} for the core API function + performing the analysis. \code{\link{xcmsSet-class}}, \code{\link{density}} } diff --git a/man/groupChromPeaks-density.Rd b/man/groupChromPeaks-density.Rd new file mode 100644 index 000000000..1c8b2bd42 --- /dev/null +++ b/man/groupChromPeaks-density.Rd @@ -0,0 +1,232 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, +% R/methods-Params.R, R/methods-XCMSnExp.R +\docType{class} +\name{groupChromPeaks-density} +\alias{groupChromPeaks-density} +\alias{PeakDensityParam-class} +\alias{PeakDensityParam} +\alias{show,PeakDensityParam-method} +\alias{sampleGroups,PeakDensityParam-method} +\alias{sampleGroups} +\alias{sampleGroups<-,PeakDensityParam-method} +\alias{sampleGroups<-} +\alias{bw,PeakDensityParam-method} +\alias{bw} +\alias{bw<-,PeakDensityParam-method} +\alias{bw<-} +\alias{minFraction,PeakDensityParam-method} +\alias{minFraction} +\alias{minFraction<-,PeakDensityParam-method} +\alias{minFraction<-} +\alias{minSamples,PeakDensityParam-method} +\alias{minSamples} +\alias{minSamples<-,PeakDensityParam-method} +\alias{minSamples<-} +\alias{binSize,PeakDensityParam-method} 
+\alias{binSize<-,PeakDensityParam-method} +\alias{maxFeatures,PeakDensityParam-method} +\alias{maxFeatures} +\alias{maxFeatures<-,PeakDensityParam-method} +\alias{maxFeatures<-} +\alias{groupChromPeaks,XCMSnExp,PeakDensityParam-method} +\title{Peak grouping based on time dimension peak densities} +\usage{ +PeakDensityParam(sampleGroups = numeric(), bw = 30, minFraction = 0.5, + minSamples = 1, binSize = 0.25, maxFeatures = 50) + +\S4method{show}{PeakDensityParam}(object) + +\S4method{sampleGroups}{PeakDensityParam}(object) + +\S4method{sampleGroups}{PeakDensityParam}(object) <- value + +\S4method{bw}{PeakDensityParam}(object) + +\S4method{bw}{PeakDensityParam}(object) <- value + +\S4method{minFraction}{PeakDensityParam}(object) + +\S4method{minFraction}{PeakDensityParam}(object) <- value + +\S4method{minSamples}{PeakDensityParam}(object) + +\S4method{minSamples}{PeakDensityParam}(object) <- value + +\S4method{binSize}{PeakDensityParam}(object) + +\S4method{binSize}{PeakDensityParam}(object) <- value + +\S4method{maxFeatures}{PeakDensityParam}(object) + +\S4method{maxFeatures}{PeakDensityParam}(object) <- value + +\S4method{groupChromPeaks}{XCMSnExp,PeakDensityParam}(object, param) +} +\arguments{ +\item{sampleGroups}{A vector of the same length as samples defining the +sample group assignments (i.e. which samples belong to which sample +group).} + +\item{bw}{\code{numeric(1)} defining the bandwidth (standard deviation of the +smoothing kernel) to be used. 
This argument is passed to the +\code{\link{density}} method.} + +\item{minFraction}{\code{numeric(1)} defining the minimum fraction of samples +in at least one sample group in which the peaks have to be present to be +considered as a peak group (feature).} + +\item{minSamples}{\code{numeric(1)} with the minimum number of samples in at +least one sample group in which the peaks have to be detected to be +considered a peak group (feature).} + +\item{binSize}{\code{numeric(1)} defining the size of the overlapping slices +in mz dimension.} + +\item{maxFeatures}{\code{numeric(1)} with the maximum number of peak groups +to be identified in a single mz slice.} + +\item{object}{For \code{groupChromPeaks}: an \code{\link{XCMSnExp}} object + containing the results from a previous peak detection analysis (see + \code{\link{findChromPeaks}}). + + For all other methods: a \code{PeakDensityParam} object.} + +\item{value}{The value for the slot.} + +\item{param}{A \code{PeakDensityParam} object containing all settings for +the peak grouping algorithm.} +} +\value{ +The \code{PeakDensityParam} function returns a +\code{PeakDensityParam} class instance with all of the settings +specified for chromatographic peak alignment based on peak densities. + +For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the + results of the correspondence analysis. The definition of the resulting + mz-rt features can be accessed with the \code{\link{featureDefinitions}} + method. +} +\description{ +This method performs correspondence (chromatographic + peak grouping) based on the density (distribution) of identified peaks + along the retention time axis within slices of overlapping mz ranges. + All peaks (from the same or from different samples) being close on the + retention time axis are grouped into a feature (\emph{peak group}). + +The \code{PeakDensityParam} class allows to specify all + settings for the peak grouping based on peak densities along the time + dimension. 
Instances should be created with the \code{PeakDensityParam} + constructor. + +\code{sampleGroups},\code{sampleGroups<-}: getter and setter + for the \code{sampleGroups} slot of the object. + +\code{bw},\code{bw<-}: getter and setter for the \code{bw} slot + of the object. + +\code{minFraction},\code{minFraction<-}: getter and setter for + the \code{minFraction} slot of the object. + +\code{minSamples},\code{minSamples<-}: getter and setter for the + \code{minSamples} slot of the object. + +\code{binSize},\code{binSize<-}: getter and setter for the + \code{binSize} slot of the object. + +\code{maxFeatures},\code{maxFeatures<-}: getter and setter for + the \code{maxFeatures} slot of the object. + +\code{groupChromPeaks,XCMSnExp,PeakDensityParam}: + performs correspondence (peak grouping within and across samples) + in mz dimension overlapping slices of MS data based on the density + distribution of the identified chromatographic peaks in the slice along + the time axis. +} +\section{Slots}{ + +\describe{ +\item{\code{.__classVersion__,sampleGroups,bw,minFraction,minSamples,binSize,maxFeatures}}{See corresponding parameter above. \code{.__classVersion__} stores +the version from the class. Slots values should exclusively be accessed +\emph{via} the corresponding getter and setter methods listed above.} +}} + +\note{ +These methods and classes are part of the updated and modernized + \code{xcms} user interface which will eventually replace the + \code{\link{group}} methods. All of the settings to the algorithm + can be passed with a \code{PeakDensityParam} object. + +Calling \code{groupChromPeaks} on an \code{XCMSnExp} object will cause + all eventually present previous correspondence results to be dropped. +} +\examples{ + +## Create a PeakDensityParam object +p <- PeakDensityParam(binSize = 0.05) +## Change the minSamples slot +minSamples(p) <- 3 +p + +############################## +## Chromatographic peak detection and grouping. 
+## +## Below we perform first a peak detection (using the matchedFilter +## method) on some of the test files from the faahKO package followed by +## a peak grouping using the density method. +library(faahKO) +library(MSnbase) +fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, + full.names = TRUE) + +## Reading 2 of the KO samples +raw_data <- readMSData2(fls[1:2]) + +## Perform the chromatographic peak detection using the matchedFilter method. +mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) +res <- findChromPeaks(raw_data, param = mfp) + +head(chromPeaks(res)) +## The number of peaks identified per sample: +table(chromPeaks(res)[, "sample"]) + +## Performing the chromatographic peak grouping +fdp <- PeakDensityParam() +res <- groupChromPeaks(res, fdp) + +## The definition of the features (peak groups): +featureDefinitions(res) + +## Using the featureValues method to extract a matrix with the intensities of +## the features per sample. +head(featureValues(res, value = "into")) + +## The process history: +processHistory(res) +} +\references{ +Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and +Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite +Profiling Using Nonlinear Peak Alignment, Matching, and Identification" +\emph{Anal. Chem.} 2006, 78:779-787. +} +\seealso{ +The \code{\link{do_groupChromPeaks_density}} core + API function and \code{\link{group.density}} for the old user interface. + +\code{\link{plotChromPeakDensity}} to plot peak densities and + evaluate different algorithm settings. + \code{\link{featureDefinitions}} and + \code{\link{featureValues,XCMSnExp-method}} for methods to access the + features (i.e. the peak grouping results). + +\code{\link{XCMSnExp}} for the object containing the results of + the correspondence. 
+ +Other peak grouping methods: \code{\link{groupChromPeaks-mzClust}}, + \code{\link{groupChromPeaks-nearest}}, + \code{\link{groupChromPeaks}} +} +\author{ +Colin Smith, Johannes Rainer +} diff --git a/man/groupChromPeaks-mzClust.Rd b/man/groupChromPeaks-mzClust.Rd new file mode 100644 index 000000000..69ae1f38d --- /dev/null +++ b/man/groupChromPeaks-mzClust.Rd @@ -0,0 +1,178 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, +% R/methods-Params.R, R/methods-XCMSnExp.R +\docType{class} +\name{groupChromPeaks-mzClust} +\alias{groupChromPeaks-mzClust} +\alias{MzClustParam-class} +\alias{MzClustParam} +\alias{show,MzClustParam-method} +\alias{sampleGroups,MzClustParam-method} +\alias{sampleGroups<-,MzClustParam-method} +\alias{ppm,MzClustParam-method} +\alias{ppm<-,MzClustParam-method} +\alias{absMz,MzClustParam-method} +\alias{absMz} +\alias{absMz<-,MzClustParam-method} +\alias{absMz<-} +\alias{minFraction,MzClustParam-method} +\alias{minFraction<-,MzClustParam-method} +\alias{minSamples,MzClustParam-method} +\alias{minSamples<-,MzClustParam-method} +\alias{groupChromPeaks,XCMSnExp,MzClustParam-method} +\title{High resolution peak grouping for single spectra samples} +\usage{ +MzClustParam(sampleGroups = numeric(), ppm = 20, absMz = 0, + minFraction = 0.5, minSamples = 1) + +\S4method{show}{MzClustParam}(object) + +\S4method{sampleGroups}{MzClustParam}(object) + +\S4method{sampleGroups}{MzClustParam}(object) <- value + +\S4method{ppm}{MzClustParam}(object) + +\S4method{ppm}{MzClustParam}(object) <- value + +\S4method{absMz}{MzClustParam}(object) + +\S4method{absMz}{MzClustParam}(object) <- value + +\S4method{minFraction}{MzClustParam}(object) + +\S4method{minFraction}{MzClustParam}(object) <- value + +\S4method{minSamples}{MzClustParam}(object) + +\S4method{minSamples}{MzClustParam}(object) <- value + +\S4method{groupChromPeaks}{XCMSnExp,MzClustParam}(object, param) +} +\arguments{ 
+\item{sampleGroups}{A vector of the same length than samples defining the +sample group assignments (i.e. which samples belong to which sample +group).} + +\item{ppm}{\code{numeric(1)} representing the relative mz error for the +clustering/grouping (in parts per million).} + +\item{absMz}{\code{numeric(1)} representing the absolute mz error for the +clustering.} + +\item{minFraction}{\code{numeric(1)} defining the minimum fraction of samples +in at least one sample group in which the peaks have to be present to be +considered as a peak group (feature).} + +\item{minSamples}{\code{numeric(1)} with the minimum number of samples in at +least one sample group in which the peaks have to be detected to be +considered a peak group (feature).} + +\item{object}{For \code{groupChromPeaks}: an \code{\link{XCMSnExp}} object + containing the results from a previous chromatographic peak detection + analysis (see \code{\link{findChromPeaks}}). + + For all other methods: a \code{MzClustParam} object.} + +\item{value}{The value for the slot.} + +\item{param}{A \code{MzClustParam} object containing all settings for +the peak grouping algorithm.} +} +\value{ +The \code{MzClustParam} function returns a +\code{MzClustParam} class instance with all of the settings +specified for high resolution single spectra peak alignment. + +For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the + results of the peak grouping step (i.e. the features). These can be + accessed with the \code{\link{featureDefinitions}} method. +} +\description{ +This method performs high resolution correspondence for single + spectra samples. + +The \code{MzClustParam} class allows to specify all + settings for the peak grouping based on the \emph{mzClust} algorithm. + Instances should be created with the \code{MzClustParam} constructor. + +\code{sampleGroups},\code{sampleGroups<-}: getter and setter + for the \code{sampleGroups} slot of the object. 
+ +\code{ppm},\code{ppm<-}: getter and setter for the \code{ppm} + slot of the object. + +\code{absMz},\code{absMz<-}: getter and setter for the + \code{absMz} slot of the object. + +\code{minFraction},\code{minFraction<-}: getter and setter for + the \code{minFraction} slot of the object. + +\code{minSamples},\code{minSamples<-}: getter and setter for the + \code{minSamples} slot of the object. + +\code{groupChromPeaks,XCMSnExp,MzClustParam}: + performs high resolution peak grouping for single spectrum + metabolomics data. +} +\section{Slots}{ + +\describe{ +\item{\code{.__classVersion__,sampleGroups,ppm,absMz,minFraction,minSamples}}{See corresponding parameter above. \code{.__classVersion__} stores +the version from the class. Slots values should exclusively be accessed +\emph{via} the corresponding getter and setter methods listed above.} +}} + +\note{ +These methods and classes are part of the updated and modernized + \code{xcms} user interface which will eventually replace the + \code{\link{group}} methods. All of the settings to the algorithm + can be passed with a \code{MzClustParam} object. + +Calling \code{groupChromPeaks} on an \code{XCMSnExp} object will cause + all eventually present previous correspondence results to be dropped. +} +\examples{ + +## Loading a small subset of direct injection, single spectrum files +library(msdata) +fticrf <- list.files(system.file("fticr", package = "msdata"), + recursive = TRUE, full.names = TRUE) +fticr <- readMSData2(fticrf[1:2], msLevel. = 1) + +## Perform the MSW peak detection on these: +p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005, + SNR.method = "data.mean", winSize.noise = 500) +fticr <- findChromPeaks(fticr, param = p) + +head(chromPeaks(fticr)) + +## Now create the MzClustParam parameter object: we're assuming here that +## both samples are from the same sample group. 
+p <- MzClustParam(sampleGroups = c(1, 1)) + +fticr <- groupChromPeaks(fticr, param = p) + +## Get the definition of the features. +featureDefinitions(fticr) +} +\references{ +Saira A. Kazmi, Samiran Ghosh, Dong-Guk Shin, Dennis W. Hill +and David F. Grant\cr \emph{Alignment of high resolution mass spectra: +development of a heuristic approach for metabolomics}.\cr Metabolomics, +Vol. 2, No. 2, 75-83 (2006) +} +\seealso{ +The \code{\link{do_groupPeaks_mzClust}} core API function and + \code{\link{group.mzClust}} for the old user interface. + \code{\link{featureDefinitions}} and + \code{\link{featureValues,XCMSnExp-method}} for methods to access peak + grouping results (i.e. the features). + +\code{\link{XCMSnExp}} for the object containing the results of + the peak grouping. + +Other peak grouping methods: \code{\link{groupChromPeaks-density}}, + \code{\link{groupChromPeaks-nearest}}, + \code{\link{groupChromPeaks}} +} diff --git a/man/groupChromPeaks-nearest.Rd b/man/groupChromPeaks-nearest.Rd new file mode 100644 index 000000000..9af5b51ce --- /dev/null +++ b/man/groupChromPeaks-nearest.Rd @@ -0,0 +1,203 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R, R/functions-Params.R, +% R/methods-Params.R, R/methods-XCMSnExp.R +\docType{class} +\name{groupChromPeaks-nearest} +\alias{groupChromPeaks-nearest} +\alias{NearestPeaksParam-class} +\alias{NearestPeaksParam} +\alias{show,NearestPeaksParam-method} +\alias{sampleGroups,NearestPeaksParam-method} +\alias{sampleGroups<-,NearestPeaksParam-method} +\alias{mzVsRtBalance,NearestPeaksParam-method} +\alias{mzVsRtBalance} +\alias{mzVsRtBalance<-,NearestPeaksParam-method} +\alias{mzVsRtBalance<-} +\alias{absMz,NearestPeaksParam-method} +\alias{absMz<-,NearestPeaksParam-method} +\alias{absRt,NearestPeaksParam-method} +\alias{absRt} +\alias{absRt<-,NearestPeaksParam-method} +\alias{absRt<-} +\alias{kNN,NearestPeaksParam-method} +\alias{kNN} +\alias{kNN<-,NearestPeaksParam-method} 
+\alias{kNN<-}
+\alias{groupChromPeaks,XCMSnExp,NearestPeaksParam-method}
+\title{Peak grouping based on proximity in the mz-rt space}
+\usage{
+NearestPeaksParam(sampleGroups = numeric(), mzVsRtBalance = 10,
+  absMz = 0.2, absRt = 15, kNN = 10)
+
+\S4method{show}{NearestPeaksParam}(object)
+
+\S4method{sampleGroups}{NearestPeaksParam}(object)
+
+\S4method{sampleGroups}{NearestPeaksParam}(object) <- value
+
+\S4method{mzVsRtBalance}{NearestPeaksParam}(object)
+
+\S4method{mzVsRtBalance}{NearestPeaksParam}(object) <- value
+
+\S4method{absMz}{NearestPeaksParam}(object)
+
+\S4method{absMz}{NearestPeaksParam}(object) <- value
+
+\S4method{absRt}{NearestPeaksParam}(object)
+
+\S4method{absRt}{NearestPeaksParam}(object) <- value
+
+\S4method{kNN}{NearestPeaksParam}(object)
+
+\S4method{kNN}{NearestPeaksParam}(object) <- value
+
+\S4method{groupChromPeaks}{XCMSnExp,NearestPeaksParam}(object, param)
+}
+\arguments{
+\item{sampleGroups}{A vector of the same length as samples defining the
+sample group assignments (i.e. which samples belong to which sample
+group).}
+
+\item{mzVsRtBalance}{\code{numeric(1)} representing the factor by which mz
+values are multiplied before calculating the (euclidean) distance between
+two peaks.}
+
+\item{absMz}{\code{numeric(1)} maximum tolerated distance for mz values.}
+
+\item{absRt}{\code{numeric(1)} maximum tolerated distance for rt values.}
+
+\item{kNN}{\code{numeric(1)} representing the number of nearest neighbors
+to check.}
+
+\item{object}{For \code{groupChromPeaks}: an \code{\link{XCMSnExp}} object
+  containing the results from a previous chromatographic peak detection
+  analysis (see \code{\link{findChromPeaks}}). 
+ + For all other methods: a \code{NearestPeaksParam} object.} + +\item{value}{The value for the slot.} + +\item{param}{A \code{NearestPeaksParam} object containing all settings for +the peak grouping algorithm.} +} +\value{ +The \code{NearestPeaksParam} function returns a +\code{NearestPeaksParam} class instance with all of the settings +specified for peak alignment based on peak proximity. + +For \code{groupChromPeaks}: a \code{\link{XCMSnExp}} object with the + results of the peak grouping/correspondence step (i.e. the mz-rt + features). These can be accessed with the + \code{\link{featureDefinitions}} method. +} +\description{ +This method is inspired by the grouping algorithm of mzMine + [Katajamaa 2006] and performs correspondence based on proximity of peaks + in the space spanned by retention time and mz values. + The method creates first a \emph{master peak list} consisting of all + chromatographic peaks from the sample in which most peaks were + identified, and starting from that, calculates distances to peaks from + the sample with the next most number of peaks. If peaks are closer than + the defined threshold they are grouped together. + +The \code{NearestPeaksParam} class allows to specify all + settings for the peak grouping based on the \emph{nearest} algorithm. + Instances should be created with the \code{NearestPeaksParam} constructor. + +\code{sampleGroups},\code{sampleGroups<-}: getter and setter + for the \code{sampleGroups} slot of the object. + +\code{mzVsRtBalance},\code{mzVsRtBalance<-}: getter and setter + for the \code{mzVsRtBalance} slot of the object. + +\code{absMz},\code{absMz<-}: getter and setter for the + \code{absMz} slot of the object. + +\code{absRt},\code{absRt<-}: getter and setter for the + \code{absRt} slot of the object. + +\code{kNN},\code{kNN<-}: getter and setter for the + \code{kNN} slot of the object. 
+
+\code{groupChromPeaks,XCMSnExp,NearestPeaksParam}:
+    performs peak grouping based on the proximity between chromatographic
+    peaks from different samples in the mz-rt range.
+}
+\section{Slots}{
+
+\describe{
+\item{\code{.__classVersion__,sampleGroups,mzVsRtBalance,absMz,absRt,kNN}}{See corresponding parameter above. \code{.__classVersion__} stores
+the version from the class. Slots values should exclusively be accessed
+\emph{via} the corresponding getter and setter methods listed above.}
+}}
+
+\note{
+These methods and classes are part of the updated and modernized
+    \code{xcms} user interface which will eventually replace the
+    \code{\link{group}} methods. All of the settings to the algorithm
+    can be passed with a \code{NearestPeaksParam} object.
+
+Calling \code{groupChromPeaks} on an \code{XCMSnExp} object will cause
+    all eventually present previous alignment results to be dropped.
+}
+\examples{
+
+## Create a NearestPeaksParam object
+p <- NearestPeaksParam(kNN = 3)
+p
+
+##############################
+## Chromatographic peak detection and grouping.
+##
+## Below we perform first a chromatographic peak detection (using the
+## matchedFilter method) on some of the test files from the faahKO package
+## followed by a peaks grouping using the "nearest" method.
+library(faahKO)
+library(MSnbase)
+fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
+           full.names = TRUE)
+
+## Reading 2 of the KO samples
+raw_data <- readMSData2(fls[1:2])
+
+## Perform the peak detection using the matchedFilter method. 
+mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) +res <- findChromPeaks(raw_data, param = mfp) + +head(chromPeaks(res)) +## The number of peaks identified per sample: +table(chromPeaks(res)[, "sample"]) + +## Performing the peak grouping +p <- NearestPeaksParam() +res <- groupChromPeaks(res, param = p) + +## The results from the peak grouping: +featureDefinitions(res) + +## Using the featureValues method to extract a matrix with the intensities of +## the features per sample. +head(featureValues(res, value = "into")) + +## The process history: +processHistory(res) +} +\references{ +Katajamaa M, Miettinen J, Oresic M: MZmine: Toolbox for +processing and visualization of mass spectrometry based molecular profile +data. \emph{Bioinformatics} 2006, 22:634-636. +} +\seealso{ +The \code{\link{do_groupChromPeaks_nearest}} core + API function and \code{\link{group.nearest}} for the old user interface. + \code{\link{featureDefinitions}} and + \code{\link{featureValues,XCMSnExp-method}} for methods to access + peak grouping results (i.e. the features). + +\code{\link{XCMSnExp}} for the object containing the results of + the peak grouping. + +Other peak grouping methods: \code{\link{groupChromPeaks-density}}, + \code{\link{groupChromPeaks-mzClust}}, + \code{\link{groupChromPeaks}} +} diff --git a/man/groupChromPeaks.Rd b/man/groupChromPeaks.Rd new file mode 100644 index 000000000..2adda95b7 --- /dev/null +++ b/man/groupChromPeaks.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataClasses.R +\name{groupChromPeaks} +\alias{groupChromPeaks} +\title{Correspondence: Chromatographic peak grouping methods.} +\description{ +The \code{groupChromPeaks} method(s) perform the correspondence, + i.e. the grouping of chromatographic peaks within and between samples. + These methods are part of the modernized \code{xcms} user interface. 
+ The resulting peak groups are referred to as (mz-rt) features and can be + accessed \emph{via} the \code{\link{featureDefinitions}} method on the + result object. + + The implemented peak grouping methods are: + \describe{ + + \item{density}{peak grouping based on time dimension peak densities. + See \code{\link{groupChromPeaks-density}} for more details.} + + \item{mzClust}{high resolution peak grouping for single spectra (direct + infusion) MS data. See \code{\link{groupChromPeaks-mzClust}} for more + details.} + + \item{nearest}{chromatographic peak grouping based on their proximity in + the mz-rt space. See \code{\link{groupChromPeaks-nearest}} for more + details.} + +} +} +\seealso{ +\code{\link{group}} for the \emph{old} peak grouping methods. + \code{\link{featureDefinitions}} and + \code{\link{featureValues,XCMSnExp-method}} for methods to access peak + grouping results. + +Other peak grouping methods: \code{\link{groupChromPeaks-density}}, + \code{\link{groupChromPeaks-mzClust}}, + \code{\link{groupChromPeaks-nearest}} +} +\author{ +Johannes Rainer +} diff --git a/man/imputeLinInterpol.Rd b/man/imputeLinInterpol.Rd index 2e094363b..51ecbf848 100644 --- a/man/imputeLinInterpol.Rd +++ b/man/imputeLinInterpol.Rd @@ -114,4 +114,3 @@ points(x = 1:length(x), y = xInt, type = "l", col = "grey") \author{ Johannes Rainer } - diff --git a/man/plotAdjustedRtime.Rd b/man/plotAdjustedRtime.Rd new file mode 100644 index 000000000..e1e4d1620 --- /dev/null +++ b/man/plotAdjustedRtime.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-XCMSnExp.R +\name{plotAdjustedRtime} +\alias{plotAdjustedRtime} +\title{Visualization of alignment results} +\usage{ +plotAdjustedRtime(object, col = "#00000080", lty = 1, type = "l", + adjustedRtime = TRUE, xlab = ifelse(adjustedRtime, yes = + expression(rt[adj]), no = expression(rt[raw])), ylab = expression(rt[adj] - + rt[raw]), peakGroupsCol = "#00000060", peakGroupsPch = 16, + 
peakGroupsLty = 3, ...) +} +\arguments{ +\item{object}{A \code{\link{XCMSnExp}} object with the alignment results.} + +\item{col}{colors to be used for the lines corresponding to the individual +samples.} + +\item{lty}{line type to be used for the lines of the individual samples.} + +\item{type}{plot type to be used. See help on the \code{par} function for +supported values.} + +\item{adjustedRtime}{logical(1) whether adjusted or raw retention times +should be shown on the x-axis.} + +\item{xlab}{the label for the x-axis.} + +\item{ylab}{the label for the y-axis.} + +\item{peakGroupsCol}{color to be used for the peak groups (only used if +alignment was performed using the \code{\link{adjustRtime-peakGroups}} +method.} + +\item{peakGroupsPch}{point character (\code{pch}) to be used for the peak +groups (only used if alignment was performed using the +\code{\link{adjustRtime-peakGroups}} method.} + +\item{peakGroupsLty}{line type (\code{lty}) to be used to connect points for +each peak groups (only used if alignment was performed using the +\code{\link{adjustRtime-peakGroups}} method.} + +\item{...}{Additional arguments to be passed down to the \code{plot} +function.} +} +\description{ +Plot the difference between the adjusted and the raw retention + time (y-axis) for each file along the (adjusted or raw) retention time + (x-axis). If alignment was performed using the + \code{\link{adjustRtime-peakGroups}} method, also the features (peak + groups) used for the alignment are shown. 
+} +\examples{ +## Below we perform first a peak detection (using the matchedFilter +## method) on some of the test files from the faahKO package followed by +## a peak grouping and retention time adjustment using the "peak groups" +## method +library(faahKO) +library(xcms) +fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, + full.names = TRUE) + +## Reading 2 of the KO samples +raw_data <- readMSData2(fls[1:2]) + +## Perform the peak detection using the matchedFilter method. +mfp <- MatchedFilterParam(snthresh = 20, binSize = 1) +res <- findChromPeaks(raw_data, param = mfp) + +## Performing the peak grouping using the "peak density" method. +p <- PeakDensityParam(sampleGroups = c(1, 1)) +res <- groupChromPeaks(res, param = p) + +## Perform the retention time adjustment using peak groups found in both +## files. +fgp <- PeakGroupsParam(minFraction = 1) +res <- adjustRtime(res, param = fgp) + +## Visualize the impact of the alignment. We show both versions of the plot, +## with the raw retention times on the x-axis (top) and with the adjusted +## retention times (bottom). +par(mfrow = c(2, 1)) +plotAdjustedRtime(res, adjusted = FALSE) +grid() +plotAdjustedRtime(res) +grid() +} +\seealso{ +\code{\link{adjustRtime}} for all retention time correction/ + alignment methods. +} +\author{ +Johannes Rainer +} diff --git a/man/plotChromPeakDensity.Rd b/man/plotChromPeakDensity.Rd new file mode 100644 index 000000000..9fe333079 --- /dev/null +++ b/man/plotChromPeakDensity.Rd @@ -0,0 +1,101 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-XCMSnExp.R +\name{plotChromPeakDensity} +\alias{plotChromPeakDensity} +\title{Plot chromatographic peak density along the retention time axis} +\usage{ +plotChromPeakDensity(object, mz, rt, param = PeakDensityParam(), + col = "#00000080", xlab = "retention time", ylab = "sample", + xlim = range(rt), ...) 
+} +\arguments{ +\item{object}{A \code{\link{XCMSnExp}} object with identified +chromatographic peaks.} + +\item{mz}{\code{numeric(2)} defining an mz range for which the peak density +should be plotted.} + +\item{rt}{\code{numeric(2)} defining an optional rt range for which the +peak density should be plotted. Defaults to the absolute retention time +range of \code{object}.} + +\item{param}{\code{\link{PeakDensityParam}} from which parameters for the +\emph{peak density} correspondence algorithm can be extracted.} + +\item{col}{Color to be used for the individual samples. Length has to be 1 +or equal to the number of samples in \code{object}.} + +\item{xlab}{\code{character(1)} with the label for the x-axis.} + +\item{ylab}{\code{character(1)} with the label for the y-axis.} + +\item{xlim}{\code{numeric(2)} representing the limits for the x-axis. +Defaults to the range of the \code{rt} parameter.} + +\item{...}{Additional parameters to be passed to the \code{plot} function.} +} +\value{ +The function is called for its side effect, i.e. to create a plot. +} +\description{ +Plot the density of chromatographic peaks along the retention + time axis and indicate which peaks would be grouped into the same feature + based using the \emph{peak density} correspondence method. Settings for + the \emph{peak density} method can be passed with an + \code{\link{PeakDensityParam}} object to parameter \code{param}. +} +\details{ +The \code{plotChromPeakDensity} function allows to evaluate + different settings for the \emph{peak density} on an mz slice of + interest (e.g. containing chromatographic peaks corresponding to a known + metabolite). + The plot shows the individual peaks that were detected within the + specified \code{mz} slice at their retention time (x-axis) and sample in + which they were detected (y-axis). The density function is plotted as a + black line. Parameters for the \code{density} function are taken from the + \code{param} object. 
Grey rectangles indicate which chromatographic peaks + would be grouped into a feature by the \emph{peak density} correspondence + method. Parameters for the algorithm are also taken from \code{param}. + See \code{\link{groupChromPeaks-density}} for more information about the + algorithm and its supported settings. +} +\examples{ + +## Below we perform first a peak detection (using the centWave +## method) on some of the test files from the faahKO package. +library(faahKO) +library(xcms) +fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE, + full.names = TRUE) + +## Reading 2 of the KO samples +raw_data <- readMSData2(fls[1:2]) + +## Perform the peak detection using the centWave method. +res <- findChromPeaks(raw_data, param = CentWaveParam(noise = 1000)) + +## Align the samples using obiwarp +res <- adjustRtime(res, param = ObiwarpParam()) + +## Plot the chromatographic peak density for a specific mz range to evaluate +## different peak density correspondence settings. +mzr <- c(305.05, 305.15) + +plotChromPeakDensity(res, mz = mzr, param = PeakDensityParam(), pch = 16) + +## Use a larger bandwidth +plotChromPeakDensity(res, mz = mzr, param = PeakDensityParam(bw = 60), + pch = 16) +## Neighboring peaks are now fused into one. + +## Require the chromatographic peak to be present in all samples of a group +plotChromPeakDensity(res, mz = mzr, pch = 16, + param = PeakDensityParam(minFraction = 1)) +} +\seealso{ +\code{\link{groupChromPeaks-density}} for details on the + \emph{peak density} correspondence method and supported settings. 
+} +\author{ +Johannes Rainer +} diff --git a/man/plotChromatogram.Rd b/man/plotChromatogram.Rd new file mode 100644 index 000000000..03ff29dba --- /dev/null +++ b/man/plotChromatogram.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/functions-Chromatogram.R +\name{plotChromatogram} +\alias{plotChromatogram} +\alias{highlightChromPeaks} +\title{Plot Chromatogram objects} +\usage{ +plotChromatogram(x, rt, col = "#00000060", lty = 1, type = "l", + xlab = "retention time", ylab = "intensity", main = NULL, ...) + +highlightChromPeaks(x, rt, mz, border = rep("00000040", length(fileNames(x))), + lwd = 1, col = NA, type = c("rect", "point"), ...) +} +\arguments{ +\item{x}{For \code{plotChromatogram}: \code{list} of +\code{\link{Chromatogram}} objects. Such as extracted from an +\code{\link{XCMSnExp}} object by the \code{\link{extractChromatograms}} +method. +For \code{highlightChromPeaks}: \code{XCMSnExp} object with the detected +peaks.} + +\item{rt}{For \code{plotChromatogram}: \code{numeric(2)}, optional parameter +to subset each \code{Chromatogram} by retention time prior to plotting. +Alternatively, the plot could be subsetted by passing a \code{xlim} +parameter. +For \code{highlightChromPeaks}: \code{numeric(2)} with the +retention time range from which peaks should be extracted and plotted.} + +\item{col}{For \code{plotChromatogram}: color definition for each +line/sample. Has to have the same length as samples/elements in \code{x}, +otherwise \code{col[1]} is recycled to generate a vector of +\code{length(x)}. +For \code{highlightChromPeaks}: color to be used to fill the +rectangle.} + +\item{lty}{the line type. See \code{\link[graphics]{plot}} for more details.} + +\item{type}{the plotting type. See \code{\link[graphics]{plot}} for more +details. 
+For \code{highlightChromPeaks}: \code{character(1)} defining how the peak +should be highlighted: \code{type = "rect"} draws a rectangle +representing the peak definition, \code{type = "point"} indicates a +chromatographic peak with a single point at the position of the peak's +\code{"rt"} and \code{"maxo"}.} + +\item{xlab}{\code{character(1)} with the label for the x-axis.} + +\item{ylab}{\code{character(1)} with the label for the y-axis.} + +\item{main}{The title for the plot. For \code{plotChromatogram}: if +\code{main = NULL} the mz range of the \code{Chromatogram} object(s) will +be used as the title.} + +\item{...}{additional parameters to the \code{\link{matplot}} or \code{plot} +function.} + +\item{mz}{\code{numeric(2)} with the mz range from which the peaks should +be extracted and plotted.} + +\item{border}{colors to be used to color the border of the rectangles. Has to +be equal to the number of samples in \code{x}.} + +\item{lwd}{\code{numeric(1)} defining the width of the line/border.} +} +\description{ +\code{plotChromatogram} creates a chromatogram plot for a + single \code{Chromatogram} object or a \code{list} of + \code{\link{Chromatogram}} objects (one line for each + \code{\link{Chromatogram}}/sample). + +The \code{highlightChromPeaks} function adds chromatographic + peak definitions to an existing plot, such as one created by the + \code{plotChromatograms} function. +} +\details{ +The \code{plotChromatogram} function allows to efficiently plot + the chromatograms of several samples into a single plot. +} +\examples{ + +## Perform a fast peak detection. 
+library(xcms) +library(faahKO) +faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), + system.file('cdf/KO/ko16.CDF', package = "faahKO"), + system.file('cdf/KO/ko18.CDF', package = "faahKO")) + +od <- readMSData2(faahko_3_files) + +od <- findChromPeaks(od, param = CentWaveParam(snthresh = 20, noise = 10000)) + +rtr <- c(2600, 2750) +mzr <- c(344, 344) +chrs <- extractChromatograms(od, rt = rtr, mz = mzr) + +## Plot a single chromatogram +plotChromatogram(chrs[[1]]) + +## Plot all chromatograms at once, using different colors for each. +plotChromatogram(chrs, col = c("#FF000080", "#00FF0080", "#0000FF80"), lwd = 2) + +## Highlight identified chromatographic peaks. +highlightChromPeaks(od, rt = rtr, mz = mzr, + col = c("#FF000005", "#00FF0005", "#0000FF05"), + border = c("#FF000040", "#00FF0040", "#0000FF40")) + +} +\seealso{ +\code{\link{extractChromatograms}} for how to extract a list of + \code{\link{Chromatogram}} objects from an \code{\link{XCMSnExp}} + objects. +} +\author{ +Johannes Rainer +} diff --git a/man/plotQC.Rd b/man/plotQC.Rd index b88fa94d4..32c8539d1 100644 --- a/man/plotQC.Rd +++ b/man/plotQC.Rd @@ -54,4 +54,3 @@ plotQC(xsg, what="rtdevsample") \author{ Michael Wenk, Michael Wenk } - diff --git a/man/profMat-xcmsRaw-method.Rd b/man/profMat-xcmsSet.Rd similarity index 97% rename from man/profMat-xcmsRaw-method.Rd rename to man/profMat-xcmsSet.Rd index d6000ea88..8b15474d9 100644 --- a/man/profMat-xcmsRaw-method.Rd +++ b/man/profMat-xcmsSet.Rd @@ -1,10 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods-xcmsRaw.R \docType{methods} -\name{profMat,xcmsRaw-method} +\name{profMat-xcmsSet} +\alias{profMat-xcmsSet} +\alias{profile-matrix} \alias{profMat} \alias{profMat,xcmsRaw-method} -\alias{profile-matrix} \title{The profile matrix} \usage{ \S4method{profMat}{xcmsRaw}(object, method, step, baselevel, basespace, @@ -92,13 +93,15 @@ profmat <- profMat(xraw, step = 0.3, method = "binlin") 
profMethod(xraw) <- "binlin" profmat_2 <- profMat(xraw, step = 0.3) all.equal(profmat, profmat_2) -} -\author{ -Johannes Rainer + } \seealso{ \code{\linkS4class{xcmsRaw}}, \code{\link{binYonX}} and \code{\link{imputeLinInterpol}} for the employed binning and missing value imputation methods, respectively. +\code{\link{profMat,XCMSnExp-method}} for the method on \code{\link{XCMSnExp}} +objects. +} +\author{ +Johannes Rainer } - diff --git a/man/showError-xcmsSet-method.Rd b/man/showError-xcmsSet-method.Rd index 939f4002f..65d9d3f90 100644 --- a/man/showError-xcmsSet-method.Rd +++ b/man/showError-xcmsSet-method.Rd @@ -2,8 +2,8 @@ % Please edit documentation in R/methods-xcmsSet.R \docType{methods} \name{showError,xcmsSet-method} -\alias{showError} \alias{showError,xcmsSet-method} +\alias{showError} \title{Extract processing errors} \usage{ \S4method{showError}{xcmsSet}(object, message. = TRUE, ...) @@ -21,7 +21,7 @@ A list of error messages (if \code{message. = TRUE}) or errors or an empty list if no errors are present. } \description{ -If feature detection is performed with \code{\link{findPeaks}} +If peak detection is performed with \code{\link{findPeaks}} setting argument \code{stopOnError = FALSE} eventual errors during the process do not cause to stop the processing but are recorded inside of the resulting \code{\linkS4class{xcmsSet}} object. These errors can be accessed @@ -30,4 +30,3 @@ with the \code{showError} method. 
\author{ Johannes Rainer } - diff --git a/man/sub-xcmsRaw-logicalOrNumeric-missing-missing-method.Rd b/man/sub-xcmsRaw-logicalOrNumeric-missing-missing-method.Rd index ad49dafaf..62c25d8fa 100644 --- a/man/sub-xcmsRaw-logicalOrNumeric-missing-missing-method.Rd +++ b/man/sub-xcmsRaw-logicalOrNumeric-missing-missing-method.Rd @@ -48,10 +48,9 @@ length(xsub@scantime) ## The number of values of the subset: length(xsub@env$mz) } -\author{ -Johannes Rainer -} \seealso{ \code{\link{split.xcmsRaw}} } - +\author{ +Johannes Rainer +} diff --git a/man/updateObject-xcmsSet-method.Rd b/man/updateObject-xcmsSet-method.Rd index 6095129c5..18eea6020 100644 --- a/man/updateObject-xcmsSet-method.Rd +++ b/man/updateObject-xcmsSet-method.Rd @@ -25,4 +25,3 @@ object to the latest definition. \author{ Johannes Rainer } - diff --git a/man/useOriginalCode.Rd b/man/useOriginalCode.Rd index 9e94f5f57..fb223073f 100644 --- a/man/useOriginalCode.Rd +++ b/man/useOriginalCode.Rd @@ -12,25 +12,23 @@ old code should be used in corresponding functions. If not provided the function simply returns the value of the global option.} } \value{ -logical(1) indicating whether old code is being -used. +logical(1) indicating whether old code is being used. } \description{ This function allows to enable the usage of old, partially -deprecated code from xcms by setting a corresponding global option. See -details for functions affected. + deprecated code from xcms by setting a corresponding global option. See + details for functions affected. } \details{ The functions/methods that will be affected by this are: -\itemize{ -\item \code{\link{do_detectFeatures_matchedFilter}} -} + \itemize{ + \item \code{\link{do_findChromPeaks_matchedFilter}} + } } \note{ Usage of old code is strongly dicouraged. This function is thought -to be used mainly in the transition phase from xcms to xcms version 3. + to be used mainly in the transition phase from xcms to xcms version 3. 
} \author{ Johannes Rainer } - diff --git a/readme.org b/readme.org index 5cb8f597e..fb0434509 100644 --- a/readme.org +++ b/readme.org @@ -1,3 +1,6 @@ +#+HTML:

+ + * The =xcms3= package The =xcms3= package is an updated and partially re-written version of the =xcms= @@ -31,3 +34,5 @@ of the original software's functionality, =xcms3= aims at: Discussions and suggestions are welcome: https://github.com/sneumann/xcms/issues (/xcms3/ tag). + +For more information see also the [[file:vignettes/new_functionality.Rmd]] file. diff --git a/src/massifquant/TrMgr.cpp b/src/massifquant/TrMgr.cpp index 12ac19f1d..1a4c9f8e4 100644 --- a/src/massifquant/TrMgr.cpp +++ b/src/massifquant/TrMgr.cpp @@ -517,26 +517,26 @@ bool TrMgr::hasMzDeviation(int i) { return false; } -bool TrMgr::isSeizmo(int i) { - - std::list mzList = trks[i]->getMzList(); - std::vector mzVec(mzList.begin(), mzList.end()); - std::vector rmz = mzVec; //make copy - int midIdx = int(mzList.size()/2); - int n = mzList.size() - midIdx; - for (int i = 0; i < 3; ++i) { - random_shuffle ( rmz.begin(), rmz.end() ); - std::vector seizmo(n); - int k = 0; - for (size_t j = midIdx; j < mzVec.size(); ++j) { - seizmo[k] = fabs(rmz.at(j) - mzVec.at(j)); - } - for (size_t z = 0; z < seizmo.size(); ++z) { - if (seizmo.at(z) > 0.01) { return true; } - } - } - return false; -} +// bool TrMgr::isSeizmo(int i) { + +// std::list mzList = trks[i]->getMzList(); +// std::vector mzVec(mzList.begin(), mzList.end()); +// std::vector rmz = mzVec; //make copy +// int midIdx = int(mzList.size()/2); +// int n = mzList.size() - midIdx; +// for (int i = 0; i < 3; ++i) { +// random_shuffle ( rmz.begin(), rmz.end() ); +// std::vector seizmo(n); +// int k = 0; +// for (size_t j = midIdx; j < mzVec.size(); ++j) { +// seizmo[k] = fabs(rmz.at(j) - mzVec.at(j)); +// } +// for (size_t z = 0; z < seizmo.size(); ++z) { +// if (seizmo.at(z) > 0.01) { return true; } +// } +// } +// return false; +// } void TrMgr::shiftUpIndices(const int i) { diff --git a/src/massifquant/TrMgr.h b/src/massifquant/TrMgr.h index 6123f43ba..c7cc1dea2 100644 --- a/src/massifquant/TrMgr.h +++ b/src/massifquant/TrMgr.h @@ -54,7 +54,7 @@ 
class TrMgr { bool hasMzDeviation(int i); - bool isSeizmo(int i); + /* bool isSeizmo(int i); */ public: diff --git a/src/xcms_obiwarp.cpp b/src/xcms_obiwarp.cpp index 844586783..998db09dd 100644 --- a/src/xcms_obiwarp.cpp +++ b/src/xcms_obiwarp.cpp @@ -39,7 +39,7 @@ extern "C" SEXP R_set_from_xcms(SEXP valscantime, SEXP scantime, SEXP mzrange, S double *pscantime2, *pmz2, *pintensity2; SEXP corrected; - valscantime = coerceVector(valscantime, INTSXP); + PROTECT(valscantime = coerceVector(valscantime, INTSXP)); mzrange = coerceVector(mzrange, INTSXP); pvalscantime = INTEGER(valscantime)[0]; pmzrange = INTEGER(mzrange)[0]; @@ -47,7 +47,7 @@ extern "C" SEXP R_set_from_xcms(SEXP valscantime, SEXP scantime, SEXP mzrange, S pmz = REAL(mz); pintensity = REAL(intensity); - valscantime2 = coerceVector(valscantime2, INTSXP); + PROTECT(valscantime2 = coerceVector(valscantime2, INTSXP)); mzrange2 = coerceVector(mzrange2, INTSXP); pvalscantime2 = INTEGER(valscantime2)[0]; pmzrange2 = INTEGER(mzrange2)[0]; @@ -125,7 +125,7 @@ extern "C" SEXP R_set_from_xcms(SEXP valscantime, SEXP scantime, SEXP mzrange, S REAL(corrected)[i] = lmat2.tm()->back()[i]; } - UNPROTECT(1); + UNPROTECT(3); return corrected; diff --git a/tests/doRUnit.R b/tests/doRUnit.R index a2bfd96be..729feb8af 100644 --- a/tests/doRUnit.R +++ b/tests/doRUnit.R @@ -2,7 +2,7 @@ if(require("RUnit", quietly=TRUE)) { ## --- Setup --- - + pkg <- "xcms" # <-- Change to package name! 
if(Sys.getenv("RCMDCHECK") == "FALSE") { ## Path to unit tests for standalone running under Makefile (not R CMD check) @@ -22,25 +22,36 @@ if(require("RUnit", quietly=TRUE)) { attr(faahko, "filepaths") <- sapply(as.list(basename(attr(faahko, "filepaths"))), function(x) system.file("cdf", if (length(grep("ko",x)) > 0) "KO" else "WT" ,x, package = "faahKO")) - ## faahko_grouped <- group(faahko) - ## faahko_grouped_filled <- fillPeaks(faahko_grouped) - ## faahko_processed <- fillPeaks(group(retcor(faahko_grouped))) - ## Disable parallel processing for the unit tests library(BiocParallel) register(SerialParam()) - + ## Create some objects we can re-use in different tests: ## Needed in runit.XCMSnExp.R faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"), system.file('cdf/KO/ko16.CDF', package = "faahKO"), system.file('cdf/KO/ko18.CDF', package = "faahKO")) - + ## An xcmsRaw for the first file: faahko_xr_1 <- xcmsRaw(system.file('cdf/KO/ko15.CDF', package = "faahKO"), profstep = 0) faahko_od <- readMSData2(faahko_3_files) + ## Feature alignment on those: + faahko_xod <- findChromPeaks(faahko_od, param = CentWaveParam(noise = 10000, + snthresh = 40)) + faahko_xs <- xcmsSet(faahko_3_files, profparam = list(step = 0), + method = "centWave", noise = 10000, snthresh = 40) + ## faahko_xod <- findChromPeaks(faahko_od, param = CentWaveParam(noise = 5000)) + ## faahko_xs <- xcmsSet(faahko_3_files, profparam = list(step = 0), + ## method = "centWave", noise = 5000) + ## Doing also the retention time correction etc + od_x <- faahko_od + xod_x <- faahko_xod + xod_xg <- groupChromPeaks(xod_x, param = PeakDensityParam()) + xod_xgr <- adjustRtime(xod_xg, param = PeakGroupsParam(span = 0.4)) + xod_xgrg <- groupChromPeaks(xod_xgr, param = PeakDensityParam()) + ## microtofq library(msdata) microtofq_fs <- c(system.file("microtofq/MM14.mzML", package = "msdata"), @@ -48,6 +59,19 @@ if(require("RUnit", quietly=TRUE)) { microtofq_xr <- xcmsRaw(microtofq_fs[1], profstep = 
0) microtofq_od <- readMSData2(microtofq_fs) + ## Direct injection data: + fticrf <- list.files(system.file("fticr", package = "msdata"), + recursive = TRUE, full.names = TRUE) + fticr <- readMSData2(fticrf[1:2], msLevel. = 1) + fticr_xod <- findChromPeaks(fticr, MSWParam(scales = c(1, 7), + peakThr = 80000, ampTh = 0.005, + SNR.method = "data.mean", + winSize.noise = 500)) + fticr_xs <- xcmsSet(method="MSW", files=fticrf[1:2], scales=c(1,7), + SNR.method='data.mean' , winSize.noise=500, + peakThr=80000, amp.Th=0.005) + + ## microtofq_xod <- findChromPeaks(microtofq_od, param = MSWParam()) ## If desired, load the name space to allow testing of private functions ## if (is.element(pkg, loadedNamespaces())) ## attach(loadNamespace(pkg), name=paste("namespace", pkg, sep=":"), pos=3) diff --git a/vignettes/new_functionality.Rmd b/vignettes/new_functionality.Rmd index 98d6566d0..e658787f0 100644 --- a/vignettes/new_functionality.Rmd +++ b/vignettes/new_functionality.Rmd @@ -30,44 +30,70 @@ in the `xcms` package introduced during the update to version *3*. ```{r message = FALSE, warning = FALSE} library(xcms) library(RColorBrewer) +register(SerialParam()) ``` ## Modernized user interface -The modernization of the user interface comprises the definition of a new method -for feature detection, namely `detectFeatures`, that performs the dispatching to -the various algorithms (e.g. *centWave*, *matchedFilter*) based on a parameter class -submitted *via* the `param` argument. The rationale behind changing the name from -`findPeaks` to `detectFeatures` to discriminate *peak* (i.e. mass peak mostly used in -proteomics), from *feature* (which describes a peak in chromatographic space). 
- -The encapsulation of the parameters to a function into a parameter class (such -as `CentWaveParam`) avoids busy function calls (with many single parameters to -specify) and enables saving, reloading and reusing settings as well as, by -adding them to the result objects, enabling the documentation of the data -processing history. In addition, parameter validation can be performed within -the parameter object and hence is no longer required in the analysis function. - -The implementation of the `detectFeatures` method for `OnDiskMSnExp` and `MSnExp` -objects (from the `MSnbase` package) enable feature detection on these types of -objects that represent the full MS data of an experiment along with all -experiment relevant metadata. Loading the files first into an `MSnExp` or -`OnDiskMSnExp` (the latter being specifically designed for large scale -experiments) enables a first data inspection and quality assessment, followed by -eventual subsetting and filtering (e.g. on retention time) before performing the -feature detection on the thus processed data. - -Below we load the raw data files from the `faahKO` package as an `OnDiskMSnExp` -object using the `readMSData2` function from the `MSnbase` package. +The modernization of the user interface comprises new classes for data +representation and new data analysis methods. In addition, the core logic for +the data processing has been extracted from the old methods and put into a set +of R functions, the so called core API functions (or `do_` functions). These +functions take standard R data structures as input and return standard R data +types as result and can hence be easily included in other R packages. + +The new user interface aims at simplifying and streamlining the `xcms` workflow +while guaranteeing data integrity and performance also for large scale +metabolomics experiments. Importantly, a simplified access to the original raw +data should be provided throughout the whole metabolomics data analysis workflow. 
+
+The new interface re-uses objects from the `MSnbase` Bioconductor package, such as
+the `OnDiskMSnExp` object. This object is specifically designed for large scale MS
+experiments as it initially reads just the scan header information from the mzML
+while the mz-intensity value pairs from all or from selected spectra of a file
+are read on demand hence minimizing the memory demand. Also, in contrast to
+the old `xcmsRaw` object, the `OnDiskMSnExp` contains information from all files of
+an experiment. In addition, all data normalization and adjustment methods
+implemented in the `MSnbase` package can be directly applied to the MS data
+without the need to re-implement such methods in `xcms`. Results from `xcms`
+preprocessings, such as chromatographic peak detection or correspondence are
+stored into the new `XCMSnExp` object. This object extends the `OnDiskMSnExp` object
+and inherits thus all of its methods including raw data access.
+
+Class and method/function names follow also a new naming convention trying to
+avoid the partially confusing nomenclature of the original `xcms` methods (such as
+the `group` method to perform the correspondence of peaks across samples). To
+distinguish them from mass peaks, the peaks identified by the peak detection in
+an LC/GC-MS experiment are referred to as *chromatographic peaks*. The respective
+method to identify such peaks is hence called `findChromPeaks` and the identified
+peaks can be accessed using the `XCMSnExp` `chromPeaks` method. The results from a
+correspondence analysis which aims to match and group chromatographic peaks
+within and between samples are called *features*. The definition of such mz-rt
+features (i.e. the result from the `groupChromPeaks` method) can be accessed *via*
+the `featureDefinitions` method of the `XCMSnExp` class. Finally, alignment
+(retention time correction) can be performed using the `adjustRtime` method.
+ +The settings for any of the new analysis methods are bundled in *parameter* +classes, one class for each method. This encapsulation of the parameters to a +function into a parameter class (such as `CentWaveParam`) avoids busy function +calls (with many single parameters) and enables saving, reloading and reusing +the settings. In addition, the parameter classes are added, along with other +information to the process history of an `XCMSnExp` object thus providing a +detailed documentation of each processing step of an analysis, with the +possibility to recall all settings of the performed analyses at any stage. In +addition, validation of the parameters can be performed within the parameter +object and hence is no longer required in the analysis function. + +The example below illustrates the new user interface. First we load the raw data +files from the `faahKO` package using the `readMSData2` from the `MSnbase` package. ```{r message = FALSE, warning = FALSE} ## Reading the raw data using the MSnbase package -library(MSnbase) library(xcms) -## Load the CDF files from the faahKO +## Load 6 of the CDF files from the faahKO cdf_files <- dir(system.file("cdf", package = "faahKO"), recursive = TRUE, - full.names = TRUE) + full.names = TRUE)[c(1:3, 7:9)] ## Define the sample grouping. s_groups <- rep("KO", length(cdf_files)) @@ -82,9 +108,10 @@ raw_data <- readMSData2(cdf_files, pdata = new("NAnnotatedDataFrame", pheno)) ``` We next plot the total ion chromatogram (TIC) for all files within the -experiment. Note that we are iteratively sub-setting the full data per file, -which for `OnDiskMSnExp` is an efficient way to subset the data while ensuring -that all data, including metadata, stays consistent. +experiment. Note that we are iteratively sub-setting the full data per file +using the `filterFile` method, which, for `OnDiskMSnExp` objects, is an efficient +way to subset the data while ensuring that all data, including metadata, stays +consistent. 
```{r faahKO-tic, message = FALSE, fig.align = 'center', fig.width = 8, fig.height = 4} library(RColorBrewer) @@ -93,17 +120,412 @@ names(sample_colors) <- c("KO", "WT") ## Subset the full raw data by file and plot the data. tmp <- filterFile(raw_data, file = 1) plot(x = rtime(tmp), y = tic(tmp), xlab = "retention time", ylab = "TIC", - col = sample_colors[pData(tmp)$sample_group], type = "l") + col = paste0(sample_colors[pData(tmp)$sample_group], 80), type = "l") for (i in 2:length(fileNames(raw_data))) { tmp <- filterFile(raw_data, file = i) points(rtime(tmp), tic(tmp), type = "l", - col = sample_colors[pData(tmp)$sample_group]) + col = paste0(sample_colors[pData(tmp)$sample_group], 80)) } legend("topleft", col = sample_colors, legend = names(sample_colors), lty = 1) ``` -- Do the feature detection. -- Describe the feature detection methods. +Alternatively we can use the `extractChromatograms` method that extracts +chromatograms from the object. In the example below we extract the *base peak +chromatogram* (BPC) by setting `aggregationFun` to `"max"` and not specifying an `rt` +or `mz` range to extract only a data subset. In contrast to the `tic` and `bpi` +methods, this function reads the data from the raw files. It takes thus more +time to create the plot, but it is based on the actual raw data that is used for +the later analysis - the `tic` and `bpi` methods access only the information that is +stored in the raw data files by the MS detector during the data acquisition. + +```{r faahKO-bpi, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +## Get the base peak chromatograms. This reads data from the files. +bpis <- extractChromatograms(raw_data, aggregationFun = "max") +## Plot the list of Chromatogram objects. 
+plotChromatogram(bpis, col = paste0(sample_colors[pData(raw_data)$sample_group], 80)) +``` + +While the `plotChromatogram` function if very convenient (and fast), it would also +not be too difficult to create the plot manually: + +```{r faahKO-bbpi-manual, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis, rtime))), + ylim = range(unlist(lapply(bpis, intensity))), main = "BPC", + xlab = "rtime", ylab = "intensity") +for (i in 1:length(bpis)) { + points(rtime(bpis[[i]]), intensity(bpis[[i]]), type = "l", + col = paste0(sample_colors[pData(raw_data)$sample_group[i]], 80)) +} +``` + +Note that we could restrict the analysis to a certain retention time range by +first sub-setting `raw_data` with the `filterRt` method. + +In addition we can plot the distribution of the total ion counts per file. In +contrast to sub-setting the object we split the numeric vector returned by the +`tic` by file using the `fromFile` method that provides the mapping of the +experiment's spectra to the originating files. + +```{r faahKO-tic-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +## Get the total ion current by file +tc <- split(tic(raw_data), f = fromFile(raw_data)) +boxplot(tc, col = paste0(sample_colors[pData(raw_data)$sample_group], 80), + ylab = "intensity", main = "Total ion current") +``` + +The `tic` (and for mzML files) the `bpi` methods are very fast, even for large data +sets, as these information are stored in the header of the raw files avoiding +the need to read the raw data from each file. Also, we could subset the whole +object using the filter functions `filterFile`, `filterRt` or `filterMz` to +e.g. remove problematic samples or restrict the retention time range in which we +want to perform the chromatographic peak detection. + +Next we perform the chromatographic peak detection using the *centWave* algorithm +[@Tautenhahn:2008fx]. 
In the example below we use most of the standard +parameters, but the settings should be adjusted to each experiment individually +based on e.g. the expected width of the chromatographic peaks etc. + +```{r faahKO-centWave, message = FALSE, warning = FALSE} +## Defining the settings for the centWave peak detection. +cwp <- CentWaveParam(snthresh = 20, noise = 1000) +xod <- findChromPeaks(raw_data, param = cwp) +``` + +The identified peaks can be accessed with the `chromPeaks` parameter which returns +a `matrix`, each line representing an identified peak. Column `"sample"` specifies +in which *sample* (i.e. file) of the experiment the peak was detected. Below we +plot the signal distribution of the identified peaks per sample. + +```{r faahKO-peak-intensity-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +ints <- split(chromPeaks(xod)[, "into"], f = chromPeaks(xod)[, "sample"]) +ints <- lapply(ints, log2) +boxplot(ints, varwidth = TRUE, col = sample_colors[pData(xod)$sample_group], + ylab = expression(log[2]~intensity), main = "Peak intensities") +``` + +After peak detection it might be advisable to evaluate whether the peak +detection identified e.g. compounds known to be present in the +sample. Facilitating access to the raw data has thus been one of the major aims +for the updated user interface. + +Next we extract the chromatogram for the rt-mz region corresponding to one +detected chromatographic peak increasing the region in rt dimension by +/- 60 +seconds. In addition we extract also the full chromatogram for the specified mz +range (i.e. the full rt range) and identify all chromatographic peaks in that +region by passing the same `mz` and `rt` parameters to the `chromPeaks` method. + +If two-column matrices are passed to the `extractChromatograms` method with +parameters `rt` and `mz`, the function returns a `list`, each element being a `list` of +`Chromatogram` objects representing the chromatogram for the respective +ranges. 
+ +```{r faahKO-chromPeaks-extractChroms, warning = FALSE} +rtr <- chromPeaks(xod)[68, c("rtmin", "rtmax")] +## Increase the range: +rtr[1] <- rtr[1] - 60 +rtr[2] <- rtr[2] + 60 +mzr <- chromPeaks(xod)[68, c("mzmin", "mzmax")] + +## Add an rt range that would extract the full chromatogram +rtr <- rbind(c(-Inf, Inf), rtr) +mzr <- rbind(mzr, mzr) + +chrs <- extractChromatograms(xod, rt = rtr, mz = mzr) + +## In addition we get all peaks detected in the same region +pks <- chromPeaks(xod, rt = rtr, mz = mzr) +pks +``` + +Next we plot the extracted chromatogram for the data and highlight in addition +the identified peaks. + +```{r faahKO-extracted-chrom-with-peaks, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks. Left: full retention time range, right: rt range of the peak. Each line represents the signal measured in one sample. The rectangles indicate the margins of the identified chromatographic peak in the respective sample.", fig.align = "center", fig.width = 12, fig.height = 6} +## Plot the full rt range: +plotChromatogram(chrs[[1]], + col = paste0(sample_colors[pData(xod)$sample_group], 80)) +## And now for the peak range. +plotChromatogram(chrs[[2]], + col = paste0(sample_colors[pData(xod)$sample_group], 80)) +## Highlight also the identified chromatographic peaks. +highlightChromPeaks(xod, rt = rtr[2, ], mzr[2, ], + border = paste0(sample_colors[pData(xod)$sample_group], 40)) +``` + +Note that the `extractChromatograms` does return an `NA` value if in a certain scan +(i.e. for a specific retention time) no signal was measured in the respective mz +range. This is reflected by the lines not being drawn as continuous lines in the +plot above. + +Next we align the samples using the *obiwarp* method [@Prince:2006jj]. This +method does not require, in contrast to other alignment/retention time +correction methods, any identified peaks and could thus also be applied to an +`OnDiskMSnExp` object. 
Note that all retention time adjustment methods do also +adjust the retention times reported for the individual peaks in `chromPeaks`. + +```{r faahKO-obiwarp, message = FALSE} +## Doing the obiwarp alignment using the default settings. +xod <- adjustRtime(xod, param = ObiwarpParam()) +``` + +Note that any pre-processing results can be removed at any time using a *drop* +method, such as `dropChromPeaks`, `dropFeatureDefinitions` or +`dropAdjustedRtime`. + +To evaluate the impact of the alignment we can plot again the BPC of each +sample. In addition we plot the differences of the adjusted to the raw retention +times per sample using the `plotAdjustedRtime` function. + +```{r faahKO-bpi-obiwarp, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8} +## Get the base peak chromatograms. This reads data from the files. +bpis <- extractChromatograms(xod, aggregationFun = "max") + +par(mfrow = c(2, 1), mar = c(4.5, 4.2, 1, 0.5)) +plotChromatogram(bpis, + col = paste0(sample_colors[pData(xod)$sample_group[i]], 80)) +## Plot also the difference of adjusted to raw retention time. +plotAdjustedRtime(xod, col = paste0(sample_colors[pData(xod)$sample_group], 80)) +``` + +Too large differences between adjusted and raw retention times could indicate +poorly performing samples or alignment. + +The distribution of retention time differences could also be used for quality +assessment. + +```{r faahKO-adjusted-rtime-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +## Calculate the difference between the adjusted and the raw retention times. +diffRt <- rtime(xod) - rtime(xod, adjusted = FALSE) + +## By default, rtime and most other accessor methods return a numeric vector. 
To +## get the values grouped by sample we have to split this vector by file/sample +diffRt <- split(diffRt, fromFile(xod)) + +boxplot(diffRt, col = sample_colors[pData(xod)$sample_group], + main = "Obiwarp alignment results", ylab = "adjusted - raw rt") +``` + +The 3rd sample was used as *center* sample against which all other samples were +aligned to, hence its adjusted retention times are identical to the raw +retention times. + +Below we plot the extracted ion chromatogram for the selected peak from the +example above before and after retention time correction to evaluate the impact +of the alignment. + +```{r faahKO-extracted-chrom-with-peaks-aligned, echo = FALSE, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks before and after alignment.", fig.align = "center", fig.width = 8, fig.height = 8} +rtr <- chromPeaks(xod)[68, c("rtmin", "rtmax")] +## Increase the range: +rtr[1] <- rtr[1] - 60 +rtr[2] <- rtr[2] + 60 +mzr <- chromPeaks(xod)[68, c("mzmin", "mzmax")] + +chrs <- extractChromatograms(xod, rt = rtr, mz = mzr) +chrs_raw <- extractChromatograms(raw_data, rt = rtr, mz = mzr) + +par(mfrow = c(2, 1)) +plotChromatogram(chrs_raw, + col = paste0(sample_colors[pData(xod)$sample_group], 80)) +plotChromatogram(chrs, + col = paste0(sample_colors[pData(xod)$sample_group], 80)) +highlightChromPeaks(xod, rt = rtr, mzr, + border = paste0(sample_colors[pData(xod)$sample_group], 40)) +``` + +After alignment, the peaks are nicely overlapping. + +Next we group identified chromatographic peaks across samples. We use the *peak +density* method [@Smith:2006ic] specifying that a chromatographic peak have +to be present in at least 1/3 of the samples within each group to be combined to +a mz-rt *feature*. 
+ +```{r faahKO-groupPeakDensity, message = FALSE} +## Define the PeakDensityParam +pdp <- PeakDensityParam(sampleGroups = pData(xod)$sample_group, + maxFeatures = 300, minFraction = 0.66) +xod <- groupChromPeaks(xod, param = pdp) +``` + +The definitions of the features can be accessed with the `featureDefinitions`, +which lists the mz-rt space specific to a feature. Column `"peakidx"` lists the +indices (in the `chromPeaks` matrix) of the individual chromatographic peaks +belonging to the feature. + +```{r faahKO-featureDefinitions, message = FALSE} +head(featureDefinitions(xod)) +``` + +To extract *values* for the features, the `featureValues` method can be used. This +method returns a matrix with rows being the features and column the samples. The +`value` parameter allows to specify the value that should be returned. Below we +extract the `"into"` signal, i.e. the per-peak integrated intensity for each +feature. + +```{r faahKO-featureValues, message = FALSE} +## Extract the "into" peak integrated signal. +head(featureValues(xod, value = "into")) +``` + +After correspondence there will always be features that do not include peaks +from every sample (being it that the peak finding algorithm failed to identify a +peak or that no signal was measured in the respective mz-rt area). For such +features an `NA` is returned by the `featureValues` method. Here, `xcms` allows to +infer values for such missing peaks using the `fillChromPeaks` method. This method +integrates in files where a peak was not found the signal from the mz-rt area +where it is expected and adds it to the `chromPeaks` matrix. Such *filled-in* peaks +have a value of `1` in the `"is_filled"` column of the `chromPeaks` matrix. + +```{r faahKO-fillPeaks, message = FALSE} +## Fill in peaks with default settings. Settings can be adjusted by passing +## a FillChromPeaksParam object to the method. 
+xod <- fillChromPeaks(xod) + +head(featureValues(xod, value = "into")) +``` + +Not for all missing peaks a value could be integrated (because at the respective +location no measurements are available). The peak area from which signal is to +be extracted can also be increased modifying the settings by passing a +`FillChromPeaksParam` object. + +Next we inspect the `processHistory` of the analysis. As described earlier, this +records all (major) processing steps along with the corresponding parameter +classes. + +```{r faahKO-processHistory, message = FALSE} +## List the full process history +processHistory(xod) +``` + +It is also possible to extract specific processing steps by specifying its +type. Available types can be listed with the `processHistoryTypes` function. Below +we extract the parameter class for the alignment/retention time adjustment step. + +```{r faahKO-processHistory-select, message = FALSE} +ph <- processHistory(xod, type = "Retention time correction") + +## Access the parameter +processParam(ph[[1]]) +``` + +As described earlier, we can remove specific analysis results at any +stage. Below we remove the results from the alignment. Since the correspondence +was performed after that processing step its results will be removed too leaving +us only with the results from the peak detection step. + +```{r faahKO-drop-alignment, message = FALSE} +## Remove the alignment results +xod <- dropAdjustedRtime(xod) + +processHistory(xod) +``` + +We can now use a different method to perform the alignment. The *peak groups* +alignment method bases the alignment of the samples on chromatographic peaks +present in most samples (so called *well behaved* peaks). This means we have to +perform first an initial correspondence analysis to group peaks within and +across samples. 
+ +```{r faahKO-initial-correspondence, message = FALSE} +## Define the parameter for the correspondence +pdparam <- PeakDensityParam(sampleGroups = pData(xod)$sample_group, + minFraction = 0.7, maxFeatures = 100) +xod <- groupChromPeaks(xod, param = pdparam) +``` + +Before performing the alignment we can also inspect which peak groups might be +selected for alignment based on the provided `PeakGroupsParam` object. + +```{r faahKO-peak-groups-matrix, message = FALSE} +## Create the parameter class for the alignment +pgparam <- PeakGroupsParam(minFraction = 0.9, span = 0.4) + +## Extract the matrix with (raw) retention times for the peak groups that would +## be used for alignment. +adjustRtimePeakGroups(xod, param = pgparam) +``` + +If we are not happy with these peak groups (e.g. because we don't have a peak +group for a rather large time span along the retention time axis) we can try +different settings. In addition, we could also *manually* select certain peak +groups, e.g. for internal controls, and add this matrix with the +`peakGroupsMatrix` method to the `PeakGroupsParam` class. Below we just use `pgparam` +we defined and perform the alignment. This will use the peak groups matrix from +above. + +```{r faahKO-peak-groups-alignment, message = FALSE} +## Perform the alignment using the peak groups method. +xod <- adjustRtime(xod, param = pgparam) +``` + +We can now also plot the difference between adjusted and raw retention times. If +alignment was performed using the *peak groups* method, also these peak groups are +highlighted in the plot. 
+ +```{r faahKO-peak-groups-alignment-plot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4} +plotAdjustedRtime(xod, col = sample_colors[pData(xod)$sample_group]) +``` + + +## New naming convention + +Methods for data analysis from the original `xcms` code have been renamed to avoid +potential confusions: + +- **Chromatographic peak detection**: `findChromPeaks` instead of `findPeaks`: for new + functions and methods the term *peak* is avoided as much as possible, as it is + usually used to describe a mass peak in mz dimension. To clearly distinguish + between these peaks and peaks in retention time space, the latter are referred + to as *chromatographic peak*, or `chromPeak`. + +- **Correspondence**: `groupChromPeaks` instead of `group` to clearly indicate what is + being grouped. Group might be a sample group or a peak group, the latter being + referred to also by (mz-rt) *feature*. + +- **Alignment**: `adjustRtime` instead of `retcor` for retention time correction. The + word *cor* in *retcor* might be easily misinterpreted as *correlation* instead of + correction. + + +## New data classes + + +### `OnDiskMSnExp` + +This object is defined and documented in the `MSnbase` package. In brief, it is a +container for the full raw data from an MS-based experiment. To keep the memory +footprint low the mz and intensity values are only loaded from the raw data +files when required. The `OnDiskMSnExp` object replaces the `xcmsRaw` object. + + +### `XCMSnExp` + +The `XCMSnExp` class extends the `OnDiskMSnExp` object from the `MSnbase` package and +represents a container for the xcms-based preprocessing results while (since it +inherits all functionality from its parent class) keeping a direct relation to +the (raw) data on which the processing was performed. An additional slot +`.processHistory` in the object allows to keep track of all performed processing +steps. 
Each analysis method, such as `findChromPeaks` adds an `XProcessHistory` +object which includes also the parameter class passed to the analysis +method. Hence not only the time and type of the analysis, but its exact settings +are reported within the `XCMSnExp` object. The `XCMSnExp` is thus equivalent to the +`xcmsSet` from the original `xcms` implementation, but keeps in addition a link to +the raw data on which the preprocessing was performed. + + +### `Chromatogram` + +The `Chromatogram` class allows a data representation that is orthogonal to the +`Spectrum` class defined in `MSnbase`. The `Chromatogram` class stores retention time +and intensity duplets and is designed to accommodate most use cases, from total +ion chromatogram, base peak chromatogram to extracted ion chromatogram and +SRM/MRM ion traces. + +`Chromatogram` objects can be extracted from `XCMSnExp` objects using the +`extractChromatograms` method. + +Note that this class is still considered developmental and might thus undergo +some changes in the future. ## Binning and missing value imputation functions @@ -189,32 +611,50 @@ set to a base value corresponding to half of the smallest bin value. Both methods thus yield same results, except for bins 15-17 (see Figure above). 
-## Core feature detection functions +## Core functionality exposed *via* simple functions -The core logic from the feature detection methods `findPeaks.centWave`, -`findPeaks.massifquant`, `findPeaks.matchedFilter` has been extracted and put into -functions with the common prefix `do_detectFeatures` with the aim, as detailed in -issue [#30](https://github.com/sneumann/xcms/issues/30), to separate the core logic from the analysis methods invoked by the -users to enable also the use of the feature detection functions using base R +The core logic from the chromatographic peak detection methods +`findPeaks.centWave`, `findPeaks.massifquant`, `findPeaks.matchedFilter` and +`findPeaks.MSW` and from all alignment (`group.*`) and correspondence (`retcor.*`) +methods has been extracted and put into functions with the common prefix +`do_findChromPeaks`, `do_adjustRtime` and `do_groupChromPeaks`, respectively, with the +aim, as detailed in issue [#30](https://github.com/sneumann/xcms/issues/30), to separate the core logic from the analysis +methods invoked by the users to enable also the use these methods using base R parameters (i.e. without specific classes containing the data such as the `xcmsRaw` class). This simplifies also the re-use of these functions in other -packages and simplifies the future implementation of the feature detection +packages and simplifies the future implementation of the peak detection algorithms for e.g. the `MSnExp` or `OnDiskMSnExp` objects from the `MSnbase` Bioconductor package. The implemented functions are: -- `do_detectFeatures_centWave`: peak density and wavelet based feature detection - for high resolution LC/MS data in centroid mode [@Tautenhahn:2008fx]. -- `do_detectFeatures_matchedFilter`: identification of features in the - chromatographic domain based on matched filtration [@Smith:2006ic]. -- `do_detectFeatures_massifquant`: identification of features using Kalman - filters. 
+- **peak detection methods**: + - `do_findChromPeaks_centWave`: peak density and wavelet based peak detection + for high resolution LC/MS data in centroid mode [@Tautenhahn:2008fx]. + - `do_findChromPeaks_matchedFilter`: identification of peaks in the + chromatographic domain based on matched filtration [@Smith:2006ic]. + - `do_findChromPeaks_massifquant`: identification of peaks using Kalman + filters. + - `do_findChromPeaks_MSW`: single spectrum, non-chromatographic peak detection. + +- **alignment methods**: + - `do_adjustRtime_peakGroups`: perform sample alignment (retention time + correction) using alignment of *well behaved* chromatographic peaks that are + present in most samples (and are expected to have the same retention time). + +- **correspondence methods**: + - `do_groupChromPeaks_density`: perform chromatographic peak grouping (within + and across samples) based on the density distribution of peaks along the + retention time axis. + - `do_groupChromPeaks_nearest`: groups peaks across samples similar to the + method implemented in mzMine. + - `do_groupChromPeaks_mzClust`: performs high resolution correspondence on + single spectra samples. One possible drawback from the introduction of this new layer is, that more objects get copied by R which *could* eventually result in a larger memory demand or performance decrease (while no such was decrease was observed up to now). -## Usability improvements +## Usability improvements in the *old* user interface - `[` subsetting method for `xcmsRaw` objects that enables to subset an `xcmsRaw` object to specific scans/spectra. @@ -287,11 +727,11 @@ the beginning and the end of the provided numeric vector. This can be disabled `TRUE` (like in the example above). -## Differences due to updates in `do_detectFeatures_matchedFilter`, respectively `findPeaks.matchedFilter`. +## Differences due to updates in `do_findChromPeaks_matchedFilter`, respectively `findPeaks.matchedFilter`.
The original `findPeaks.matchedFilter` (up to version 1.49.7) had several shortcomings and bugs that have been fixed in the new -`do_detectFeatures_matchedFilter` method: +`do_findChromPeaks_matchedFilter` method: - The internal iterative processing of smaller chunks of the full data (also referred to as *iterative buffering*) could result, for some bin (step) sizes to @@ -308,7 +748,7 @@ shortcomings and bugs that have been fixed in the new - The `profBinLin` implementation contains two bugs, one that can result in failing to identify the maximal value in the first and last bin (see issue [#46](https://github.com/sneumann/xcms/issues/46)) and one that fails to assign a value to a bin (issue [#49](https://github.com/sneumann/xcms/issues/49)). Both are fixed - in the `do_detectFeatures_matchedFilter` implementation. + in the `do_findChromPeaks_matchedFilter` implementation. A detailed description of tests comparing all implementations is available in issue [#52](https://github.com/sneumann/xcms/issues/52) on github. Note also that in course of these changes also the `getEIC` @@ -334,6 +774,35 @@ generation uses now the `binYonX` method which fixed some problems in the origin binning and linear interpolation methods. Thus results might be slightly different. +Also, the `retcor.obiwarp` method reports (un-rounded) adjusted retention times, +but adjusts the retention time of eventually already identified peaks using +rounded adjusted retention times. The new `adjustRtime` method(s) does adjust +identified peaks using the reported adjusted retention times (not rounded). This +guarantees that e.g. removing retention time adjustment/alignment results from +an object restores the object to its initial state (i.e. the adjusted retention +times of the identified peaks are reverted to the retention times before +alignment). +See issue [#122](https://github.com/sneumann/xcms/issues/122) for more details. 
+ + +## `retcor.peakgroups`: change in how *well behaved* peak groups are ordered + +The `retcor.peakgroups` defines first the chromatographic peak groups that are +used for the alignment of all spectra. Once these are identified, the retention +time of the peak with the highest intensity in a sample for a given peak group +is returned and the peak groups are ordered increasingly by retention time +(which is required for the later fitting of either a polynomial or a linear +model to the data). The selection of the retention time of the peak with the +highest intensity within a feature (peak group) and samples, denoted as +*representative* peak for a given feature in a sample, ensures that only the +retention time of a single peak per sample and feature is selected (note that +multiple chromatographic peaks within the same sample can be assigned to a +feature). In the original code the ordering of the peak groups was however +performed using the median retention time of the complete peak group (which +includes also potential additional peaks per sample). This has been changed and +the features are ordered now by the median retention time across samples of the +representative chromatographic peaks. + ## `scanrange` parameter in all `findPeaks` methods @@ -353,6 +822,30 @@ These problems have been fixed in version 1.51.1 by first sub-setting the detection. + +## `fillPeaks` (`fillChromPeaks`) differences + +In the original `fillPeaks.MSW`, the mz range from which the signal is to be +integrated was defined using + +```{r eval = FALSE} +mzarea <- seq(which.min(abs(mzs - peakArea[i, "mzmin"])), + which.min(abs(mzs - peakArea[i, "mzmax"]))) +``` + +Depending on the data this could lead to the inclusion of signal in the +integration that is just outside of the mz range.
In the new `fillChromPeaks` +method signal is integrated only for mz values >= mzmin and <= mzmax thus +ensuring that only signal is used that is truly within the peak area defined by +columns `"mzmin"`, `"mzmax"`, `"rtmin"` and `"rtmax"`. + +Also, the `fillPeaks.chrom` method did return `"into"` and `"maxo"` values of `0` if no +signal was found in the peak area. The new method does not integrate any signal +in such cases and does not fill in that peak. + +See also issue [#130](https://github.com/sneumann/xcms/issues/130) for more +information. + + # Under the hood changes These changes and updates will not have any large impact on the day-to-day use of diff --git a/vignettes/new_functionality.org b/vignettes/new_functionality.org index 0155316fa..8411bc4c3 100644 --- a/vignettes/new_functionality.org +++ b/vignettes/new_functionality.org @@ -42,6 +42,7 @@ in the =xcms= package introduced during the update to version /3/. #+BEGIN_SRC R :ravel message = FALSE, warning = FALSE library(xcms) library(RColorBrewer) + register(SerialParam()) #+END_SRC ** Modernized user interface @@ -49,61 +50,67 @@ in the =xcms= package introduced during the update to version /3/. The modernization of the user interface comprises new classes for data representation and new data analysis methods. In addition, the core logic for the data processing has been extracted from the old methods and put into a set -of R functions, the so called core API functions (or =do_= functions), that take -standard R data structures as input and can hence also easily be included in -other R packages. The new user interface aims at simplifying and streamlining -the =xcms= workflow while guaranteeing data integrity and performance also for -large scale metabolomics experiments. All objects in the new user interface -ensuring data integrity /via/ validation methods and class versioning, all methods -are tested internally in extensive unit tests to guarantee proper functionality. 
- -In the new user interface, objects from the =MSnbase= Bioconductor package are -used, namely the =OnDiskMSnExp= object. This object is specifically designed for -large scale experiments by just loading basic information from the raw files and -reading the actual spectra data (mz and intensity values) on demand from the -files. In contrast to the old =xcmsRaw= objects this object contains information -from all files of an experiment. In addition, by using this object from the -=MSnbase= package as container for the raw data, data normalization and adjustment -methods defined for this class can be applied directly without having them to be -re-implemented in the =xcms= package. In the new user interface the results from -the pre-processing steps are stored in a new results object, the =XCMSnExp= -object. This is similar to the old =xcmsSet= object, but by extending the -=OnDiskMSnExp= object it inherits all of its methods and hence provides a simple -and streamlined access to the (raw) data on which the processing was performed. - -For feature detection (peak calling), and thus replacing the =findPeaks.*= -methods, the =detectFeatures= method has been implemented. The reason for -changing the name from =findPeaks= to =detectFeatures= was to discriminate /peak/ -(i.e. mass peak mostly used in proteomics), from /feature/ (which describes a peak -in chromatographic space). The =detectFeatures= method performs dispatching to -the various algorithms (e.g. /centWave/, /matchedFilter/) based on a parameter class -submitted /via/ the =param= argument. This encapsulation of the parameters to a +of R functions, the so called core API functions (or =do_= functions). These +functions take standard R data structures as input and return standard R data +types as result and can hence be easily included in other R packages. 
+ +The new user interface aims at simplifying and streamlining the =xcms= workflow +while guaranteeing data integrity and performance also for large scale +metabolomics experiments. Importantly, a simplified access to the original raw +data should be provided throughout the whole metabolomics data analysis workflow. + +# All objects in the new user interface ensuring +# data integrity /via/ validation methods and class versioning, all methods are +# tested internally in extensive unit tests to guarantee proper functionality. + +The new interface re-uses objects from the =MSnbase= Bioconductor package, such as +the =OnDiskMSnExp= object. This object is specifically designed for large scale MS +experiments as it initially reads just the scan header information from the mzML +files while the mz-intensity value pairs from all or from selected spectra of a file +are read on demand hence minimizing the memory demand. Also, in contrast to +the old =xcmsRaw= object, the =OnDiskMSnExp= contains information from all files of +an experiment. In addition, all data normalization and adjustment methods +implemented in the =MSnbase= package can be directly applied to the MS data +without the need to re-implement such methods in =xcms=. Results from =xcms= +preprocessings, such as chromatographic peak detection or correspondence, are +stored into the new =XCMSnExp= object. This object extends the =OnDiskMSnExp= object +and inherits thus all of its methods including raw data access. + +Class and method/function names follow also a new naming convention trying to +avoid the partially confusing nomenclature of the original =xcms= methods (such as +the =group= method to perform the correspondence of peaks across samples). To +distinguish them from mass peaks, the peaks identified by the peak detection in +an LC/GC-MS experiment are referred to as /chromatographic peaks/.
The respective +method to identify such peaks is hence called =findChromPeaks= and the identified +peaks can be accessed using the =XCMSnExp= =chromPeaks= method. The results from an +correspondence analysis which aims to match and group chromatographic peaks +within and between samples are called /features/. A feature corresponds to +individual ions with a unique mass-to-charge ratio (mz) and a unique retention +time (rt). The definition of such mz-rt features (i.e. the result from the +=groupChromPeaks= method) can be accessed /via/ the =featureDefinitions= method of +the =XCMSnExp= class. Finally, alignment (retention time correction) can be +performed using the =adjustRtime= method. + +The settings for any of the new analysis methods are bundled in /parameter/ +classes, one class for each method. This encapsulation of the parameters to a function into a parameter class (such as =CentWaveParam=) avoids busy function -calls (with many single parameters to specify) and enables saving, reloading and -reusing settings as well as, by adding them to the result objects, enabling the -documentation of the data processing history. In addition, parameter validation -can be performed within the parameter object and hence is no longer required in -the analysis function. - -The implementation of the =detectFeatures= method for =OnDiskMSnExp= and =MSnExp= -objects (from the =MSnbase= package) enable feature detection on these types of -objects that represent the full MS data of an experiment along with all -experiment relevant metadata. Loading the files first into an =MSnExp= or -=OnDiskMSnExp= (the latter being specifically designed for large scale -experiments) enables a first data inspection and quality assessment, followed by -eventual subsetting and filtering (e.g. on retention time) before performing the -feature detection on the thus processed data. 
- -Below we load the raw data files from the =faahKO= package as an =OnDiskMSnExp= -object using the =readMSData2= function from the =MSnbase= package. +calls (with many single parameters) and enables saving, reloading and reusing +the settings. In addition, the parameter classes are added, along with other +information to the process history of an =XCMSnExp= object thus providing a +detailed documentation of each processing step of an analysis, with the +possibility to recall all settings of the performed analyses at any stage. In +addition, validation of the parameters can be performed within the parameter +object and hence is no longer required in the analysis function. + +The example below illustrates the new user interface. First we load the raw data +files from the =faahKO= package using the =readMSData2= from the =MSnbase= package. #+BEGIN_SRC R :ravel message = FALSE, warning = FALSE ## Reading the raw data using the MSnbase package - library(MSnbase) library(xcms) - ## Load the CDF files from the faahKO + ## Load 6 of the CDF files from the faahKO cdf_files <- dir(system.file("cdf", package = "faahKO"), recursive = TRUE, - full.names = TRUE) + full.names = TRUE)[c(1:3, 7:9)] ## Define the sample grouping. s_groups <- rep("KO", length(cdf_files)) @@ -118,9 +125,10 @@ object using the =readMSData2= function from the =MSnbase= package. #+END_SRC We next plot the total ion chromatogram (TIC) for all files within the -experiment. Note that we are iteratively sub-setting the full data per file, -which for =OnDiskMSnExp= is an efficient way to subset the data while ensuring -that all data, including metadata, stays consistent. +experiment. Note that we are iteratively sub-setting the full data per file +using the =filterFile= method, which, for =OnDiskMSnExp= objects, is an efficient +way to subset the data while ensuring that all data, including metadata, stays +consistent. 
#+NAME: faahKO-tic #+BEGIN_SRC R :ravel message = FALSE, fig.align = 'center', fig.width = 8, fig.height = 4 @@ -130,17 +138,399 @@ that all data, including metadata, stays consistent. ## Subset the full raw data by file and plot the data. tmp <- filterFile(raw_data, file = 1) plot(x = rtime(tmp), y = tic(tmp), xlab = "retention time", ylab = "TIC", - col = sample_colors[pData(tmp)$sample_group], type = "l") + col = paste0(sample_colors[pData(tmp)$sample_group], 80), type = "l") for (i in 2:length(fileNames(raw_data))) { tmp <- filterFile(raw_data, file = i) points(rtime(tmp), tic(tmp), type = "l", - col = sample_colors[pData(tmp)$sample_group]) + col = paste0(sample_colors[pData(tmp)$sample_group], 80)) } legend("topleft", col = sample_colors, legend = names(sample_colors), lty = 1) #+END_SRC -+ Do the feature detection. -+ Describe the feature detection methods. +Alternatively we can use the =extractChromatograms= method that extracts +chromatograms from the object. In the example below we extract the /base peak +chromatogram/ (BPC) by setting =aggregationFun= to ="max"= and not specifying an =rt= +or =mz= range to extract only a data subset. In contrast to the =tic= and =bpi= +methods, this function reads the data from the raw files. It takes thus more +time to create the plot, but it is based on the actual raw data that is used for +the later analysis - the =tic= and =bpi= methods access only the information that is +stored in the raw data files by the MS detector during the data acquisition. + +#+NAME: faahKO-bpi +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 + ## Get the base peak chromatograms. This reads data from the files. + bpis <- extractChromatograms(raw_data, aggregationFun = "max") + ## Plot the list of Chromatogram objects. 
+ plotChromatogram(bpis, col = paste0(sample_colors[pData(raw_data)$sample_group], 80)) + +#+END_SRC + +While the =plotChromatogram= function is very convenient (and fast), it would also +not be too difficult to create the plot manually: + +#+NAME: faahKO-bbpi-manual +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 + plot(3, 3, pch = NA, xlim = range(unlist(lapply(bpis, rtime))), + ylim = range(unlist(lapply(bpis, intensity))), main = "BPC", + xlab = "rtime", ylab = "intensity") + for (i in 1:length(bpis)) { + points(rtime(bpis[[i]]), intensity(bpis[[i]]), type = "l", + col = paste0(sample_colors[pData(raw_data)$sample_group[i]], 80)) + } +#+END_SRC + + +Note that we could restrict the analysis to a certain retention time range by +first sub-setting =raw_data= with the =filterRt= method. + +In addition we can plot the distribution of the total ion counts per file. In +contrast to sub-setting the object we split the numeric vector returned by the +=tic= by file using the =fromFile= method that provides the mapping of the +experiment's spectra to the originating files. + +#+NAME: faahKO-tic-boxplot +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 + ## Get the total ion current by file + tc <- split(tic(raw_data), f = fromFile(raw_data)) + boxplot(tc, col = paste0(sample_colors[pData(raw_data)$sample_group], 80), + ylab = "intensity", main = "Total ion current") +#+END_SRC + +The =tic= and (for mzML files) the =bpi= methods are very fast, even for large data +sets, as this information is stored in the header of the raw files avoiding +the need to read the raw data from each file. Also, we could subset the whole +object using the filter functions =filterFile=, =filterRt= or =filterMz= to +e.g. remove problematic samples or restrict the retention time range in which we +want to perform the chromatographic peak detection.
+ +Next we perform the chromatographic peak detection using the /centWave/ algorithm +\cite{Tautenhahn:2008fx}. In the example below we use most of the standard +parameters, but the settings should be adjusted to each experiment individually +based on e.g. the expected width of the chromatographic peaks etc. + +#+NAME: faahKO-centWave +#+BEGIN_SRC R :ravel message = FALSE, warning = FALSE + ## Defining the settings for the centWave peak detection. + cwp <- CentWaveParam(snthresh = 20, noise = 1000) + xod <- findChromPeaks(raw_data, param = cwp) +#+END_SRC + +The identified peaks can be accessed with the =chromPeaks= parameter which returns +a =matrix=, each line representing an identified peak. Column ="sample"= specifies +in which /sample/ (i.e. file) of the experiment the peak was detected. Below we +plot the signal distribution of the identified peaks per sample. + +#+NAME: faahKO-peak-intensity-boxplot +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 + ints <- split(chromPeaks(xod)[, "into"], f = chromPeaks(xod)[, "sample"]) + ints <- lapply(ints, log2) + boxplot(ints, varwidth = TRUE, col = sample_colors[pData(xod)$sample_group], + ylab = expression(log[2]~intensity), main = "Peak intensities") +#+END_SRC + +After peak detection it might be advisable to evaluate whether the peak +detection identified e.g. compounds known to be present in the +sample. Facilitating access to the raw data has thus been one of the major aims +for the updated user interface. + +Next we extract the chromatogram for the rt-mz region corresponding to one +detected chromatographic peak increasing the region in rt dimension by +/- 60 +seconds. In addition we extract also the full chromatogram for the specified mz +range (i.e. the full rt range) and identify all chromatographic peaks in that +region by passing the same =mz= and =rt= parameters to the =chromPeaks= method. 
+ +If two-column matrices are passed to the =extractChromatograms= method with +parameters =rt= and =mz=, the function returns a =list=, each element being a =list= of +=Chromatogram= objects representing the chromatogram for the respective +ranges. + +#+NAME: faahKO-chromPeaks-extractChroms +#+BEGIN_SRC R :ravel warning = FALSE + rtr <- chromPeaks(xod)[68, c("rtmin", "rtmax")] + ## Increase the range: + rtr[1] <- rtr[1] - 60 + rtr[2] <- rtr[2] + 60 + mzr <- chromPeaks(xod)[68, c("mzmin", "mzmax")] + + ## Add an rt range that would extract the full chromatogram + rtr <- rbind(c(-Inf, Inf), rtr) + mzr <- rbind(mzr, mzr) + + chrs <- extractChromatograms(xod, rt = rtr, mz = mzr) + + ## In addition we get all peaks detected in the same region + pks <- chromPeaks(xod, rt = rtr, mz = mzr) + pks +#+END_SRC + +Next we plot the extracted chromatogram for the data and highlight in addition +the identified peaks. + +#+NAME: faahKO-extracted-chrom-with-peaks +#+BEGIN_SRC R :ravel message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks. Left: full retention time range, right: rt range of the peak. Each line represents the signal measured in one sample. The rectangles indicate the margins of the identified chromatographic peak in the respective sample.", fig.align = "center", fig.width = 12, fig.height = 6 + ## Plot the full rt range: + plotChromatogram(chrs[[1]], + col = paste0(sample_colors[pData(xod)$sample_group], 80)) + ## And now for the peak range. + plotChromatogram(chrs[[2]], + col = paste0(sample_colors[pData(xod)$sample_group], 80)) + ## Highlight also the identified chromatographic peaks. + highlightChromPeaks(xod, rt = rtr[2, ], mzr[2, ], + border = paste0(sample_colors[pData(xod)$sample_group], 40)) +#+END_SRC + +Note that the =extractChromatograms= does return an =NA= value if in a certain scan +(i.e. for a specific retention time) no signal was measured in the respective mz +range. 
This is reflected by the lines not being drawn as continuous lines in the +plot above. + +Next we align the samples using the /obiwarp/ method \cite{Prince:2006jj}. This +method does not require, in contrast to other alignment/retention time +correction methods, any identified peaks and could thus also be applied to an +=OnDiskMSnExp= object. Note that all retention time adjustment methods do also +adjust the retention times reported for the individual peaks in =chromPeaks=. + +#+NAME: faahKO-obiwarp +#+BEGIN_SRC R :ravel message = FALSE + ## Doing the obiwarp alignment using the default settings. + xod <- adjustRtime(xod, param = ObiwarpParam()) +#+END_SRC + +Note that any pre-processing results can be removed at any time using a /drop/ +method, such as =dropChromPeaks=, =dropFeatureDefinitions= or +=dropAdjustedRtime=. + +To evaluate the impact of the alignment we can plot again the BPC of each +sample. In addition we plot the differences of the adjusted to the raw retention +times per sample using the =plotAdjustedRtime= function. + +#+NAME: faahKO-bpi-obiwarp +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8 + ## Get the base peak chromatograms. This reads data from the files. + bpis <- extractChromatograms(xod, aggregationFun = "max") + + par(mfrow = c(2, 1), mar = c(4.5, 4.2, 1, 0.5)) + plotChromatogram(bpis, + col = paste0(sample_colors[pData(xod)$sample_group[i]], 80)) + ## Plot also the difference of adjusted to raw retention time. + plotAdjustedRtime(xod, col = paste0(sample_colors[pData(xod)$sample_group], 80)) +#+END_SRC + +Too large differences between adjusted and raw retention times could indicate +poorly performing samples or alignment. + +The distribution of retention time differences could also be used for quality +assessment. 
+ +#+NAME: faahKO-adjusted-rtime-boxplot +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 + ## Calculate the difference between the adjusted and the raw retention times. + diffRt <- rtime(xod) - rtime(xod, adjusted = FALSE) + + ## By default, rtime and most other accessor methods return a numeric vector. To + ## get the values grouped by sample we have to split this vector by file/sample + diffRt <- split(diffRt, fromFile(xod)) + + boxplot(diffRt, col = sample_colors[pData(xod)$sample_group], + main = "Obiwarp alignment results", ylab = "adjusted - raw rt") +#+END_SRC + +The 3rd sample was used as /center/ sample against which all other samples were +aligned to, hence its adjusted retention times are identical to the raw +retention times. + +Below we plot the extracted ion chromatogram for the selected peak from the +example above before and after retention time correction to evaluate the impact +of the alignment. + +#+NAME: faahKO-extracted-chrom-with-peaks-aligned +#+BEGIN_SRC R :ravel echo = FALSE, message = FALSE, fig.cap = "Extracted ion chromatogram for one of the identified peaks before and after alignment.", fig.align = "center", fig.width = 8, fig.height = 8 + rtr <- chromPeaks(xod)[68, c("rtmin", "rtmax")] + ## Increase the range: + rtr[1] <- rtr[1] - 60 + rtr[2] <- rtr[2] + 60 + mzr <- chromPeaks(xod)[68, c("mzmin", "mzmax")] + + chrs <- extractChromatograms(xod, rt = rtr, mz = mzr) + chrs_raw <- extractChromatograms(raw_data, rt = rtr, mz = mzr) + + par(mfrow = c(2, 1)) + plotChromatogram(chrs_raw, + col = paste0(sample_colors[pData(xod)$sample_group], 80)) + plotChromatogram(chrs, + col = paste0(sample_colors[pData(xod)$sample_group], 80)) + highlightChromPeaks(xod, rt = rtr, mzr, + border = paste0(sample_colors[pData(xod)$sample_group], 40)) +#+END_SRC + +After alignment, the peaks are nicely overlapping. + +Next we group identified chromatographic peaks across samples. 
We use the /peak +density/ method \cite{Smith:2006ic} specifying that a chromatographic peak has +to be present in at least 2/3 of the samples within each group to be combined to +a mz-rt /feature/. + +#+NAME: faahKO-groupPeakDensity +#+BEGIN_SRC R :ravel message = FALSE + ## Define the PeakDensityParam + pdp <- PeakDensityParam(sampleGroups = pData(xod)$sample_group, + maxFeatures = 300, minFraction = 0.66) + xod <- groupChromPeaks(xod, param = pdp) +#+END_SRC + +The definitions of the features can be accessed with the =featureDefinitions= method, +which lists the mz-rt space specific to a feature. Column ="peakidx"= lists the +indices (in the =chromPeaks= matrix) of the individual chromatographic peaks +belonging to the feature. + +#+NAME: faahKO-featureDefinitions +#+BEGIN_SRC R :ravel message = FALSE + head(featureDefinitions(xod)) +#+END_SRC + +To extract /values/ for the features, the =featureValues= method can be used. This +method returns a matrix with rows being the features and columns the samples. The +=value= parameter allows to specify the value that should be returned. Below we +extract the ="into"= signal, i.e. the per-peak integrated intensity for each +feature. + +#+NAME: faahKO-featureValues +#+BEGIN_SRC R :ravel message = FALSE + ## Extract the "into" peak integrated signal. + head(featureValues(xod, value = "into")) +#+END_SRC + +After correspondence there will always be features that do not include peaks +from every sample (be it that the peak finding algorithm failed to identify a +peak or that no signal was measured in the respective mz-rt area). For such +features an =NA= is returned by the =featureValues= method. Here, =xcms= allows to +infer values for such missing peaks using the =fillChromPeaks= method. This method +integrates in files where a peak was not found the signal from the mz-rt area +where it is expected and adds it to the =chromPeaks= matrix.
Such /filled-in/ peaks +have a value of =1= in the ="is_filled"= column of the =chromPeaks= matrix. + +#+NAME: faahKO-fillPeaks +#+BEGIN_SRC R :ravel message = FALSE + ## Fill in peaks with default settings. Settings can be adjusted by passing + ## a FillChromPeaksParam object to the method. + xod <- fillChromPeaks(xod) + + head(featureValues(xod, value = "into")) +#+END_SRC + +Not for all missing peaks a value could be integrated (because at the respective +location no measurements are available). The peak area from which signal is to +be extracted can also be increased modifying the settings by passing a +=FillChromPeaksParam= object. + +Next we inspect the =processHistory= of the analysis. As described earlier, this +records all (major) processing steps along with the corresponding parameter +classes. + +#+NAME: faahKO-processHistory +#+BEGIN_SRC R :ravel message = FALSE + ## List the full process history + processHistory(xod) +#+END_SRC + +It is also possible to extract specific processing steps by specifying its +type. Available types can be listed with the =processHistoryTypes= function. Below +we extract the parameter class for the alignment/retention time adjustment step. + +#+NAME: faahKO-processHistory-select +#+BEGIN_SRC R :ravel message = FALSE + ph <- processHistory(xod, type = "Retention time correction") + + ## Access the parameter + processParam(ph[[1]]) +#+END_SRC + +As described earlier, we can remove specific analysis results at any +stage. Below we remove the results from the alignment. Since the correspondence +was performed after that processing step its results will be removed too leaving +us only with the results from the peak detection step. + +#+NAME: faahKO-drop-alignment +#+BEGIN_SRC R :ravel message = FALSE + ## Remove the alignment results + xod <- dropAdjustedRtime(xod) + + processHistory(xod) +#+END_SRC + +We can now use a different method to perform the alignment. 
The /peak groups/ +alignment method bases the alignment of the samples on chromatographic peaks +present in most samples (so called /well behaved/ peaks). This means we have to +perform first an initial correspondence analysis to group peaks within and +across samples. + +#+NAME: faahKO-initial-correspondence +#+BEGIN_SRC R :ravel message = FALSE + ## Define the parameter for the correspondence + pdparam <- PeakDensityParam(sampleGroups = pData(xod)$sample_group, + minFraction = 0.7, maxFeatures = 100) + xod <- groupChromPeaks(xod, param = pdparam) +#+END_SRC + +Before performing the alignment we can also inspect which peak groups might be +selected for alignment based on the provided =PeakGroupsParam= object. + +#+NAME: faahKO-peak-groups-matrix +#+BEGIN_SRC R :ravel message = FALSE + ## Create the parameter class for the alignment + pgparam <- PeakGroupsParam(minFraction = 0.9, span = 0.4) + + ## Extract the matrix with (raw) retention times for the peak groups that would + ## be used for alignment. + adjustRtimePeakGroups(xod, param = pgparam) +#+END_SRC + +If we are not happy with these peak groups (e.g. because we don't have a peak +group for a rather large time span along the retention time axis) we can try +different settings. In addition, we could also /manually/ select certain peak +groups, e.g. for internal controls, and add this matrix with the +=peakGroupsMatrix= method to the =PeakGroupsParam= class. Below we just use =pgparam= +we defined and perform the alignment. This will use the peak groups matrix from +above. + +#+NAME: faahKO-peak-groups-alignment +#+BEGIN_SRC R :ravel message = FALSE + ## Perform the alignment using the peak groups method. + xod <- adjustRtime(xod, param = pgparam) +#+END_SRC + +We can now also plot the difference between adjusted and raw retention times. If +alignment was performed using the /peak groups/ method, also these peak groups are +highlighted in the plot. 
+ +#+NAME: faahKO-peak-groups-alignment-plot +#+BEGIN_SRC R :ravel message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4 + plotAdjustedRtime(xod, col = sample_colors[pData(xod)$sample_group]) +#+END_SRC + +** New naming convention + +Peaks identified in LC/GC-MS metabolomics are referred to as /chromatographic +peaks/ where possible to avoid any misconceptions with /mass peaks/ identified in +mz dimension. + +Methods for data analysis from the original =xcms= code have been renamed to avoid +potential confusions: + ++ *Chromatographic peak detection*: =findChromPeaks= instead of =findPeaks=: for new + functions and methods the term /peak/ is avoided as much as possible, as it is + usually used to describe a mass peak in mz dimension. To clearly distinguish + between these peaks and peaks in retention time space, the latter are referred + to as /chromatographic peak/, or =chromPeak=. + ++ *Correspondence*: =groupChromPeaks= instead of =group= to clearly indicate what is + being grouped. Group might be a sample group or a peak group, the latter being + referred to also by (mz-rt) /feature/. + ++ *Alignment*: =adjustRtime= instead of =retcor= for retention time correction. The + word /cor/ in /retcor/ might be easily misinterpreted as /correlation/ instead of + correction. + ** New data classes @@ -158,11 +548,26 @@ represents a container for the xcms-based preprocessing results while (since it inherits all functionality from its parent class) keeping a direct relation to the (raw) data on which the processing was performed. An additional slot =.processHistory= in the object allows to keep track of all performed processing -steps. Each analysis method, such as =detectFeatures= adds an =XProcessHistory= +steps. Each analysis method, such as =findChromPeaks= adds an =XProcessHistory= object which includes also the parameter class passed to the analysis method. 
Hence not only the time and type of the analysis, but its exact settings are reported within the =XCMSnExp= object. The =XCMSnExp= is thus equivalent to the -=xcmsSet= from the original =xcms= implementation. +=xcmsSet= from the original =xcms= implementation, but keeps in addition a link to +the raw data on which the preprocessing was performed. + +*** =Chromatogram= + +The =Chromatogram= class allows a data representation that is orthogonal to the +=Spectrum= class defined in =MSnbase=. The =Chromatogram= class stores retention time +and intensity duplets and is designed to accommodate most use cases, from total +ion chromatogram, base peak chromatogram to extracted ion chromatogram and +SRM/MRM ion traces. + +=Chromatogram= objects can be extracted from =XCMSnExp= objects using the +=extractChromatograms= method. + +Note that this class is still considered developmental and might thus undergo +some changes in the future. ** Binning and missing value imputation functions @@ -246,31 +651,49 @@ of the bin with the missing value (=1= by default). The other missing values are set to a base value corresponding to half of the smallest bin value. Both methods thus yield same results, except for bins 15-17 (see Figure above). 
-** Core feature detection functions +** Core functionality exposed /via/ simple functions -The core logic from the feature detection methods =findPeaks.centWave=, -=findPeaks.massifquant=, =findPeaks.matchedFilter= has been extracted and put into -functions with the common prefix =do_detectFeatures= with the aim, as detailed in -issue [[https://github.com/sneumann/xcms/issues/30][#30]], to separate the core logic from the analysis methods invoked by the -users to enable also the use of the feature detection functions using base R +The core logic from the chromatographic peak detection methods +=findPeaks.centWave=, =findPeaks.massifquant=, =findPeaks.matchedFilter= and +=findPeaks.MSW= and from all alignment (=retcor.*=) and correspondence (=group.*=) +methods has been extracted and put into functions with the common prefix +=do_findChromPeaks=, =do_adjustRtime= and =do_groupChromPeaks=, respectively, with the +aim, as detailed in issue [[https://github.com/sneumann/xcms/issues/30][#30]], to separate the core logic from the analysis +methods invoked by the users to enable also the use of these methods using base R parameters (i.e. without specific classes containing the data such as the =xcmsRaw= class). This simplifies also the re-use of these functions in other -packages and simplifies the future implementation of the feature detection +packages and simplifies the future implementation of the peak detection algorithms for e.g. the =MSnExp= or =OnDiskMSnExp= objects from the =MSnbase= Bioconductor package. The implemented functions are: -+ =do_detectFeatures_centWave=: peak density and wavelet based feature detection - for high resolution LC/MS data in centroid mode \cite{Tautenhahn:2008fx}. -+ =do_detectFeatures_matchedFilter=: identification of features in the - chromatographic domain based on matched filtration \cite{Smith:2006ic}. -+ =do_detectFeatures_massifquant=: identification of features using Kalman - filters.
++ *peak detection methods*: + + =do_findChromPeaks_centWave=: peak density and wavelet based peak detection + for high resolution LC/MS data in centroid mode \cite{Tautenhahn:2008fx}. + + =do_findChromPeaks_matchedFilter=: identification of peaks in the + chromatographic domain based on matched filtration \cite{Smith:2006ic}. + + =do_findChromPeaks_massifquant=: identification of peaks using Kalman + filters. + + =do_findChromPeaks_MSW=: single spectrum, non-chromatographic peak detection. + ++ *alignment methods*: + + =do_adjustRtime_peakGroups=: perform sample alignment (retention time + correction) using alignment of /well behaved/ chromatographic peaks that are + present in most samples (and are expected to have the same retention time). + ++ *correspondence methods*: + + =do_groupChromPeaks_density=: perform chromatographic peak grouping (within + and across samples) based on the density distribution of peaks along the + retention time axis. + + =do_groupChromPeaks_nearest=: groups peaks across samples similar to the + method implemented in mzMine. + + =do_groupChromPeaks_mzClust=: performs high resolution correspondence on + single spectra samples. One possible drawback from the introduction of this new layer is that more objects get copied by R which /could/ eventually result in a larger memory demand or performance decrease (while no such decrease was observed up to now). -** Usability improvements +** Usability improvements in the /old/ user interface + =[= subsetting method for =xcmsRaw= objects that enables to subset an =xcmsRaw= object to specific scans/spectra. @@ -341,11 +764,11 @@ the beginning and the end of the provided numeric vector. This can be disabled (to be compliant with =profBinLin=) by setting parameter =noInterpolAtEnds= to =TRUE= (like in the example above). -** Differences due to updates in =do_detectFeatures_matchedFilter=, respectively =findPeaks.matchedFilter=.
+** Differences due to updates in =do_findChromPeaks_matchedFilter=, respectively =findPeaks.matchedFilter=. The original =findPeaks.matchedFilter= (up to version 1.49.7) had several shortcomings and bugs that have been fixed in the new -=do_detectFeatures_matchedFilter= method: +=do_findChromPeaks_matchedFilter= method: + The internal iterative processing of smaller chunks of the full data (also referred to as /iterative buffering/) could result, for some bin (step) sizes to @@ -362,7 +785,7 @@ shortcomings and bugs that have been fixed in the new + The =profBinLin= implementation contains two bugs, one that can result in failing to identify the maximal value in the first and last bin (see issue [[https://github.com/sneumann/xcms/issues/46][#46]]) and one that fails to assign a value to a bin (issue [[https://github.com/sneumann/xcms/issues/49][#49]]). Both are fixed - in the =do_detectFeatures_matchedFilter= implementation. + in the =do_findChromPeaks_matchedFilter= implementation. A detailed description of tests comparing all implementations is available in issue [[https://github.com/sneumann/xcms/issues/52][#52]] on github. Note also that in course of these changes also the =getEIC= @@ -386,6 +809,33 @@ generation uses now the =binYonX= method which fixed some problems in the origin binning and linear interpolation methods. Thus results might be slightly different. +Also, the =retcor.obiwarp= method reports (un-rounded) adjusted retention times, +but adjusts the retention time of eventually already identified peaks using +rounded adjusted retention times. The new =adjustRtime= method(s) does adjust +identified peaks using the reported adjusted retention times (not rounded). This +guarantees that e.g. removing retention time adjustment/alignment results from +an object restores the object to its initial state (i.e. the adjusted retention +times of the identified peaks are reverted to the retention times before +alignment). 
+See issue [[https://github.com/sneumann/xcms/issues/122][#122]] for more details. + +** =retcor.peakgroups=: change in how /well behaved/ peak groups are ordered + +The =retcor.peakgroups= first defines the chromatographic peak groups that are +used for the alignment of all spectra. Once these are identified, the retention +time of the peak with the highest intensity in a sample for a given peak group +is returned and the peak groups are ordered increasingly by retention time +(which is required for the later fitting of either a polynomial or a linear +model to the data). The selection of the retention time of the peak with the +highest intensity within a feature (peak group) and samples, denoted as +/representative/ peak for a given feature in a sample, ensures that only the +retention time of a single peak per sample and feature is selected (note that +multiple chromatographic peaks within the same sample can be assigned to a +feature). In the original code the ordering of the peak groups was however +performed using the median retention time of the complete peak group (which +includes also potential additional peaks per sample). This has been changed and +the features are ordered now by the median retention time across samples of the +representative chromatographic peaks. ** =scanrange= parameter in all =findPeaks= methods These problems have been fixed in version 1.51.1 by first sub-setting the =xcmsRaw= object (using the =[= method) before actually performing the feature detection. + +** =fillPeaks= (=fillChromPeaks=) differences + +In the original =fillPeaks.MSW=, the mz range from which the signal is to be +integrated was defined using + +#+BEGIN_SRC R :eval = "never", :ravel eval = FALSE + mzarea <- seq(which.min(abs(mzs - peakArea[i, "mzmin"])), + which.min(abs(mzs - peakArea[i, "mzmax"]))) + +#+END_SRC + +Depending on the data this could lead to the inclusion of signal in the +integration that is just outside of the mz range.
In the new =fillChromPeaks= +method signal is integrated only for mz values >= mzmin and <= mzmax thus +ensuring that only signal is used that is truly within the peak area defined by +columns ="mzmin"=, ="mzmax"=, ="rtmin"= and ="rtmax"=. + +Also, the =fillPeaks.chrom= method did return ="into"= and ="maxo"= values of =0= if no +signal was found in the peak area. The new method does not integrate any signal +in such cases and does not fill in that peak. + +See also issue [[https://github.com/sneumann/xcms/issues/130][#130]] for more +information. + ** Problems with iterative binning of small data sub-sets in =findPeaks.matchedFilter= :noexport: The problem described here has been fixed in =xcms= >= 1.51.1. @@ -458,8 +932,8 @@ using the =seq= function, but they are not identical. ** Implementation and comparison for =matchedFilter= :noexport: -These results base on the test =dontrun_test_do_detectFeatures_matchedFilter_impl= -defined in /test_do_detectFeatures_matchedFilter.R/ +These results base on the test =dontrun_test_do_findChromPeaks_matchedFilter_impl= +defined in /test_do_findChromPeaks_matchedFilter.R/ We have 4 different functions to test and compare to the original one: + *A*: =.matchedFilter_orig=: it's the original code. @@ -717,13 +1191,19 @@ Here we list all of the functions and related files that are deprecated. * TODOs :noexport: -** TODO Deprecate binning functions. +** DONE Deprecate binning functions. + CLOSED: [2017-02-23 Thu 07:47] + - State "DONE" from "TODO" [2017-02-23 Thu 07:47] All done except for the retention time correction!!! -** TODO Continue implementing the =do_= functions. -** TODO Define a new object to contain the preprocessing results +** DONE Continue implementing the =do_= functions. 
+ CLOSED: [2017-02-23 Thu 07:47] + - State "DONE" from "TODO" [2017-02-23 Thu 07:47] +** DONE Define a new object to contain the preprocessing results + CLOSED: [2017-02-23 Thu 07:47] + - State "DONE" from "TODO" [2017-02-23 Thu 07:47] This object should replace in the long run the =xcmsSet= object providing the same functionality while in addition add a better integration of the original raw data files. The object should contain: @@ -758,12 +1238,125 @@ to avoid copying etc of the data. Check also =assayDataElement()= in =MSnbase=. + Rename =peaks= to =features=. + Better alternative for =groups=: =alignedFeatures=. -+ =groupval=? =featureMatrix=. ++ =groupval=? =featureValues=. *** Design and implementation: + =features= should be still implemented as =matrix= (for performance issues). + Alignment information could be implemented as =DataFrame= with the indices added to a column =idx=. +** DONE Rename objects, functions and methods + CLOSED: [2017-02-23 Thu 07:47] + + - State "DONE" from "TODO" [2017-02-23 Thu 07:47] ++ [X] =features=: =chromPeaks=. ++ [X] =hasDetectedFeatures=: =hasChromPeaks=. ++ [ ] feature: chromatographic peak. ++ [X] =detectFeatures=: =findChromPeaks=. ++ [X] =dropFeatures=: =dropChromPeaks=. ++ [X] featureDetection-centWave: findChromPeaks-centWave ++ [X] =validFeatureMatrix=: =validChromPeaksMatrix=. + +Correspondence. ++ [ ] feature groups: features (aligned and grouped chromatographic peaks). ++ [X] =groupFeatures=: =groupChromPeaks=. ++ [X] =hasAlignedFeatures=: =hasFeatures=. ++ [X] =featureGroups=: =featureDefinitions=, =featureValue= (=groupval=). ++ [X] =FeatureDensityParam=: =PeakDensityParam=. ++ [X] =NearestFeaturesParam=: =NearestPeaksParam= ++ [ ] feature alignment methods: peak alignment methods ++ [X] =$features=: =$chromPeaks=. ++ [X] =featureidx=: =peakidx=. ++ [X] =featureIndex=: =peakIndex=. ++ [X] =dropFeatureGroups=: =dropFeatureDefinitions=. 
++ [ ] Peak alignment: Peak grouping ++ [X] =.PROCSTEP.PEAK.ALIGNMENT=: =.PROCSTEP.PEAK.GROUPING=. + +Param classes: ++ [X] =extraFeatures=: =extraPeaks=. + +RT correction. ++ [X] =featureGroups= retention time correction: =peakGroups=. ++ [X] =FeatureGroupsParam=: =PeakGroupsParam=. ++ [X] =features=: =peaks= ++ [X] =featureIndex=: =peakIndex= ++ [X] =getFeatureGroupsRtMatrix=: =getPeakGroupsRtMatrix= ++ [X] =applyRtAdjToFeatures=: =applyRtAdjToPeaks=. ++ [X] =do_groupFeatures_mzClust=: =do_groupPeaks_mzClust=. + ++ [X] Check =maxFeatures= parameter for =do_groupChromPeaks_density=. Is it really + the maximum number of features, or of peaks? + ++ [X] Alignment: retention time correction between samples + \cite{Sugimoto:2012jt}. ++ [X] Correspondence: (grouping) registration of recurring signals from the same + analyte over replicate samples \cite{Smith:2014di}. + + +** TODO Implement the =Chromatogram= class + +Now, to accommodate all possibilities: +https://en.wikipedia.org/wiki/Triple_quadrupole_mass_spectrometer +Triple Q-TOF measurements: ++ Product Ion Scan + - Q1 fixed + - Q3 scan ++ Precursor Ion Scan + - Q1 scan + - Q3 fixed ++ Neutral Loss Scan + - Q1 scan at mz = m_{product} + - Q3 scan at mz = m_{product} - m_{neutral molecule} ++ Selected Reaction monitoring (SRM, MRM): Q1 is used to select the precursor + ion, Q3 cycles through the product ions. Precursor/product pair is referred to + as a /transition/. + - Q1 fixed at mz = m_{precursor} + - Q3 scan at mz = m_{product} + + +Other resources: +https://en.wikipedia.org/wiki/Mass_chromatogram#Selected-ion_monitoring_chromatogram_.28SIM.29 +http://proteowizard.sourceforge.net/dox/structpwiz_1_1msdata_1_1_chromatogram.html +https://sourceforge.net/p/proteowizard/mailman/message/27571266/ + +** TODO Implement a =findBackgroundIons= method + +Check on one of our own files. 
+ +#+BEGIN_SRC R + library(xcms) + + rd <- readMSData2("/Volumes/Ext64/data/2016/2016-11/NoSN/250516_QC_NORM_3_POS_3.mzML") + + ## Evaluate the mz-rt matrix - can we spot already something there? + sps <- spectra(rd) + dfs <- lapply(sps, as.data.frame) + ## cut the intensities at 5000 + dfs <- lapply(dfs, function(z) { + z[z[, "i"] > 5000, "i"] <- 5000 + return(z) + }) + + library(RColorBrewer) + library(lattice) + colR <- colorRampPalette(brewer.pal(9, "YlOrRd"))(255) + brks <- do.breaks(c(0, 5000), length(colR)) + + mzR <- range(mz(rd)) + rtR <- range(rtime(rd)) + + plot(3, 3, pch = NA, xlim = rtR, ylim = mzR) + for(i in 1:length(dfs)) { + intC <- level.colors(dfs[[i]]$i, at = brks, col.regions = colR) + xs <- rep(rtime(rd)[i], length(intC)) + points(x = xs, y = dfs[[i]]$mz, col = intC, cex = 0.1, pch = 16) + } + ## level.colors(x, at = brks, col.regions = colR) +#+END_SRC + +A simple approach would be to walk along the mz and evaluate whether, for a +certain mz (bin?) the signal is higher than a threshold in 70% of the spectra, +i.e. that the % of values is larger than a percentage. 
+ * References diff --git a/vignettes/references.bib b/vignettes/references.bib index 4b4f86c1b..e17352a13 100644 --- a/vignettes/references.bib +++ b/vignettes/references.bib @@ -23,3 +23,14 @@ @article{Smith:2006ic month = feb } +@article{Prince:2006jj, +author = {Prince, John T and Marcotte, Edward M}, +title = {{Chromatographic alignment of ESI-LC-MS proteomics data sets by ordered bijective interpolated warping.}}, +journal = {Analytical chemistry}, +year = {2006}, +volume = {78}, +number = {17}, +pages = {6140--6152}, +month = sep +} + diff --git a/vignettes/xcmsPreprocess.Rnw b/vignettes/xcmsPreprocess.Rnw-notrun similarity index 99% rename from vignettes/xcmsPreprocess.Rnw rename to vignettes/xcmsPreprocess.Rnw-notrun index 375d25921..ac8d0b6c5 100755 --- a/vignettes/xcmsPreprocess.Rnw +++ b/vignettes/xcmsPreprocess.Rnw-notrun @@ -53,6 +53,9 @@ corresponding to each step are also given.} library(multtest) library(xcms) library(faahKO) +## Disable default parallel processing since on some platforms it might +## cause delays. +register(SerialParam()) @ \section{Raw Data File Preparation}