diff --git a/DESCRIPTION b/DESCRIPTION index bbbb1f5..ebf1bb9 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: SpaceMarkers Title: Spatial Interaction Markers -Version: 0.99.8 +Version: 1.11.1 Authors@R: c( person(given = "Atul", family = "Deshpande", email = "adeshpande@jhu.edu", role = c("aut", "cre"), comment = c(ORCID="0000-0001-5144-6924")), person(given = "Ludmila", family = "Danilova", email = "ldanilo1@jhmi.edu", role = "ctb"), @@ -54,5 +54,5 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: false Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 License: MIT + file LICENSE diff --git a/Dockerfile b/Dockerfile index 203edb6..0b798eb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -#generated from 69d000eee2de41886a5732a1436be4a98f080dcf +#generated from 5e8ce2188e8157bfd86d5eb614499004cc4a5b65 # --platform=linux/amd64 to avoid 'no match for platform in the manifest' on M1 FROM rocker/tidyverse:4 diff --git a/NEWS.md b/NEWS.md index e48fcb9..fb3d7a9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# SpaceMarkers development version + +* `getSpatialFeatures`: add default method to infer the object passed to it. + # SpaceMarkers 0.1.0 * Added a `NEWS.md` file to track changes to the package. diff --git a/R/preprocessing.R b/R/preprocessing.R index e4e11ed..69c3923 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -141,12 +141,12 @@ load10XCoords <- function(visiumDir, resolution = "lowres", version = NULL){ #' #' @param filePath A string path to the location of the file containing the #' spatial features. -#' @param method A string specifying the method used to obtain spatial -#' features. e.g., "CoGAPS", "Seurat", or "BayesTME". +#' @param method A string specifying the type of object to obtain spatial +#' feature from. Default NULL, where the method is inferred based on object +#' type. Other methods are: "CoGAPS", "Seurat", or "BayesTME". 
#' @param featureNames An array of strings specifying the column names -#' corresponding to the feature names. If input is NULL, in the case of CoGAPS -#' and BayesTME, all features are selected In the case of Seurat, all metadata -#' columns with "_Feature" suffix are selected. +#' corresponding to the feature names or a regex string. In the case of Seurat, +#' all metadata columns with "_Feature" suffix are selected. #' @return a matrix of spatial features with barcodes associated #' with individual coordinates #' @examples @@ -158,52 +158,108 @@ load10XCoords <- function(visiumDir, resolution = "lowres", version = NULL){ #' head(spFeatures) #' -getSpatialFeatures <- function(filePath,method = "CoGAPS",featureNames = NULL){ - if(method=="CoGAPS"){ - spFeatures <- readRDS(filePath) - spFeatures <- slot(spFeatures,"sampleFactors") - } else if(method=="BayesTME"){ - hf <- hdf5r::h5file(filename = filePath, mode='r') - spFeatures <- t(hdf5r::readDataSet( - hf[["obsm/bayestme_cell_type_counts"]])) - barcodes <- hdf5r::readDataSet(hf[["obs/_index"]]) - rownames(spFeatures) <- barcodes - if (is.null(colnames(spFeatures))) - colnames(spFeatures)<-paste0("BayesTME_",seq(1,ncol(spFeatures))) - } else if(method=="Seurat"){ - spFeatures <- readRDS(filePath) - spFeatures <- spFeatures[[]] - } else {stop("Method not supported.")} - if(is.null(featureNames)){ - featureNames <- colnames(spFeatures) - message("No feature names provided. 
Using all available features.") - if(method=="Seurat") { - featureNames <- grepl(colnames(spFeatures),pattern = "_feature", - ignore.case = TRUE) - message("Using all metadata columns with '_Feature' suffix.") +getSpatialFeatures <- function(filePath, method = NULL, featureNames = "."){ + + #read the features object based on the format + spObject <- .readFormat(filePath) + + #determine the method to use for feature extraction + method <- .inferMethod(spObject, method) + + spFun <- c("CoGAPS"=.getCogapsFeatures, + "BayesTME"=.getBTMEfeatures, + "Seurat"=.getSeuratFeatures) + + spFeatures <- spFun[[method]](spObject) + + dataNames <- colnames(spFeatures) + + #subset the features based on the featureNames + if(length(featureNames) == 1) { + #assume regex is provided + namePattern <- featureNames + featureNames <- dataNames[grepl(pattern = namePattern, + dataNames, ignore.case = TRUE)] + if(length(featureNames) == 0) { + stop(sprintf("Regex %s does not match any feature.", namePattern)) } - } else{ - if (length(featureNames) == 1){ - featureNames <- colnames(spFeatures)[grepl(pattern=featureNames, - colnames(spFeatures), - ignore.case = TRUE)] - message("Only one featureName provided. - Assuming input is regular expression.") - if (length(featureNames) == 0) - stop("No features found with matching regular expression. 
- Please check your input.") - else - message("Found ",length(featureNames), - " features matching the regular expression.") + } else if(!all(featureNames %in% dataNames)) { + stop("Some of the features were not found:", + sprintf(" %s", setdiff(featureNames, dataNames))) + } + + featureNames <- intersect(featureNames, dataNames) + spFeatures <- spFeatures[,featureNames, drop = FALSE] + + return(spFeatures) +} + +#' readFormat +#' Reads a format into an R object +#' @keywords internal +#' +.readFormat <- function(path){ + if(grepl(".rds",path)){ + obj <- readRDS(path) + } else if (grepl(".h5ad",path)){ + obj <- hdf5r::h5file(filename = path, mode='r') + } else { + stop("File format not supported.") + } + return(obj) +} + +#' inferMethod +#' Infer the method used to obtain spatial features +#' @keywords internal +.inferMethod <- function(spObject, method){ + if(is.null(method)){ + if(inherits(spObject, "H5File")){ + method <- "BayesTME" + } else if(inherits(spObject, "CogapsResult")){ + method <- "CoGAPS" } - else - featureNames <- intersect(featureNames,colnames(spFeatures)) - if(!is.null(featureNames)) - spFeatures <- spFeatures[,featureNames] - else - stop("No features found in the spatial - data with provided feature names.") } - spFeatures <- spFeatures[,featureNames] + return(method) +} + +#' .getCogapsFeatures +#' Load features CoGAPS object +#' @keywords internal +#' +.getCogapsFeatures <- function(obj){ + spFeatures <- slot(obj, "sampleFactors") return(spFeatures) } + +#' .getBTMEfeatures +#' Load features BayesTME object +#' +#' @keywords internal +#' +.getBTMEfeatures <- function(hf){ + feat_loc <- "obsm/bayestme_cell_type_counts" + barc_loc <- "obs/_index" + spFeatures <- t(hdf5r::readDataSet(hf[[feat_loc]])) + barcodes <- hdf5r::readDataSet(hf[[barc_loc]]) + rownames(spFeatures) <- barcodes + if (is.null(colnames(spFeatures))) { + colnames(spFeatures)<-paste0("BayesTME_",seq(1,ncol(spFeatures))) + } + + return(spFeatures) +} + +#' .getSeuratFeatures +#' 
Load features Seurat object +#' @keywords internal +#' +.getSeuratFeatures <- function(obj){ + spFeatures <- slot(obj, "meta.data") + selection <- grepl("_Feature",colnames(spFeatures), ignore.case = TRUE) + if (!any(selection)){ + stop("No _feature columns found in Seurat object.") + } + spFeatures <- spFeatures[,selection, drop = FALSE] + return(spFeatures) +} \ No newline at end of file diff --git a/data/cogaps_result.rda b/data/cogaps_result.rda deleted file mode 100644 index 1c8054d..0000000 Binary files a/data/cogaps_result.rda and /dev/null differ diff --git a/man/cogaps_result.Rd b/man/cogaps_result.Rd deleted file mode 100644 index 3a64709..0000000 --- a/man/cogaps_result.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\name{cogaps_result} -\alias{cogaps_result} -\title{Latent Feature Space for each pattern} -\format{ -CogapsResult object with 24228 features and 6 samples: -\describe{ -\item{featureLoadings}{Data frame of Gene for each pattern} -\item{sampleFactors}{Data frame of cell barcodes and the 5 patterns} -} -} -\value{ -A matrix of statistics for each pattern across each barcode -} -\description{ -A CoGAPS object where the major requirements for SpaceMarkers are the -matrices of genes, barcodes and patterns learned of the latent-feature space -} diff --git a/man/dot-getBTMEfeatures.Rd b/man/dot-getBTMEfeatures.Rd new file mode 100644 index 0000000..197c370 --- /dev/null +++ b/man/dot-getBTMEfeatures.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R +\name{.getBTMEfeatures} +\alias{.getBTMEfeatures} +\title{.getBTMEfeatures +Load features BayesTME object} +\usage{ +.getBTMEfeatures(hf) +} +\description{ +.getBTMEfeatures +Load features BayesTME object +} +\keyword{internal} diff --git a/man/dot-getCogapsFeatures.Rd b/man/dot-getCogapsFeatures.Rd new file mode 100644 index 0000000..23652c7 --- /dev/null +++ 
b/man/dot-getCogapsFeatures.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R +\name{.getCogapsFeatures} +\alias{.getCogapsFeatures} +\title{.getCogapsFeatures +Load features CoGAPS object} +\usage{ +.getCogapsFeatures(obj) +} +\description{ +.getCogapsFeatures +Load features CoGAPS object +} +\keyword{internal} diff --git a/man/dot-getSeuratFeatures.Rd b/man/dot-getSeuratFeatures.Rd new file mode 100644 index 0000000..29b60ab --- /dev/null +++ b/man/dot-getSeuratFeatures.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R +\name{.getSeuratFeatures} +\alias{.getSeuratFeatures} +\title{.getSeuratFeatures +Load features Seurat object} +\usage{ +.getSeuratFeatures(obj) +} +\description{ +.getSeuratFeatures +Load features Seurat object +} +\keyword{internal} diff --git a/man/dot-inferMethod.Rd b/man/dot-inferMethod.Rd new file mode 100644 index 0000000..bc28210 --- /dev/null +++ b/man/dot-inferMethod.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R +\name{.inferMethod} +\alias{.inferMethod} +\title{inferMethod +Infer the method used to obtain spatial features} +\usage{ +.inferMethod(spObject, method) +} +\description{ +inferMethod +Infer the method used to obtain spatial features +} +\keyword{internal} diff --git a/man/dot-readFormat.Rd b/man/dot-readFormat.Rd new file mode 100644 index 0000000..b2789f5 --- /dev/null +++ b/man/dot-readFormat.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R +\name{.readFormat} +\alias{.readFormat} +\title{readFormat +Reads a format into an R object} +\usage{ +.readFormat(path) +} +\description{ +readFormat +Reads a format into an R object +} +\keyword{internal} diff --git a/man/getSpatialFeatures.Rd b/man/getSpatialFeatures.Rd index ff49895..c859e22 100644 --- 
a/man/getSpatialFeatures.Rd +++ b/man/getSpatialFeatures.Rd @@ -5,19 +5,19 @@ \title{getSpatialFeatures Load spatial features} \usage{ -getSpatialFeatures(filePath, method = "CoGAPS", featureNames = NULL) +getSpatialFeatures(filePath, method = NULL, featureNames = ".") } \arguments{ \item{filePath}{A string path to the location of the file containing the spatial features.} -\item{method}{A string specifying the method used to obtain spatial -features. e.g., "CoGAPS", "Seurat", or "BayesTME".} +\item{method}{A string specifying the type of object to obtain spatial +feature from. Default NULL, where the method is inferred based on object +type. Other methods are: "CoGAPS", "Seurat", or "BayesTME".} \item{featureNames}{An array of strings specifying the column names -corresponding to the feature names. If input is NULL, in the case of CoGAPS -and BayesTME, all features are selected In the case of Seurat, all metadata -columns with "_Feature" suffix are selected.} +corresponding to the feature names or a regex string. 
In the case of Seurat, +all metadata columns with "_Feature" suffix are selected.} } \value{ a matrix of spatial features with barcodes associated diff --git a/tests/testthat/assets/btme.h5ad b/tests/testthat/assets/btme.h5ad new file mode 100644 index 0000000..ab8ffdb Binary files /dev/null and b/tests/testthat/assets/btme.h5ad differ diff --git a/tests/testthat/assets/cogaps.rds b/tests/testthat/assets/cogaps.rds new file mode 100644 index 0000000..658e767 Binary files /dev/null and b/tests/testthat/assets/cogaps.rds differ diff --git a/tests/testthat/assets/create_btme.py b/tests/testthat/assets/create_btme.py new file mode 100644 index 0000000..cb03f5e --- /dev/null +++ b/tests/testthat/assets/create_btme.py @@ -0,0 +1,22 @@ +from bayestme import data, synthetic_data +stdata = synthetic_data.generate_demo_dataset() + +from bayestme import deconvolution +from bayestme.common import InferenceType +best_spatial_smoothing_parameter = 1000.0 +best_n_components = 3 + +deconvolution_result = deconvolution.sample_from_posterior( + data=stdata, + n_components=best_n_components, + spatial_smoothing_parameter=best_spatial_smoothing_parameter, + n_samples=100, + n_svi_steps=10_000, + expression_truth=None, + use_spatial_guide=True) + +data.add_deconvolution_results_to_dataset( + stdata, deconvolution_result +) + +stdata.save('btme.h5ad') \ No newline at end of file diff --git a/tests/testthat/assets/create_cogaps.R b/tests/testthat/assets/create_cogaps.R new file mode 100644 index 0000000..2e506b8 --- /dev/null +++ b/tests/testthat/assets/create_cogaps.R @@ -0,0 +1,5 @@ +library(CoGAPS) +data(GIST) +cg <- CoGAPS(GIST.data_frame, nPatterns = 3 , nIterations = 100) +saveRDS(cg, "cogaps.rds") + diff --git a/tests/testthat/test-preprocessing.getSpatialFeatures.R b/tests/testthat/test-preprocessing.getSpatialFeatures.R new file mode 100644 index 0000000..7dd2379 --- /dev/null +++ b/tests/testthat/test-preprocessing.getSpatialFeatures.R @@ -0,0 +1,65 @@ +#reading input files and 
extracting features + +test_that(".readFormat can read cogaps and btme files", { + expect_no_error(.readFormat("assets/cogaps.rds")) + expect_no_error(.readFormat("assets/btme.h5ad")) +}) + +test_that(".getCogapsFeatures can read features from a CoGAPS object",{ + cg <- .readFormat("assets/cogaps.rds") + sf <- .getCogapsFeatures(cg) + expect_equal(ncol(sf), 3) +}) + +test_that(".getBTMEfeatures can read features from Anndata object",{ + bt <- .readFormat("assets/btme.h5ad") + sf <- .getBTMEfeatures(bt) + expect_equal(ncol(sf), 3) +}) + +test_that(".inferMethod can infer the method used to obtain spatial features", { + expect_equal(.inferMethod(.readFormat("assets/cogaps.rds"), NULL), "CoGAPS") + expect_equal(.inferMethod(.readFormat("assets/btme.h5ad"), NULL), "BayesTME") +}) + +#main function tests +test_that("getSpatialFeatures fails with unsupported method",{ + expect_error(getSpatialFeatures("assets/cogaps.rds", method = "unsupported")) +}) + +test_that("getSpatialFeatures fails with no matching feature names",{ + expect_error(getSpatialFeatures("assets/cogaps.rds", method = "CoGAPS", featureNames = "no_match"), + "Regex no_match does not match any feature.") +}) + +test_that("getSpatialFeatures work with custom regex", { + sf <- getSpatialFeatures("assets/cogaps.rds", method = "CoGAPS", featureNames = "_1") + expect_equal(ncol(sf), 1) +}) + +test_that("getSpatialFeatures works with a feature name set", { + sf <- getSpatialFeatures("assets/cogaps.rds", method = "CoGAPS", featureNames = c("Pattern_1", "Pattern_2")) + expect_equal(ncol(sf), 2) +}) + +test_that("getSpatialFeatures warns if some of the features are not found", { + expect_error(getSpatialFeatures("assets/cogaps.rds", method = "CoGAPS", + featureNames = c("Pattern_1", "Pattern_2", "Pattern_4")), + "Some of the features were not found: Pattern_4") +}) + +test_that("getSpatialFeatures warns if some of the features are not found", { + expect_error(getSpatialFeatures("assets/cogaps.rds", method = 
"CoGAPS", + featureNames = c("Pattern_1", "Pattern_2", "Pattern_3", "qq", "ww")), + "Some of the features were not found: qq ww") +}) + +test_that("getSpatialFeatures works in infer mode with Cogaps object", { + sf <- getSpatialFeatures("assets/cogaps.rds") + expect_equal(ncol(sf), 3) +}) + +test_that("getSpatialFeatures works in infer mode with Cogaps object", { + sf <- getSpatialFeatures("assets/btme.h5ad") + expect_equal(ncol(sf), 3) +}) \ No newline at end of file