From 575c12b6b6a4c1700343d33d27ebb809450f5c8a Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Tue, 17 Sep 2024 13:54:05 -0400 Subject: [PATCH] [r] Add support for reading v5 assays from an axis query Seurat v5 adds support for ragged arrays, where not every `X` layer has exactly the same cells and features. To handle this, ragged `X` layers need to be re-indexed and re-shaped on ingestion to resize down to only the data present Modified SOMA methods: - `SOMAExperimentAxisQuery$to_seurat()` and `SOMAExperimentAxisQuery$to_seurat_assay()`: now read in as v5 assays New SOMA methods: - `SOMAExperimentAxisQuery$private$.to_seurat_assay_v5()`: helper method to read in ragged and non-ragged arrays into a v5 assay; note this method only handles expression layers, all other assay-level information is handled by parent `$to_seurat_assay()` to share code with v3 assay outgestion Requires #2523 and #3007 [SC-52261](https://app.shortcut.com/tiledb-inc/story/52261/) --- apis/r/R/SOMAExperimentAxisQuery.R | 245 +++++++++++++- apis/r/man/SOMAExperimentAxisQuery.Rd | 12 +- .../tests/testthat/helper-test-soma-objects.R | 200 ++++++++++++ .../tests/testthat/test-SeuratOutgest-assay.R | 300 ++++++++++++++++++ 4 files changed, 739 insertions(+), 18 deletions(-) diff --git a/apis/r/R/SOMAExperimentAxisQuery.R b/apis/r/R/SOMAExperimentAxisQuery.R index 0408e87bb4..db2ad29a16 100644 --- a/apis/r/R/SOMAExperimentAxisQuery.R +++ b/apis/r/R/SOMAExperimentAxisQuery.R @@ -107,10 +107,19 @@ SOMAExperimentAxisQuery <- R6::R6Class( ) # TODO: Stop converting to vectors when SOMAArrayReader supports arrow arrays - x_layer$read(coords = list( - self$obs_joinids()$as_vector(), - self$var_joinids()$as_vector() - )) + coords <- list( + soma_dim_0 = self$obs_joinids()$as_vector(), + soma_dim_1 = self$var_joinids()$as_vector() + ) + # Handle ragged arrays + shape <- tryCatch( + x_layer$maxshape(), + error = function(...) x_layer$shape() + ) + for (i in seq_along(along.with = coords)) { + coords[[i]] <- coords[[i]][coords[[i]] < shape[i]] + } + return(x_layer$read(coords)) }, #' @description Retrieves an `obsm` layer as a \code{\link{SOMASparseNDArrayRead}} @@ -245,7 +254,6 @@ SOMAExperimentAxisQuery <- R6::R6Class( return(varp_layer$read(coords = list(var_ids, var_ids))) }, - #' @description Reads the entire query result as a list of #' [`arrow::Table`]s. This is a low-level routine intended to be used by #' loaders for other in-core formats, such as `Seurat`, which can be created @@ -516,6 +524,8 @@ SOMAExperimentAxisQuery <- R6::R6Class( #' @param varm_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_mlayers(axis = 'varm')} #' @param obsp_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_players()} #' @param drop_levels Drop unused levels from \code{obs} and \code{var} factor columns + #' @param version Assay version to read query in as; by default, will try to + #' infer assay type from the measurement itself #' #' @return A \code{\link[SeuratObject]{Seurat}} object #' @@ -528,7 +538,8 @@ SOMAExperimentAxisQuery <- R6::R6Class( obsm_layers = NULL, varm_layers = NULL, obsp_layers = NULL, - drop_levels = FALSE + drop_levels = FALSE, + version = NULL ) { check_package("SeuratObject", version = .MINIMUM_SEURAT_VERSION()) op <- options(Seurat.object.assay.version = "v3") @@ -572,7 +583,8 @@ SOMAExperimentAxisQuery <- R6::R6Class( X_layers = X_layers, obs_index = obs_index, var_index = var_index, - var_column_names = var_column_names + var_column_names = var_column_names, + version = version ) op <- options(Seurat.object.assay.calcn = FALSE) on.exit(options(op), add = TRUE, after = FALSE) @@ -739,6 +751,8 @@ SOMAExperimentAxisQuery <- R6::R6Class( #' @param var_index \Sexpr[results=rd]{tiledbsoma:::rd_outgest_index(axis = 'var')} #' @param var_column_names \Sexpr[results=rd]{tiledbsoma:::rd_outgest_metadata_names(axis = 'var')} #' @param drop_levels Drop unused levels from \code{var} factor columns + #' @param version Assay version to read query in as; by default, will try to + #' infer assay type from the measurement itself #' #' @return An \code{\link[SeuratObject]{Assay}} object #' @@ -747,16 +761,13 @@ SOMAExperimentAxisQuery <- R6::R6Class( obs_index = NULL, var_index = NULL, var_column_names = NULL, - drop_levels = FALSE + drop_levels = FALSE, + version = NULL ) { - version <- "v3" - check_package("SeuratObject", version = .MINIMUM_SEURAT_VERSION()) - op <- options(Seurat.object.assay.version = "v3") - on.exit(options(op), add = TRUE) + check_package('SeuratObject', version = .MINIMUM_SEURAT_VERSION()) stopifnot( - "'X_layers' must be a named character vector" = is.character(X_layers) && - is_named(X_layers, allow_empty = FALSE), - "'version' must be a single character value" = is_scalar_character(version), + "'version' must be a single character value" = is.null(version) || + is_scalar_character(version), "'obs_index' must be a single character value" = is.null(obs_index) || (is_scalar_character(obs_index) && !is.na(obs_index)), "'var_index' must be a single character value" = is.null(var_index) || @@ -767,7 +778,25 @@ SOMAExperimentAxisQuery <- R6::R6Class( "'drop_levels' must be TRUE or FALSE" = isTRUE(drop_levels) || isFALSE(drop_levels) ) - match.arg(version, choices = "v3") + # assay_hint <- names(.assay_version_hint()) + assay_hint <- 'soma_ecosystem_seurat_assay_version' + # Get the assay version + version <- version %||% + # self$ms$get_metadata(names(.assay_version_hint())) %||% + self$ms$get_metadata(assay_hint) %||% + 'v3' + match.arg(version, choices = c('v3', 'v5')) + op <- options(Seurat.object.assay.version = version) + on.exit(options(op), add = TRUE) + # Check our X_layers + if (version == 'v3') { + stopifnot( + "'X_layers' must be a named character vector" = is.character(X_layers) && + is_named(X_layers, allow_empty = FALSE) + ) + } else { + stopifnot("'X_layers' must be a character vector" = is.character(X_layers)) + } features <- if (is.null(var_index)) { paste0("feature", self$var_joinids()$as_vector()) } else { @@ -804,6 +833,22 @@ SOMAExperimentAxisQuery <- R6::R6Class( cells = cells, features = features ) + }, + v5 = { + # cells_hint <- .assay_obs_hint(private$.measurement_name) + cells_hint <- sprintf( + "soma_ecosystem_seurat_assay_cells_%s", + private$.measurement_name + ) + if (cells_hint %in% private$.experiment$obs$colnames()) { + cells_idx <- private$.load_df('obs', column_names = cells_hint)[[cells_hint]] + cells <- cells[cells_idx] + } + private$.to_seurat_assay_v5( + layers = X_layers, + cells = cells, + features = features + ) } ) # Set the key @@ -1593,6 +1638,174 @@ SOMAExperimentAxisQuery <- R6::R6Class( } # Return the assay validObject(obj) + return(obj) + }, + .to_seurat_assay_v5 = function(layers, cells, features) { + check_package('SeuratObject', version = '5.0.2') + + # Create dummy layer to initialize v5 assay + lname <- SeuratObject::RandomName(length = 7L) + i <- 0L + while (lname %in% layers) { + lname <- SeuratObject::RandomName(length = 7L + i) + i <- i + 1L + } + + # Get our metadata hints + # ragged_hint <- .ragged_array_hint() + ragged_hint <- list(soma_ecosystem_seurat_v5_ragged = 'ragged') + # default_hint <- names(.layer_hint(lname)) + default_hint <- names(list(soma_ecosystem_seurat_v5_default_layers = lname)) + # type_hint <- names(.type_hint(NULL)) + r_type_hint <- names(list(soma_r_type_hint = NULL)) + s4_type <- paste0('^', .standard_regexps()$valid_package_name, ':') + + # Check arguments + stopifnot( + "'layers' must be a character vector" = is.character(x = layers), + "'cells' must be a character vector" = is.character(cells), + "'features' must be a character vector" = is.character(features) + ) + + # Check our dimnames + if (length(cells) > self$n_obs) { + stop( + "'cells' must have a length less than or equal to ", + self$n_obs, + call. = FALSE + ) + } + if (length(features) > self$n_vars) { + stop( + "'features' must have a length less than or equal to ", + self$n_vars, + call. = FALSE + ) + } + dnames <- list(features, cells) + + # Find the default layers + default_layers <- self$ms$X$get_metadata(default_hint) + if (!is.null(default_layers) && grepl(pattern = '^[', x = default_layers)) { + check_package('jsonlite') + default_layers <- jsonlite::fromJSON(default_layers) + } + + # Initialize our dummy layer + counts <- list(Matrix::Matrix( + data = 0L, + nrow = length(features), + ncol = length(cells), + sparse = TRUE + )) + names(counts) <- lname + obj <- SeuratObject::.CreateStdAssay( + counts = counts, + cells = cells, + features = features, + transpose = FALSE + ) + + # Read in layers + for (lyr in layers) { + lyr_hint <- self$ms$X$get(lyr)$get_metadata(names(ragged_hint)) + type_hint <- self$ms$X$get(lyr)$get_metadata(r_type_hint) + dnames <- self$ms$X$get(lyr)$dimnames() + attrn <- self$ms$X$get(lyr)$attrnames() + pkg <- NULL + if (!is.null(type_hint)) { + if (grepl(pattern = '^[', x = type_hint)) { + if (!requireNamespace('jsonlite', quietly = TRUE)) { + warning(warningCondition( + sprintf( + "Layer '%s' is typed as '%s', but package 'jsonlite' is unavailable", + lyr, + type_hint + ), + class = "packageNotFoundWarning" + )) + next + } + type_hint <- jsonlite::fromJSON(type_hint) + } else if (grepl(pattern = s4_type, x = type_hint)) { + pkg <- strsplit(type_hint, split = ':')[[1L]][1L] + type_hint <- strsplit(type_hint, split = ':')[[1L]][2L] + if (!requireNamespace(pkg, quietly = TRUE)) { + warning(warningCondition( + sprintf( + "Layer '%s' is typed as '%s:%s', but package '%s' is unavailable", + lyr, + pkg, + type_hint, + pkg + ), + class = "packageNotFoundWarning" + )) + next + } + } + } + lcells <- cells + lfeatures <- features + if (is.null(lyr_hint) || lyr_hint != ragged_hint[[1L]]) { + mat <- Matrix::t(self$to_sparse_matrix( + collection = 'X', + layer_name = lyr + )) + } else { + tbl <- tryCatch( + self$X(lyr)$tables()$concat(), + error = function(...) { + warning(warningCondition( + sprintf("Layer '%s' falls outside the query condition, skipping...", lyr), + class = "unqueryableLayerWarning" + )) + return(NULL) + } + ) + if (is.null(tbl)) { + next + } + sdx <- vector("list", length = length(dnames)) + names(sdx) <- dnames + for (i in dnames) { + sdx[[i]] <- sort(unique(tbl[[i]]$as_vector())) + tbl[[i]] <- match(tbl[[i]]$as_vector(), sdx[[i]]) - 1L + } + lcells <- lcells[match(sdx[[dnames[1L]]], self$obs_joinids()$as_vector())] + lfeatures <- lfeatures[match(sdx[[dnames[2L]]], self$var_joinids()$as_vector())] + mat <- Matrix::t(Matrix::sparseMatrix( + i = tbl[[dnames[1L]]]$as_vector(), + j = tbl[[dnames[2L]]]$as_vector(), + x = tbl[[attrn[1L]]]$as_vector(), + index1 = FALSE, + repr = "T" + )) + } + if (!is.null(type_hint)) { + mat <- suppressWarnings(methods::as(mat, type_hint)) + } + SeuratObject::LayerData( + obj, + layer = lyr, + features = lfeatures, + cells = lcells + ) <- mat + } + + # Remove dummy layer + if (length(SeuratObject::Layers(obj)) == 1L) { + stop(errorCondition( + "None of the requested layers were queryable", + class = "unqueryableLayerError" + )) + } + SeuratObject::DefaultLayer(obj) <- default_layers %||% setdiff( + SeuratObject::Layers(obj), + lname + )[1L] + SeuratObject::LayerData(obj, layer = lname) <- NULL + return(obj) } ) diff --git a/apis/r/man/SOMAExperimentAxisQuery.Rd b/apis/r/man/SOMAExperimentAxisQuery.Rd index 6180e43b99..66c9aafa52 100644 --- a/apis/r/man/SOMAExperimentAxisQuery.Rd +++ b/apis/r/man/SOMAExperimentAxisQuery.Rd @@ -338,7 +338,8 @@ Loads the query as a \code{\link[SeuratObject]{Seurat}} object obsm_layers = NULL, varm_layers = NULL, obsp_layers = NULL, - drop_levels = FALSE + drop_levels = FALSE, + version = NULL )}\if{html}{\out{}} } @@ -362,6 +363,9 @@ Loads the query as a \code{\link[SeuratObject]{Seurat}} object \item{\code{obsp_layers}}{\Sexpr[results=rd]{tiledbsoma:::rd_outgest_players()}} \item{\code{drop_levels}}{Drop unused levels from \code{obs} and \code{var} factor columns} + +\item{\code{version}}{Assay version to read query in as; by default, will try to +infer assay type from the measurement itself} } \if{html}{\out{}} } @@ -380,7 +384,8 @@ Loads the query as a Seurat \code{\link[SeuratObject]{Assay}} obs_index = NULL, var_index = NULL, var_column_names = NULL, - drop_levels = FALSE + drop_levels = FALSE, + version = NULL )}\if{html}{\out{}} } @@ -396,6 +401,9 @@ Loads the query as a Seurat \code{\link[SeuratObject]{Assay}} \item{\code{var_column_names}}{\Sexpr[results=rd]{tiledbsoma:::rd_outgest_metadata_names(axis = 'var')}} \item{\code{drop_levels}}{Drop unused levels from \code{var} factor columns} + +\item{\code{version}}{Assay version to read query in as; by default, will try to +infer assay type from the measurement itself} } \if{html}{\out{}} } diff --git a/apis/r/tests/testthat/helper-test-soma-objects.R b/apis/r/tests/testthat/helper-test-soma-objects.R index 9de2d18028..2af92b3050 100644 --- a/apis/r/tests/testthat/helper-test-soma-objects.R +++ b/apis/r/tests/testthat/helper-test-soma-objects.R @@ -308,6 +308,206 @@ create_and_populate_experiment <- function( experiment } +create_and_populate_ragged_experiment <- function( + uri, + n_obs, + n_var, + X_layer_names, + obsm_layers = NULL, + varm_layers = NULL, + obsp_layer_names = NULL, + varp_layer_names = NULL, + config = NULL, + factors = FALSE, + mode = NULL, + seed = NA_integer_ +) { + + stopifnot( + "'obsm_layers' must be a named integer vector" = is.null(obsm_layers) || + (rlang::is_integerish(obsm_layers) && rlang::is_named(obsm_layers) && all(obsm_layers > 0L)), + "'varm_layers' must be a named integer vector" = is.null(varm_layers) || + (rlang::is_integerish(varm_layers) && rlang::is_named(varm_layers) && all(varm_layers > 0L)), + "'obsp_layer_names' must be a character vector" = is.null(obsp_layer_names) || + (is.character(obsp_layer_names) && all(nzchar(obsp_layer_names))), + "'varp_layer_names' must be a character vector" = is.null(varp_layer_names) || + (is.character(varp_layer_names) && all(nzchar(varp_layer_names))), + "'mode' must be 'READ' or 'WRITE'" = is.null(mode) || + (is.character(mode) && length(mode == 1L) && mode %in% c('READ', 'WRITE')), + "'seed' must be a single integer value" = is.null(seed) || + (is.integer(seed) && length(seed) == 1L) + ) + + experiment <- SOMAExperimentCreate(uri, platform_config = config) + + experiment$obs <- create_and_populate_obs( + uri = file.path(uri, "obs"), + nrows = n_obs, + factors = factors + ) + + experiment$ms <- SOMACollectionCreate(file.path(uri, "ms")) + + ms_rna <- SOMAMeasurementCreate(file.path(uri, "ms", "RNA")) + # ms_rna$set_metadata(.assay_version_hint('v5')) + ms_rna$set_metadata(list(soma_ecosystem_seurat_assay_version = 'v5')) + + ms_rna$var <- create_and_populate_var( + uri = file.path(ms_rna$uri, "var"), + nrows = n_var, + factors = factors + ) + ms_rna$X <- SOMACollectionCreate(file.path(ms_rna$uri, "X")) + + obsv <- seq.int(to = n_obs) + varv <- seq.int(to = n_var) + nd <- seq(from = 0L, to = 1L, by = 0.1) + nd <- rev(nd[nd > 0L]) + nd <- rep_len(nd, length.out = length(X_layer_names)) + + if (!is.na(seed)) { + set.seed(seed) + } + + for (i in seq_along(X_layer_names)) { + layer_name <- X_layer_names[i] + + mat <- Matrix::rsparsematrix( + nrow = ceiling(n_obs * nd[i]), + ncol = ceiling(n_var * nd[i]), + density = 0.6, + rand.x = function(n) as.integer(runif(n, min = 1, max = 100)), + repr = 'T' + ) + + ndarray <- SOMASparseNDArrayCreate( + file.path(ms_rna$X$uri, layer_name), + arrow::int32(), + shape = dim(mat) + ) + ndarray$write(mat) + if (nd[i] != 1L) { + # ndarray$set_metadata(.ragged_array_hint()) + ndarray$set_metadata(list(soma_ecosystem_seurat_v5_ragged = 'ragged')) + } + + ms_rna$X$set(ndarray, name = layer_name) + } + ms_rna$X$close() + + # Add obsm layers + if (rlang::is_integerish(obsm_layers)) { + obsm <- SOMACollectionCreate(file.path(ms_rna$uri, "obsm")) + for (layer in names(obsm_layers)) { + key <- gsub(pattern = '^dense:', replacement = '', x = layer) + shape <- c(n_obs, obsm_layers[layer]) + if (grepl(pattern = '^dense:', x = layer)) { + obsm$add_new_dense_ndarray( + key = key, + type = arrow::int32(), + shape = shape + ) + obsm$get(key)$write(create_dense_matrix_with_int_dims( + nrows = shape[1L], + ncols = shape[2L] + )) + } else { + obsm$add_new_sparse_ndarray( + key = key, + type = arrow::int32(), + shape = shape + ) + obsm$get(key)$write(create_sparse_matrix_with_int_dims( + nrows = shape[1L], + ncols = shape[2L] + )) + } + } + obsm$close() + ms_rna$add_new_collection(obsm, "obsm") + } + + # Add varm layers + if (rlang::is_integerish(varm_layers)) { + varm <- SOMACollectionCreate(file.path(ms_rna$uri, "varm")) + for (layer in names(varm_layers)) { + key <- gsub(pattern = '^dense:', replacement = '', x = layer) + shape <- c(n_var, varm_layers[layer]) + if (grepl(pattern = '^dense:', x = layer)) { + varm$add_new_dense_ndarray( + key = key, + type = arrow::int32(), + shape = shape + ) + varm$get(key)$write(create_dense_matrix_with_int_dims( + nrows = shape[1L], + ncols = shape[2L] + )) + } else { + varm$add_new_sparse_ndarray( + key = key, + type = arrow::int32(), + shape = shape + ) + varm$get(key)$write(create_sparse_matrix_with_int_dims( + nrows = shape[1L], + ncols = shape[2L] + )) + } + } + varm$close() + ms_rna$add_new_collection(varm, "varm") + } + + # Add obsp layers + if (is.character(obsp_layer_names)) { + obsp <- SOMACollectionCreate(file.path(ms_rna$uri, "obsp")) + for (layer in obsp_layer_names) { + obsp$add_new_sparse_ndarray( + key = layer, + type = arrow::int32(), + shape = c(n_obs, n_obs) + ) + obsp$get(layer)$write(create_sparse_matrix_with_int_dims( + nrows = n_obs, + ncols = n_obs + )) + } + obsp$close() + ms_rna$add_new_collection(obsp, "obsp") + } + + # Add varp layers + if (is.character(varp_layer_names)) { + varp <- SOMACollectionCreate(file.path(ms_rna$uri, "varp")) + for (layer in varp_layer_names) { + varp$add_new_sparse_ndarray( + key = layer, + type = arrow::int32(), + shape = c(n_var, n_var) + ) + varp$get(layer)$write(create_sparse_matrix_with_int_dims( + nrows = n_var, + ncols = n_var + )) + } + varp$close() + ms_rna$add_new_collection(varp, "varp") + } + + ms_rna$close() + + experiment$ms$set(ms_rna, name = "RNA") + experiment$ms$close() + + if (is.null(mode)) { + experiment$close() + } else { + experiment$reopen(mode) + } + return(experiment) +} + # Creates a SOMASparseNDArray with domains of `[0, 2^31 - 1]` and non-zero # values at `(0,0)`, `(2^31 - 2, 2^31 - 2)` and `(2^31 - 1, 2^31 - 1)`. This is # intended to test R's ability to read from arrays created with tiledbsoma-py diff --git a/apis/r/tests/testthat/test-SeuratOutgest-assay.R b/apis/r/tests/testthat/test-SeuratOutgest-assay.R index 68394fc840..8b91ce532f 100644 --- a/apis/r/tests/testthat/test-SeuratOutgest-assay.R +++ b/apis/r/tests/testthat/test-SeuratOutgest-assay.R @@ -202,6 +202,114 @@ test_that("Load assay with SeuratObject v5 returns v3 assays", { expect_equal(colnames(assay), paste0("cell", query$obs_joinids()$as_vector())) }) +test_that("Load v5 assay", { + skip_if(!extended_tests()) + skip_if_not_installed("SeuratObject", minimum_version = "5.0.2") + + uri <- tempfile(pattern = "assay-experiment-query-v5-whole") + n_obs <- 20L + n_var <- 10L + experiment <- create_and_populate_experiment( + uri = uri, + n_obs = n_obs, + n_var = n_var, + X_layer_names = c("counts", "logcounts"), + mode = "READ" + ) + on.exit(experiment$close(), add = TRUE, after = FALSE) + query <- SOMAExperimentAxisQuery$new( + experiment = experiment, + measurement_name = "RNA" + ) + + # Cannot wrap in `expect_no_condition()` because testthat f*cks with + # S4 method dispatch + assay <- query$to_seurat_assay( + X_layers = c("counts", "logcounts"), + version = "v5" + ) + expect_s4_class(assay, "Assay5") + expect_equal(dim(assay), c(n_var, n_obs)) + expect_s4_class(SeuratObject::LayerData(assay, "counts"), "dgTMatrix") + expect_s4_class(SeuratObject::LayerData(assay, "logcounts"), "dgTMatrix") + expect_equal(SeuratObject::Key(assay), "rna_") + expect_equal(names(assay[[]]), query$var_df$attrnames()) + expect_equal(rownames(assay), paste0("feature", seq_len(n_var) - 1L)) + expect_equal(rownames(assay), paste0("feature", query$var_joinids()$as_vector())) + expect_equal(colnames(assay), paste0("cell", seq_len(n_obs) - 1L)) + expect_equal(colnames(assay), paste0("cell", query$obs_joinids()$as_vector())) + + # Test no counts + expect_no_condition(nocounts <- query$to_seurat_assay("logcounts", version = "v5")) + expect_false("counts" %in% SeuratObject::Layers(nocounts)) + + # Test modifying feature-level meta data + expect_no_condition(nomf <- query$to_seurat_assay( + "counts", + var_column_names = FALSE, + version = "v5" + )) + expect_equal(dim(nomf[[]]), c(n_var, 0L)) + expect_no_condition(nomf2 <- query$to_seurat_assay( + "counts", + var_column_names = NA, + version = "v5" + )) + expect_equal(dim(nomf2[[]]), c(n_var, 0L)) + + # Test using cell and feature names + expect_no_condition(named <- query$to_seurat_assay( + obs_index = "baz", + var_index = "quux", + version = "v5" + )) + expect_identical( + colnames(named), + query$obs("baz")$concat()$GetColumnByName("baz")$as_vector() + ) + expect_identical( + rownames(named), + query$var("quux")$concat()$GetColumnByName("quux")$as_vector() + ) + + # Test `X_layers` assertions + expect_error(query$to_seurat_assay("tomato", version = "v5")) +}) + +test_that("Load v5 ragged assay", { + skip_if(!extended_tests()) + skip_if_not_installed("SeuratObject", minimum_version = "5.0.2") + + uri <- tempfile(pattern = "assay-experiment-query-v5-whole") + n_obs <- 20L + n_var <- 10L + layers <- c("mat", "matA", "matB", "matC") + experiment <- create_and_populate_ragged_experiment( + uri = uri, + n_obs = n_obs, + n_var = n_var, + X_layer_names = layers, + mode = "READ" + ) + on.exit(experiment$close(), add = TRUE, after = FALSE) + query <- SOMAExperimentAxisQuery$new( + experiment = experiment, + measurement_name = "RNA" + ) + + assay <- query$to_seurat_assay(layers, version = "v5") + expect_s4_class(assay, "Assay5") + expect_equal(dim(assay), c(n_var, n_obs)) + + nd <- seq(from = 0L, to = 1L, by = 0.1) + nd <- rev(nd[nd > 0L]) + nd <- rep_len(nd, length.out = length(layers)) + for (i in seq_along(layers)) { + expect_s4_class(mat <- SeuratObject::LayerData(assay, layers[i]), "dgTMatrix") + expect_equal(dim(mat), ceiling(c(n_var, n_obs) * nd[i]), info = layers[i]) + } +}) + test_that("Load assay from sliced ExperimentQuery", { skip_if(!extended_tests()) skip_if_not_installed("SeuratObject", .MINIMUM_SEURAT_VERSION("c")) @@ -247,6 +355,99 @@ test_that("Load assay from sliced ExperimentQuery", { ) }) +test_that("Load v5 assay from sliced ExperimentQuery", { + skip_if(!extended_tests()) + skip_if_not_installed("SeuratObject", minimum_version = "5.0.2") + + uri <- tempfile(pattern = "assay-experiment-query-v5-sliced") + n_obs <- 1001L + n_var <- 99L + obs_slice <- bit64::as.integer64(seq(3, 72)) + var_slice <- bit64::as.integer64(seq(7, 21)) + experiment <- create_and_populate_experiment( + uri = uri, + n_obs = n_obs, + n_var = n_var, + X_layer_names = "counts", + mode = "READ" + ) + on.exit(experiment$close(), add = TRUE, after = FALSE) + + query <- SOMAExperimentAxisQuery$new( + experiment = experiment, + measurement_name = "RNA", + obs_query = SOMAAxisQuery$new(coords = list(soma_joinid = obs_slice)), + var_query = SOMAAxisQuery$new(coords = list(soma_joinid = var_slice)) + ) + + assay <- query$to_seurat_assay("counts", version = "v5") + expect_s4_class(assay, "Assay5") + expect_equal(dim(assay), c(length(var_slice), length(obs_slice))) + expect_identical(rownames(assay), paste0("feature", query$var_joinids()$as_vector())) + expect_identical(colnames(assay), paste0("cell", query$obs_joinids()$as_vector())) + + # Test named + expect_no_condition(named <- query$to_seurat_assay( + "counts", + obs_index = "baz", + var_index = "quux", + version = "v5" + )) + expect_identical( + rownames(named), + query$var("quux")$concat()$GetColumnByName("quux")$as_vector() + ) + expect_identical( + colnames(named), + query$obs("baz")$concat()$GetColumnByName("baz")$as_vector() + ) +}) + +test_that("Load v5 ragged assay from sliced ExperimentQuery", { + skip_if(!extended_tests()) + skip_if_not_installed("SeuratObject", minimum_version = "5.0.2") + + uri <- tempfile(pattern = "assay-experiment-query-ragged-sliced") + n_obs <- 1001L + n_var <- 99L + obs_slice <- bit64::as.integer64(seq.int(799L, 999L)) + var_slice <- bit64::as.integer64(seq.int(75L, 95L)) + layers <- c("mat", "matA", "matB", "matC") + experiment <- create_and_populate_ragged_experiment( + uri = uri, + n_obs = n_obs, + n_var = n_var, + X_layer_names = layers, + mode = "READ" + ) + on.exit(experiment$close(), add = TRUE, after = FALSE) + + query <- SOMAExperimentAxisQuery$new( + experiment = experiment, + measurement_name = "RNA", + obs_query = SOMAAxisQuery$new(coords = list(soma_joinid = obs_slice)), + var_query = SOMAAxisQuery$new(coords = list(soma_joinid = var_slice)) + ) + expect_warning( + assay <- query$to_seurat_assay(layers, version = "v5"), + class = "unqueryableLayerWarning" + ) + expect_s4_class(assay, "Assay5") + expect_setequal( + setdiff(layers, "matC"), + SeuratObject::Layers(assay) + ) + expect_equal(dim(assay), sapply(list(var_slice, obs_slice), FUN = length)) + expect_identical( + rownames(assay), + paste0("feature", as.integer(var_slice)) + ) + expect_identical( + colnames(assay), + paste0("cell", as.integer(obs_slice)) + ) +}) + test_that("Load assay from indexed ExperimentQuery", { skip_if(!extended_tests()) skip_if_not_installed("SeuratObject", .MINIMUM_SEURAT_VERSION("c")) @@ -303,3 +504,102 @@ test_that("Load assay from indexed ExperimentQuery", { ) expect_identical(colnames(named), obs_label_values) }) + +test_that("Load v5 assay from indexed ExperimentQuery", { + skip_if(!extended_tests()) + skip_if_not_installed("SeuratObject", minimum_version = "5.0.2") + + uri <- tempfile(pattern = "soma-experiment-query-v5-filters") + n_obs <- 1001L + n_var <- 99L + obs_label_values <- c("1003", "1007", "1038", "1099") + var_label_values <- c("1018", "1034", "1067") + experiment <- create_and_populate_experiment( + uri = uri, + n_obs = n_obs, + n_var = n_var, + X_layer_names = "counts", + mode = "READ" + ) + on.exit(experiment$close(), add = TRUE, after = FALSE) + + obs_value_filter <- paste0( + sprintf("baz == '%s'", obs_label_values), + collapse = "||" + ) + var_value_filter <- paste0( + sprintf("quux == '%s'", var_label_values), + collapse = "||" + ) + query <- SOMAExperimentAxisQuery$new( + experiment = experiment, + measurement_name = "RNA", + obs_query = SOMAAxisQuery$new(value_filter = obs_value_filter), + var_query = SOMAAxisQuery$new(value_filter = var_value_filter) + ) + assay <- query$to_seurat_assay("counts", version = "v5") + expect_s4_class(assay, "Assay5") + expect_equal( + dim(assay), + c(length(var_label_values), length(obs_label_values)) + ) + expect_identical(rownames(assay), paste0("feature", query$var_joinids()$as_vector())) + expect_identical(colnames(assay), paste0("cell", query$obs_joinids()$as_vector())) + + # Test named + expect_no_condition(named <- query$to_seurat_assay( + "counts", + obs_index = "baz", + var_index = "quux", + version = "v5" + )) + expect_identical( + rownames(named), + query$var("quux")$concat()$GetColumnByName("quux")$as_vector() + ) + expect_identical(rownames(named), var_label_values) + expect_identical( + colnames(named), + query$obs("baz")$concat()$GetColumnByName("baz")$as_vector() + ) + expect_identical(colnames(named), obs_label_values) +}) + +test_that("Load v5 ragged assay from indexed ExperimentQuery", { + skip_if(!extended_tests()) + skip_if_not_installed("SeuratObject", minimum_version = "5.0.2") + + uri <- tempfile(pattern = "soma-experiment-query-ragged-filters") + n_obs <- 1001L + n_var <- 99L + obs_label_values <- c("1003", "1007", "1038", "1099") + var_label_values <- c("1018", "1034", "1067") + layers <- c("mat", "matA", "matB", "matC") + experiment <- create_and_populate_ragged_experiment( + uri = uri, + n_obs = n_obs, + n_var = n_var, + X_layer_names = layers, + mode = "READ" + ) + on.exit(experiment$close(), add = TRUE, after = FALSE) + + obs_value_filter <- paste0( + sprintf("baz == '%s'", obs_label_values), + collapse = "||" + ) + var_value_filter <- paste0( + sprintf("quux == '%s'", var_label_values), + collapse = "||" + ) + query <- SOMAExperimentAxisQuery$new( + experiment = experiment, + measurement_name = "RNA", + obs_query = SOMAAxisQuery$new(value_filter = obs_value_filter), + var_query = SOMAAxisQuery$new(value_filter = var_value_filter) + ) + + assay <- query$to_seurat_assay(layers, version = "v5") + expect_s4_class(assay, "Assay5") + expect_setequal(layers, SeuratObject::Layers(assay)) +})