Skip to content

Commit

Permalink
[r] Add support for reading v5 assays from an axis query
Browse files Browse the repository at this point in the history
Seurat v5 adds support for ragged arrays, where not every `X` layer has
exactly the same cells and features. To handle this, ragged `X` layers
need to be re-indexed and re-shaped on ingestion to resize down to only
the data present

Modified SOMA methods:
 - `SOMAExperimentAxisQuery$to_seurat()` and
   `SOMAExperimentAxisQuery$to_seurat_assay()`: now read in as v5 assays

New SOMA methods:
 - `SOMAExperimentAxisQuery$private$.to_seurat_assay_v5()`: helper
   method to read in ragged and non-ragged arrays into a v5 assay; note
   this method only handles expression layers, all other assay-level
   information is handled by parent `$to_seurat_assay()` to share code
   with v3 assay outgestion

Requires #2523 and #3007

[SC-52261](https://app.shortcut.com/tiledb-inc/story/52261/)
  • Loading branch information
mojaveazure committed Nov 5, 2024
1 parent 762abda commit 575c12b
Show file tree
Hide file tree
Showing 4 changed files with 739 additions and 18 deletions.
245 changes: 229 additions & 16 deletions apis/r/R/SOMAExperimentAxisQuery.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,19 @@ SOMAExperimentAxisQuery <- R6::R6Class(
)

# TODO: Stop converting to vectors when SOMAArrayReader supports arrow arrays
x_layer$read(coords = list(
self$obs_joinids()$as_vector(),
self$var_joinids()$as_vector()
))
coords <- list(
soma_dim_0 = self$obs_joinids()$as_vector(),
soma_dim_1 = self$var_joinids()$as_vector()
)
# Handle ragged arrays
shape <- tryCatch(
x_layer$maxshape(),
error = function(...) x_layer$shape()
)
for (i in seq_along(along.with = coords)) {
coords[[i]] <- coords[[i]][coords[[i]] < shape[i]]
}
return(x_layer$read(coords))
},

#' @description Retrieves an `obsm` layer as a \code{\link{SOMASparseNDArrayRead}}
Expand Down Expand Up @@ -245,7 +254,6 @@ SOMAExperimentAxisQuery <- R6::R6Class(
return(varp_layer$read(coords = list(var_ids, var_ids)))
},


#' @description Reads the entire query result as a list of
#' [`arrow::Table`]s. This is a low-level routine intended to be used by
#' loaders for other in-core formats, such as `Seurat`, which can be created
Expand Down Expand Up @@ -516,6 +524,8 @@ SOMAExperimentAxisQuery <- R6::R6Class(
#' @param varm_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_mlayers(axis = 'varm')}
#' @param obsp_layers \Sexpr[results=rd]{tiledbsoma:::rd_outgest_players()}
#' @param drop_levels Drop unused levels from \code{obs} and \code{var} factor columns
#' @param version Assay version to read query in as; by default, will try to
#' infer assay type from the measurement itself
#'
#' @return A \code{\link[SeuratObject]{Seurat}} object
#'
Expand All @@ -528,7 +538,8 @@ SOMAExperimentAxisQuery <- R6::R6Class(
obsm_layers = NULL,
varm_layers = NULL,
obsp_layers = NULL,
drop_levels = FALSE
drop_levels = FALSE,
version = NULL
) {
check_package("SeuratObject", version = .MINIMUM_SEURAT_VERSION())
op <- options(Seurat.object.assay.version = "v3")
Expand Down Expand Up @@ -572,7 +583,8 @@ SOMAExperimentAxisQuery <- R6::R6Class(
X_layers = X_layers,
obs_index = obs_index,
var_index = var_index,
var_column_names = var_column_names
var_column_names = var_column_names,
version = version
)
op <- options(Seurat.object.assay.calcn = FALSE)
on.exit(options(op), add = TRUE, after = FALSE)
Expand Down Expand Up @@ -739,6 +751,8 @@ SOMAExperimentAxisQuery <- R6::R6Class(
#' @param var_index \Sexpr[results=rd]{tiledbsoma:::rd_outgest_index(axis = 'var')}
#' @param var_column_names \Sexpr[results=rd]{tiledbsoma:::rd_outgest_metadata_names(axis = 'var')}
#' @param drop_levels Drop unused levels from \code{var} factor columns
#' @param version Assay version to read query in as; by default, will try to
#' infer assay type from the measurement itself
#'
#' @return An \code{\link[SeuratObject]{Assay}} object
#'
Expand All @@ -747,16 +761,13 @@ SOMAExperimentAxisQuery <- R6::R6Class(
obs_index = NULL,
var_index = NULL,
var_column_names = NULL,
drop_levels = FALSE
drop_levels = FALSE,
version = NULL
) {
version <- "v3"
check_package("SeuratObject", version = .MINIMUM_SEURAT_VERSION())
op <- options(Seurat.object.assay.version = "v3")
on.exit(options(op), add = TRUE)
check_package('SeuratObject', version = .MINIMUM_SEURAT_VERSION())
stopifnot(
"'X_layers' must be a named character vector" = is.character(X_layers) &&
is_named(X_layers, allow_empty = FALSE),
"'version' must be a single character value" = is_scalar_character(version),
"'version' must be a single character value" = is.null(version) ||
is_scalar_character(version),
"'obs_index' must be a single character value" = is.null(obs_index) ||
(is_scalar_character(obs_index) && !is.na(obs_index)),
"'var_index' must be a single character value" = is.null(var_index) ||
Expand All @@ -767,7 +778,25 @@ SOMAExperimentAxisQuery <- R6::R6Class(
"'drop_levels' must be TRUE or FALSE" = isTRUE(drop_levels) ||
isFALSE(drop_levels)
)
match.arg(version, choices = "v3")
# assay_hint <- names(.assay_version_hint())
assay_hint <- 'soma_ecosystem_seurat_assay_version'
# Get the assay version
version <- version %||%
# self$ms$get_metadata(names(.assay_version_hint())) %||%
self$ms$get_metadata(assay_hint) %||%
'v3'
match.arg(version, choices = c('v3', 'v5'))
op <- options(Seurat.object.assay.version = version)
on.exit(options(op), add = TRUE)
# Check our X_layers
if (version == 'v3') {
stopifnot(
"'X_layers' must be a named character vector" = is.character(X_layers) &&
is_named(X_layers, allow_empty = FALSE)
)
} else {
stopifnot("'X_layers' must be a character vector" = is.character(X_layers))
}
features <- if (is.null(var_index)) {
paste0("feature", self$var_joinids()$as_vector())
} else {
Expand Down Expand Up @@ -804,6 +833,22 @@ SOMAExperimentAxisQuery <- R6::R6Class(
cells = cells,
features = features
)
},
v5 = {
# cells_hint <- .assay_obs_hint(private$.measurement_name)
cells_hint <- sprintf(
"soma_ecosystem_seurat_assay_cells_%s",
private$.measurement_name
)
if (cells_hint %in% private$.experiment$obs$colnames()) {
cells_idx <- private$.load_df('obs', column_names = cells_hint)[[cells_hint]]
cells <- cells[cells_idx]
}
private$.to_seurat_assay_v5(
layers = X_layers,
cells = cells,
features = features
)
}
)
# Set the key
Expand Down Expand Up @@ -1593,6 +1638,174 @@ SOMAExperimentAxisQuery <- R6::R6Class(
}
# Return the assay
validObject(obj)
return(obj)
},
.to_seurat_assay_v5 = function(layers, cells, features) {
check_package('SeuratObject', version = '5.0.2')

# Create dummy layer to initialize v5 assay
lname <- SeuratObject::RandomName(length = 7L)
i <- 0L
while (lname %in% layers) {
lname <- SeuratObject::RandomName(length = 7L + i)
i <- i + 1L
}

# Get our metadata hints
# ragged_hint <- .ragged_array_hint()
ragged_hint <- list(soma_ecosystem_seurat_v5_ragged = 'ragged')
# default_hint <- names(.layer_hint(lname))
default_hint <- names(list(soma_ecosystem_seurat_v5_default_layers = lname))
# type_hint <- names(.type_hint(NULL))
r_type_hint <- names(list(soma_r_type_hint = NULL))
s4_type <- paste0('^', .standard_regexps()$valid_package_name, ':')

# Check arguments
stopifnot(
"'layers' must be a character vector" = is.character(x = layers),
"'cells' must be a character vector" = is.character(cells),
"'features' must be a character vector" = is.character(features)
)

# Check our dimnames
if (length(cells) > self$n_obs) {
stop(
"'cells' must have a length less than or equal to ",
self$n_obs,
call. = FALSE
)
}
if (length(features) > self$n_vars) {
stop(
"'features' must have a length less than or equal to ",
self$n_vars,
call. = FALSE
)
}
dnames <- list(features, cells)

# Find the default layers
default_layers <- self$ms$X$get_metadata(default_hint)
if (!is.null(default_layers) && grepl(pattern = '^[', x = default_layers)) {
check_package('jsonlite')
default_layers <- jsonlite::fromJSON(default_layers)
}

# Initialize our dummy layer
counts <- list(Matrix::Matrix(
data = 0L,
nrow = length(features),
ncol = length(cells),
sparse = TRUE
))
names(counts) <- lname
obj <- SeuratObject::.CreateStdAssay(
counts = counts,
cells = cells,
features = features,
transpose = FALSE
)

# Read in layers
for (lyr in layers) {
lyr_hint <- self$ms$X$get(lyr)$get_metadata(names(ragged_hint))
type_hint <- self$ms$X$get(lyr)$get_metadata(r_type_hint)
dnames <- self$ms$X$get(lyr)$dimnames()
attrn <- self$ms$X$get(lyr)$attrnames()
pkg <- NULL
if (!is.null(type_hint)) {
if (grepl(pattern = '^[', x = type_hint)) {
if (!requireNamespace('jsonlite', quietly = TRUE)) {
warning(warningCondition(
sprintf(
"Layer '%s' is typed as '%s', but package 'jsonlite' is unavailable",
lyr,
type_hint
),
class = "packageNotFoundWarning"
))
next
}
type_hint <- jsonlite::fromJSON(type_hint)
} else if (grepl(pattern = s4_type, x = type_hint)) {
pkg <- strsplit(type_hint, split = ':')[[1L]][1L]
type_hint <- strsplit(type_hint, split = ':')[[1L]][2L]
if (!requireNamespace(pkg, quietly = TRUE)) {
warning(warningCondition(
sprintf(
"Layer '%s' is typed as '%s:%s', but package '%s' is unavailable",
lyr,
pkg,
type_hint,
pkg
),
class = "packageNotFoundWarning"
))
next
}
}
}
lcells <- cells
lfeatures <- features
if (is.null(lyr_hint) || lyr_hint != ragged_hint[[1L]]) {
mat <- Matrix::t(self$to_sparse_matrix(
collection = 'X',
layer_name = lyr
))
} else {
tbl <- tryCatch(
self$X(lyr)$tables()$concat(),
error = function(...) {
warning(warningCondition(
sprintf("Layer '%s' falls outside the query condition, skipping...", lyr),
class = "unqueryableLayerWarning"
))
return(NULL)
}
)
if (is.null(tbl)) {
next
}
sdx <- vector("list", length = length(dnames))
names(sdx) <- dnames
for (i in dnames) {
sdx[[i]] <- sort(unique(tbl[[i]]$as_vector()))
tbl[[i]] <- match(tbl[[i]]$as_vector(), sdx[[i]]) - 1L
}
lcells <- lcells[match(sdx[[dnames[1L]]], self$obs_joinids()$as_vector())]
lfeatures <- lfeatures[match(sdx[[dnames[2L]]], self$var_joinids()$as_vector())]
mat <- Matrix::t(Matrix::sparseMatrix(
i = tbl[[dnames[1L]]]$as_vector(),
j = tbl[[dnames[2L]]]$as_vector(),
x = tbl[[attrn[1L]]]$as_vector(),
index1 = FALSE,
repr = "T"
))
}
if (!is.null(type_hint)) {
mat <- suppressWarnings(methods::as(mat, type_hint))
}
SeuratObject::LayerData(
obj,
layer = lyr,
features = lfeatures,
cells = lcells
) <- mat
}

# Remove dummy layer
if (length(SeuratObject::Layers(obj)) == 1L) {
stop(errorCondition(
"None of the requested layers were queryable",
class = "unqueryableLayerError"
))
}
SeuratObject::DefaultLayer(obj) <- default_layers %||% setdiff(
SeuratObject::Layers(obj),
lname
)[1L]
SeuratObject::LayerData(obj, layer = lname) <- NULL

return(obj)
}
)
Expand Down
12 changes: 10 additions & 2 deletions apis/r/man/SOMAExperimentAxisQuery.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 575c12b

Please sign in to comment.