Skip to content

Commit

Permalink
feat: update "is visium" definition for cell_type_ontology_term_id (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
joyceyan authored Nov 19, 2024
1 parent bb662b5 commit 34e76db
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 11 deletions.
39 changes: 35 additions & 4 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@

logger = logging.getLogger(__name__)

ONTOLOGY_PARSER = OntologyParser(schema_version=f"v{schema.get_current_schema_version()}")
ONTOLOGY_PARSER = OntologyParser(schema_version="v5.3.0")

ASSAY_VISIUM = "EFO:0010961"
ASSAY_SLIDE_SEQV2 = "EFO:0030062"

VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 4992
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE = 2000

ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "descendants of obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN = f"is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED = f"is required for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0 = f"{ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE} and in_tissue is 0"
Expand Down Expand Up @@ -1475,12 +1475,12 @@ def _validate_spatial_cell_type_ontology_term_id(self):
# Exit if:
# - not Visium and is_single is True as no further checks are necessary
# - in_tissue is not specified as checks are dependent on this value
if not self._is_visium_and_is_single_true() or "in_tissue" not in self.adata.obs:
if not self._is_visium_including_descendants() and self._is_single() or "in_tissue" not in self.adata.obs:
return

# Validate cell type: must be "unknown" if Visium and is_single is True and in_tissue is 0.
if (
(self.adata.obs["assay_ontology_term_id"] == ASSAY_VISIUM)
self._is_visium_including_descendants()
& (self.adata.obs["in_tissue"] == 0)
& (self.adata.obs["cell_type_ontology_term_id"] != "unknown")
).any():
Expand Down Expand Up @@ -1760,6 +1760,37 @@ def _is_visium(self) -> bool:
self.is_visium = assay_ontology_term_id is not None and (assay_ontology_term_id == ASSAY_VISIUM).any()
return self.is_visium

def _is_visium_including_descendants(self) -> bool:
    """
    Determine if any obs['assay_ontology_term_id'] value is Visium, i.e. EFO:0010961 itself or one of its
    descendants. The result is cached on self.is_visium and reused on later calls.

    :return True if at least one assay_ontology_term_id is Visium; False otherwise, including when the column is
        absent or the ontology lookup raises KeyError for a term.
    :rtype bool
    """
    # NOTE(review): _is_visium() stores its exact-match result in the same self.is_visium attribute, so whichever
    # of the two methods runs first decides the cached value for both - confirm this sharing is intended.
    if self.is_visium is not None:
        return self.is_visium

    assay_ontology_term_id = self.adata.obs.get("assay_ontology_term_id")
    if assay_ontology_term_id is None:
        self.is_visium = False
        return self.is_visium

    # Query the ontology once per distinct term rather than once per obs row; obs can hold a very large number
    # of rows but only a handful of distinct assay terms. Missing values are skipped so they are never passed
    # to the ontology parser.
    unique_terms = pd.Series(assay_ontology_term_id).dropna().unique()

    try:
        # Evaluate every distinct term before reducing, so a KeyError for any of them is still caught below
        # instead of being masked by an early match.
        matches = [
            ASSAY_VISIUM in ONTOLOGY_PARSER.get_lowest_common_ancestors(ASSAY_VISIUM, term)
            for term in unique_terms
        ]
    except KeyError as e:
        # This generally means the assay_ontology_term_id is invalid, but we want the error to be raised
        # by our explicit validator checks, not this implicit one.
        logger.warning(f"KeyError processing assay_ontology_term_id ontology: {e}")
        self.is_visium = False
    else:
        # any() over a list yields a plain Python bool, matching the declared return type.
        self.is_visium = any(matches)

    return self.is_visium

def _validate_spatial_image_shape(self, image_name: str, image: np.ndarray, max_dimension: int = None):
"""
Validate the spatial image is of shape (,,3 or 4) and has a max dimension, if specified. A spatial image
Expand Down
60 changes: 53 additions & 7 deletions cellxgene_schema_cli/tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,31 @@ def test__validate_with_h5ad_invalid_and_without_labels(self):


class TestCheckSpatial:
@pytest.mark.parametrize(
    "assay_ontology_term_id, expected_is_visium",
    [
        # EFO:0010961 is the parent term for Visium Spatial Gene Expression; it and all its descendants are Visium.
        ("EFO:0010961", True),
        ("EFO:0022857", True),  # Visium Spatial Gene Expression V1
        ("EFO:0022858", True),  # Visium CytAssist Spatial Gene Expression V2
        ("EFO:0022860", True),  # Visium CytAssist Spatial Gene Expression, 11mm
        ("EFO:0022859", True),  # Visium CytAssist Spatial Gene Expression, 6.5mm
        ("EFO:0003740", False),  # Random other EFO term
    ],
)
def test__is_visium_descendant(self, assay_ontology_term_id, expected_is_visium):
    """Assign one assay term to the whole obs column and check the descendant-aware Visium detection."""
    validator = Validator()
    validator._set_schema_def()
    validator.adata = adata_visium.copy()
    validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id

    is_visium = validator._is_visium_including_descendants()
    assert is_visium == expected_is_visium

def test__validate_spatial_visium_ok(self):
validator: Validator = Validator()
validator._set_schema_def()
Expand Down Expand Up @@ -958,33 +983,54 @@ def test__validate_tissue_position_int_max_error(self, tissue_position_name, max
assert f"obs['{tissue_position_name}'] must be {error_message_token}" in validator.errors[0]

@pytest.mark.parametrize(
"cell_type_ontology_term_id, in_tissue",
[("unknown", 0), (["unknown", "CL:0000066"], [0, 1]), ("CL:0000066", 1)],
"cell_type_ontology_term_id, in_tissue, assay_ontology_term_id",
[
# MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium Spatial Gene Expression
("unknown", 0, "EFO:0010961"),
# MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium CytAssist Spatial Gene Expression, 11mm
("unknown", 0, "EFO:0022860"),
# MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium Spatial Gene Expression V1
# valid CL term is ok when in_tissue = 1 and assay_ontology_term_id = Visium CytAssist Spatial Gene Expression, 11mm
(["unknown", "CL:0000066"], [0, 1], ["EFO:0022857", "EFO:0022860"]),
# normal CL term for in_tissue = 1 and assay_ontology_term_id = 10x 3' v2
("CL:0000066", 1, "EFO:0009899"),
],
)
def test__validate_cell_type_ontology_term_id_ok(self, cell_type_ontology_term_id, in_tissue):
def test__validate_cell_type_ontology_term_id_ok(
self, cell_type_ontology_term_id, in_tissue, assay_ontology_term_id
):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_visium.copy()
validator.adata.obs.cell_type_ontology_term_id = cell_type_ontology_term_id
validator.adata.obs.in_tissue = in_tissue
validator.adata.obs.assay_ontology_term_id = assay_ontology_term_id

# Confirm cell type is valid.
validator._validate_spatial_cell_type_ontology_term_id()
assert not validator.errors

@pytest.mark.parametrize(
"cell_type_ontology_term_id, in_tissue",
"cell_type_ontology_term_id, in_tissue, assay_ontology_term_id",
[
("CL:0000066", 0),
(["CL:0000066", "unknown"], [0, 1]),
# MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium Spatial Gene Expression
("CL:0000066", 0, "EFO:0010961"),
(["CL:0000066", "unknown"], [0, 1], ["EFO:0010961", "EFO:0010961"]),
# MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium CytAssist Spatial Gene Expression, 11mm
("CL:0000066", 0, "EFO:0022860"),
# MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium Spatial Gene Expression V1
("CL:0000066", 0, "EFO:0022857"),
],
)
def test__validate_cell_type_ontology_term_id_error(self, cell_type_ontology_term_id, in_tissue):
def test__validate_cell_type_ontology_term_id_error(
self, cell_type_ontology_term_id, in_tissue, assay_ontology_term_id
):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_visium.copy()
validator.adata.obs.cell_type_ontology_term_id = cell_type_ontology_term_id
validator.adata.obs.in_tissue = in_tissue
validator.adata.obs.assay_ontology_term_id = assay_ontology_term_id

# Confirm errors.
validator._validate_spatial_cell_type_ontology_term_id()
Expand Down

0 comments on commit 34e76db

Please sign in to comment.