diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 6869a9df..98ce9e7d 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -21,7 +21,7 @@
 
 logger = logging.getLogger(__name__)
 
-ONTOLOGY_PARSER = OntologyParser(schema_version=f"v{schema.get_current_schema_version()}")
+ONTOLOGY_PARSER = OntologyParser(schema_version="v5.3.0")
 
 ASSAY_VISIUM = "EFO:0010961"
 ASSAY_SLIDE_SEQV2 = "EFO:0030062"
@@ -29,7 +29,7 @@
 VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 4992
 SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE = 2000
 
-ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True"
+ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "descendants of obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True"
 ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN = f"is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
 ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED = f"is required for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
 ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0 = f"{ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE} and in_tissue is 0"
@@ -1475,12 +1475,12 @@ def _validate_spatial_cell_type_ontology_term_id(self):
         # Exit if:
         # - not Visium and is_single is True as no further checks are necessary
         # - in_tissue is not specified as checks are dependent on this value
-        if not self._is_visium_and_is_single_true() or "in_tissue" not in self.adata.obs:
+        if not (self._is_visium_including_descendants() and self._is_single()) or "in_tissue" not in self.adata.obs:
             return
 
         # Validate cell type: must be "unknown" if Visium and is_single is True and in_tissue is 0.
         if (
-            (self.adata.obs["assay_ontology_term_id"] == ASSAY_VISIUM)
+            self._visium_descendant_mask()
             & (self.adata.obs["in_tissue"] == 0)
             & (self.adata.obs["cell_type_ontology_term_id"] != "unknown")
         ).any():
@@ -1760,6 +1760,53 @@ def _is_visium(self) -> bool:
             self.is_visium = assay_ontology_term_id is not None and (assay_ontology_term_id == ASSAY_VISIUM).any()
         return self.is_visium
 
+    @staticmethod
+    def _is_visium_term(term_id) -> bool:
+        """
+        Determine if a single ontology term is Visium: EFO:0010961 itself or one of its descendants.
+
+        :param term_id: a value taken from obs['assay_ontology_term_id']
+
+        :return True if the term is Visium, False otherwise, including when the term cannot be resolved.
+        :rtype bool
+        """
+        try:
+            # A term counts as its own ancestor, so ASSAY_VISIUM is among the lowest common ancestors of
+            # (ASSAY_VISIUM, term_id) when term_id is ASSAY_VISIUM or descends from it.
+            return ASSAY_VISIUM in ONTOLOGY_PARSER.get_lowest_common_ancestors(ASSAY_VISIUM, term_id)
+        except (KeyError, ValueError, TypeError) as e:
+            # This generally means the assay_ontology_term_id is invalid, but we want the error to be raised
+            # by our explicit validator checks, not this implicit one.
+            logger.warning(f"{type(e).__name__} processing assay_ontology_term_id ontology: {e}")
+            return False
+
+    def _visium_descendant_mask(self) -> pd.Series:
+        """
+        Flag each observation whose assay_ontology_term_id is Visium (EFO:0010961 or one of its descendants).
+
+        :return boolean Series indexed like obs; all False when obs has no assay_ontology_term_id column.
+        :rtype pd.Series
+        """
+        assay_ontology_term_id = self.adata.obs.get("assay_ontology_term_id")
+        if assay_ontology_term_id is None:
+            return pd.Series(False, index=self.adata.obs.index)
+
+        # Resolve every distinct term once instead of once per row.
+        visium_terms = [term for term in assay_ontology_term_id.unique() if self._is_visium_term(term)]
+        return assay_ontology_term_id.isin(visium_terms)
+
+    def _is_visium_including_descendants(self) -> bool:
+        """
+        Determine if the assay_ontology_term_id is Visium (EFO:0010961 or one of its descendants).
+
+        The answer is not stored in self.is_visium: _is_visium() keeps its exact-match answer there, and
+        sharing the attribute would make each method's result depend on which of the two ran first.
+
+        :return True if any assay_ontology_term_id is Visium, False otherwise.
+        :rtype bool
+        """
+        return bool(self._visium_descendant_mask().any())
+
     def _validate_spatial_image_shape(self, image_name: str, image: np.ndarray, max_dimension: int = None):
         """
         Validate the spatial image is of shape (,,3 or 4) and has a max dimension, if specified. A spatial image
diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py
index 819cde43..801bc7cc 100644
--- a/cellxgene_schema_cli/tests/test_validate.py
+++ b/cellxgene_schema_cli/tests/test_validate.py
@@ -333,6 +333,42 @@ def test__validate_with_h5ad_invalid_and_without_labels(self):
 
 
 class TestCheckSpatial:
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, expected_is_visium",
+        [
+            # Parent term for Visium Spatial Gene Expression. This term and all its descendants are Visium
+            ("EFO:0010961", True),
+            # Visium Spatial Gene Expression V1
+            ("EFO:0022857", True),
+            # Visium CytAssist Spatial Gene Expression V2
+            ("EFO:0022858", True),
+            # Visium CytAssist Spatial Gene Expression, 11mm
+            ("EFO:0022860", True),
+            # Visium CytAssist Spatial Gene Expression, 6.5mm
+            ("EFO:0022859", True),
+            # Random other EFO term
+            ("EFO:0003740", False),
+        ],
+    )
+    def test__is_visium_descendant(self, assay_ontology_term_id, expected_is_visium):
+        validator: Validator = Validator()
+        validator._set_schema_def()
+        validator.adata = adata_visium.copy()
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+
+        assert validator._is_visium_including_descendants() == expected_is_visium
+
+    def test__is_visium_descendant_independent_of_is_visium(self):
+        validator: Validator = Validator()
+        validator._set_schema_def()
+        validator.adata = adata_visium.copy()
+        # Visium Spatial Gene Expression V1: a descendant of, but not equal to, EFO:0010961
+        validator.adata.obs["assay_ontology_term_id"] = "EFO:0022857"
+
+        # Running the exact-match check first must not change the descendant-aware answer.
+        validator._is_visium()
+        assert validator._is_visium_including_descendants()
+
     def test__validate_spatial_visium_ok(self):
         validator: Validator = Validator()
         validator._set_schema_def()
@@ -958,33 +994,56 @@ def test__validate_tissue_position_int_max_error(self, tissue_position_name, max
         assert f"obs['{tissue_position_name}'] must be {error_message_token}" in validator.errors[0]
 
     @pytest.mark.parametrize(
-        "cell_type_ontology_term_id, in_tissue",
-        [("unknown", 0), (["unknown", "CL:0000066"], [0, 1]), ("CL:0000066", 1)],
+        "cell_type_ontology_term_id, in_tissue, assay_ontology_term_id",
+        [
+            # MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium Spatial Gene Expression
+            ("unknown", 0, "EFO:0010961"),
+            # MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium CytAssist Spatial Gene Expression, 11mm
+            ("unknown", 0, "EFO:0022860"),
+            # MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium Spatial Gene Expression V1
+            # valid CL term is ok when in_tissue = 1 and assay_ontology_term_id = Visium CytAssist Spatial Gene Expression, 11mm
+            (["unknown", "CL:0000066"], [0, 1], ["EFO:0022857", "EFO:0022860"]),
+            # normal CL term for in_tissue = 1 and assay_ontology_term_id = 10x 3' v2
+            ("CL:0000066", 1, "EFO:0009899"),
+            # a non-Visium row (10x 3' v2) is not constrained when in_tissue = 0, even next to a Visium V1 row
+            (["CL:0000066", "unknown"], [0, 0], ["EFO:0009899", "EFO:0022857"]),
+        ],
     )
-    def test__validate_cell_type_ontology_term_id_ok(self, cell_type_ontology_term_id, in_tissue):
+    def test__validate_cell_type_ontology_term_id_ok(
+        self, cell_type_ontology_term_id, in_tissue, assay_ontology_term_id
+    ):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
         validator.adata.obs.cell_type_ontology_term_id = cell_type_ontology_term_id
         validator.adata.obs.in_tissue = in_tissue
+        validator.adata.obs.assay_ontology_term_id = assay_ontology_term_id
 
         # Confirm cell type is valid.
         validator._validate_spatial_cell_type_ontology_term_id()
         assert not validator.errors
 
     @pytest.mark.parametrize(
-        "cell_type_ontology_term_id, in_tissue",
+        "cell_type_ontology_term_id, in_tissue, assay_ontology_term_id",
         [
-            ("CL:0000066", 0),
-            (["CL:0000066", "unknown"], [0, 1]),
+            # MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium Spatial Gene Expression
+            ("CL:0000066", 0, "EFO:0010961"),
+            (["CL:0000066", "unknown"], [0, 1], ["EFO:0010961", "EFO:0010961"]),
+            # MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium CytAssist Spatial Gene Expression, 11mm
+            ("CL:0000066", 0, "EFO:0022860"),
+            # MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium Spatial Gene Expression V1
+            ("CL:0000066", 0, "EFO:0022857"),
         ],
     )
-    def test__validate_cell_type_ontology_term_id_error(self, cell_type_ontology_term_id, in_tissue):
+    def test__validate_cell_type_ontology_term_id_error(
+        self, cell_type_ontology_term_id, in_tissue, assay_ontology_term_id
+    ):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
         validator.adata.obs.cell_type_ontology_term_id = cell_type_ontology_term_id
         validator.adata.obs.in_tissue = in_tissue
+        validator.adata.obs.assay_ontology_term_id = assay_ontology_term_id
 
         # Confirm errors.
         validator._validate_spatial_cell_type_ontology_term_id()