Skip to content

Commit

Permalink
chore: update logging to log by donor id instead (#1150)
Browse files Browse the repository at this point in the history
  • Loading branch information
joyceyan authored Dec 10, 2024
1 parent 6de64f6 commit a6c9086
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
9 changes: 6 additions & 3 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,9 +554,10 @@ def is_valid_row(row):
invalid_rows = ~self.adata.obs.apply(is_valid_row, axis=1)

if invalid_rows.any():
invalid_indices = self.adata.obs.index[invalid_rows].tolist()
donor_ids = self.adata.obs[donor_id_column].tolist()
unique_donor_ids = list(set(donor_ids))
self.errors.append(
f"obs rows with indices {invalid_indices} have invalid genetic_ancestry_* values. All "
f"obs rows with donor ids {unique_donor_ids} have invalid genetic_ancestry_* values. All "
f"observations with the same donor_id must contain the same genetic_ancestry_* values. If "
f"organism_ontolology_term_id is NOT 'NCBITaxon:9606' for Homo sapiens, then all genetic"
f"ancestry values MUST be float('nan'). If organism_ontolology_term_id is 'NCBITaxon:9606' "
Expand Down Expand Up @@ -959,7 +960,6 @@ def _validate_dataframe(self, df_name: str):
f"Column '{column_name}' in dataframe '{df_name}' contains a category '{category}' with "
f"zero observations. These categories will be removed when `--add-labels` flag is present."
)
self._validate_genetic_ancestry()
categorical_types = {type(x) for x in column.dtype.categories.values}
# Check for columns that have illegal categories, which are not supported by anndata 0.8.0
# TODO: check if this can be removed after upgrading to anndata 0.10.0
Expand Down Expand Up @@ -2058,6 +2058,9 @@ def _deep_check(self):
# Checks spatial
self._check_spatial()

# Validate genetic ancestry
self._validate_genetic_ancestry()

# Checks each component
for component_name, component_def in self.schema_def["components"].items():
logger.debug(f"Validating component: {component_name}")
Expand Down
2 changes: 1 addition & 1 deletion cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -1740,7 +1740,7 @@ def test_genetic_ancestry_same_donor_id(self, validator_with_adata):
validator.adata.obs["genetic_ancestry_South_Asian"] = [0.0, 0.0]
validator.reset(None, 2)
validator.validate_adata()
assert len(validator.errors) > 0
assert len(validator.errors) == 1

# Change the donor id back to two different donor ids. Now, this should pass validation
validator.adata.obs["donor_id"] = original_donor_id_column
Expand Down

0 comments on commit a6c9086

Please sign in to comment.