Skip to content

Commit

Permalink
fix(4.0): Remove use of subtest in unit tests (#652)
Browse files Browse the repository at this point in the history
- Move libraries required for testing into requirements-dev.txt. Use this to install requirements in GHA. 
- Remove test libraries from requirements.txt. These should not be in the release.
- Remove all uses of subTest in test cases. Replace subtests with parameterized, and removed unittest.TestCase as needed.
- No functional changes were made to the tests. Only how they are run.
- remove all uses of unittest.TestCase
- add reset and adata setter to Validator
  • Loading branch information
Bento007 authored Oct 2, 2023
1 parent 9de266d commit 63dbc3b
Show file tree
Hide file tree
Showing 10 changed files with 1,548 additions and 1,769 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/push_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ jobs:
${{ runner.os }}-pip-
- name: Install dependencies
run: |
pip install -r cellxgene_schema_cli/requirements.txt
pip install pytest coverage
pip install -r ./cellxgene_schema_cli/requirements.txt
pip install -r ./requirements-dev.txt
- name: cellxgene_schema_cli Unit tests
run: make unit-test
- name: Upload coverage results as an artifact
Expand Down Expand Up @@ -71,7 +71,7 @@ jobs:
- name: Install dependencies
run: |
pip install -r scripts/migration_assistant/requirements.txt
pip install pytest coverage
pip install -r ./requirements-dev.txt
- name: migration_assistant Unit tests
run: make migration-assistant-tests
- name: Upload coverage results as an artifact
Expand Down Expand Up @@ -102,7 +102,7 @@ jobs:
- name: Install dependencies
run: |
pip install -r scripts/schema_bump_dry_run_ontologies/requirements.txt
pip install pytest coverage
pip install -r ./requirements-dev.txt
- name: Ontology Dry Run Unit Tests
run: make ontology-dry-run-tests
- name: Upload coverage results as an artifact
Expand Down Expand Up @@ -133,7 +133,7 @@ jobs:
- name: Install dependencies
run: |
pip install -r scripts/schema_bump_dry_run_genes/requirements.txt
pip install pytest coverage
pip install -r ./requirements-dev.txt
- name: Gene Dry Run Unit Tests
run: make gene-dry-run-tests
- name: Upload coverage results as an artifact
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ COVERAGE_DATA_FILE=.coverage.$(shell git rev-parse --short HEAD)
export COVERAGE_RUN_ARGS:=--data-file=$(COVERAGE_DATA_FILE) --parallel-mode

unit-test:
cd cellxgene_schema_cli && coverage run $(COVERAGE_RUN_ARGS) --source=cellxgene_schema -m pytest --log-level=INFO \
./tests
cd cellxgene_schema_cli \
&& coverage run $(COVERAGE_RUN_ARGS) --source=cellxgene_schema -m pytest --log-level=INFO ./tests
mv ./cellxgene_schema_cli/$(COVERAGE_DATA_FILE)* ./$(COVERAGE_DATA_FILE).cellxgene_schema_cli

ontology-dry-run-tests:
Expand Down
29 changes: 18 additions & 11 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,26 +23,34 @@ class Validator:
"""Handles validation of AnnData"""

def __init__(self, ignore_labels=False):
# Set initial state
self.errors = []
self.warnings = []
self.is_valid = False
self.adata = anndata.AnnData()
self.schema_def = dict()
self.schema_version: str = None
self.h5ad_path = ""
self.invalid_feature_ids = []
self._raw_layer_exists = None
self.is_seurat_convertible: bool = True
self.ignore_labels = ignore_labels

# Values will be instances of ontology.GeneChecker,
# keys will be one of ontology.SupportedOrganisms
self.gene_checkers = dict()

def reset(self):
self.errors = []
self.warnings = []
self.is_valid = False
self.h5ad_path = ""
self._raw_layer_exists = None
self.is_seurat_convertible: bool = True

# Matrix (e.g., X, raw.X, ...) number non-zero cache
self.number_non_zero = dict()

@property
def adata(self) -> anndata.AnnData:
return self._adata

@adata.setter
def adata(self, adata: anndata.AnnData):
self.reset()
self._adata = adata

def _validate_encoding_version(self):
import h5py

Expand Down Expand Up @@ -290,7 +298,6 @@ def _validate_feature_id(self, feature_id: str, df_name: str):
self.gene_checkers[organism] = ontology.GeneChecker(organism)

if not self.gene_checkers[organism].is_valid_id(feature_id):
self.invalid_feature_ids.append(feature_id)
self.errors.append(f"'{feature_id}' is not a valid feature ID in '{df_name}'.")

return
Expand Down Expand Up @@ -1379,7 +1386,7 @@ def validate_adata(self, h5ad_path: Union[str, bytes, os.PathLike] = None, to_me
"""
logger.info("Starting validation...")
# Re-start errors in case a new h5ad is being validated
self.errors = []
self.reset()

if h5ad_path:
logger.debug("Reading the h5ad file...")
Expand Down
4 changes: 1 addition & 3 deletions cellxgene_schema_cli/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
anndata==0.8.0
click==8.1.3
coverage==7.2.3
Cython==0.29.34
numpy==1.23.2
owlready2==0.38
pandas==1.4.4
pytest==7.2.2
PyYaml==6.0
wheel==0.40.0
semver==3.0.0
xxhash==3.3.0
xxhash==3.3.0
146 changes: 69 additions & 77 deletions cellxgene_schema_cli/tests/test_ontology.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import unittest

import pytest
from cellxgene_schema import ontology
from fixtures.examples_ontology_test import (
invalid_genes,
Expand All @@ -14,78 +13,71 @@
# Tests for internal functions of the OntologyChecker and GeneChecker classes


class TestGeneChecker(unittest.TestCase):
def setUp(self):
self.valid_species = ontology.SupportedOrganisms
self.invalid_species = invalid_species
self.valid_genes = valid_genes
self.invalid_genes = invalid_genes

def test_species_validity(self):
for species in self.valid_species:
self.assertIsInstance(ontology.GeneChecker(species), ontology.GeneChecker)
for species in self.invalid_species:
with self.assertRaises(ValueError):
ontology.GeneChecker(species)

def test_valid_genes(self):
for species in self.valid_genes:
geneChecker = ontology.GeneChecker(species)
for gene_id in self.valid_genes[species]:
gene_label = self.valid_genes[species][gene_id][0]
gene_length = self.valid_genes[species][gene_id][1]

self.assertTrue(geneChecker.is_valid_id(gene_id))
self.assertEqual(geneChecker.get_symbol(gene_id), gene_label)
self.assertEqual(geneChecker.get_length(gene_id), gene_length)

def test_invalid_genes(self):
for species in self.invalid_genes:
geneChecker = ontology.GeneChecker(species)
for gene_id in self.invalid_genes[species]:
self.assertFalse(geneChecker.is_valid_id(gene_id))
with self.assertRaises(ValueError):
geneChecker.get_symbol(gene_id)
with self.assertRaises(ValueError):
geneChecker.get_length(gene_id)


class TestOntologyChecker(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.ontologyChecker = ontology.OntologyChecker()

def setUp(self):
self.valid_ontologies = valid_ontologies
self.invalid_ontologies = invalid_ontologies
self.valid_terms = valid_terms
self.invalid_terms = invalid_terms

def test_ontology_validity(self):
for i in self.valid_ontologies:
self.assertTrue(self.ontologyChecker.is_valid_ontology(i))
self.assertIsNone(self.ontologyChecker.assert_ontology(i))

for i in self.invalid_ontologies:
self.assertFalse(self.ontologyChecker.is_valid_ontology(i))
with self.assertRaises(ValueError):
self.ontologyChecker.assert_ontology(i)

def test_valid_term_id(self):
for ontology_id in self.valid_terms:
for term_id in self.valid_terms[ontology_id]:
term_label = self.valid_terms[ontology_id][term_id]

self.assertTrue(self.ontologyChecker.is_valid_term_id(ontology_id, term_id))
self.assertIsNone(self.ontologyChecker.assert_term_id(ontology_id, term_id))
self.assertEqual(
self.ontologyChecker.get_term_label(ontology_id, term_id),
term_label,
)

def test_invalid_term_ids(self):
for ontology_id in self.invalid_terms:
for term_id in self.invalid_terms[ontology_id]:
self.assertFalse(self.ontologyChecker.is_valid_term_id(ontology_id, term_id))
with self.assertRaises(ValueError):
self.ontologyChecker.assert_term_id(ontology_id, term_id)
class TestGeneChecker:
@pytest.mark.parametrize("valid_species", ontology.SupportedOrganisms)
def test_species_valid(self, valid_species):
assert isinstance(ontology.GeneChecker(valid_species), ontology.GeneChecker)

@pytest.mark.parametrize("invalid_species", invalid_species)
def test_species_invalid(self, invalid_species):
with pytest.raises(ValueError):
ontology.GeneChecker(invalid_species)

@pytest.mark.parametrize("species,valid_genes", valid_genes.items())
def test_valid_genes(self, species, valid_genes):
geneChecker = ontology.GeneChecker(species)
for gene_id in valid_genes:
gene_label = valid_genes[gene_id][0]
gene_length = valid_genes[gene_id][1]

assert geneChecker.is_valid_id(gene_id)
assert geneChecker.get_symbol(gene_id) == gene_label
assert geneChecker.get_length(gene_id) == gene_length

@pytest.mark.parametrize("species,invalid_genes", invalid_genes.items())
def test_invalid_genes(self, species, invalid_genes):
geneChecker = ontology.GeneChecker(species)
for gene_id in invalid_genes:
assert not geneChecker.is_valid_id(gene_id)
with pytest.raises(ValueError):
geneChecker.get_symbol(gene_id)
with pytest.raises(ValueError):
geneChecker.get_length(gene_id)


@pytest.fixture(scope="class")
def ontologyChecker() -> ontology.OntologyChecker:
return ontology.OntologyChecker()


class TestOntologyChecker:
@pytest.mark.parametrize("valid_ontology", valid_ontologies)
def test_ontology_valid(self, ontologyChecker, valid_ontology):
assert ontologyChecker.is_valid_ontology(valid_ontology)
assert ontologyChecker.assert_ontology(valid_ontology) is None

@pytest.mark.parametrize("invalid_ontology", invalid_ontologies)
def test_ontology_invalid(self, ontologyChecker, invalid_ontology):
assert not ontologyChecker.is_valid_ontology(invalid_ontology)
with pytest.raises(ValueError):
ontologyChecker.assert_ontology(invalid_ontology)

@pytest.mark.parametrize(
"ontology_id,term_id",
[(ontology_id, term_id) for ontology_id in valid_terms for term_id in valid_terms[ontology_id]],
)
def test_valid_term_id(self, ontologyChecker, ontology_id, term_id):
term_label = valid_terms[ontology_id][term_id]

assert ontologyChecker.is_valid_term_id(ontology_id, term_id)
assert ontologyChecker.assert_term_id(ontology_id, term_id) is None
assert ontologyChecker.get_term_label(ontology_id, term_id) == term_label

@pytest.mark.parametrize(
"ontology_id,term_id",
[(ontology_id, term_id) for ontology_id in invalid_terms for term_id in invalid_terms[ontology_id]],
)
def test_invalid_term_ids(self, ontologyChecker, ontology_id, term_id):
assert not ontologyChecker.is_valid_term_id(ontology_id, term_id)
with pytest.raises(ValueError):
ontologyChecker.assert_term_id(ontology_id, term_id)
48 changes: 26 additions & 22 deletions cellxgene_schema_cli/tests/test_remove_labels.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import unittest

import pytest
from cellxgene_schema.remove_labels import AnnDataLabelRemover
from fixtures.examples_validate import (
adata,
Expand All @@ -8,26 +7,31 @@
from pandas.testing import assert_frame_equal


class TestRemoveLabels(unittest.TestCase):
def setUp(self):
self.anndata_label_remover = AnnDataLabelRemover(adata_with_labels.copy())
self.adata_no_labels = adata
@pytest.fixture
def remove_labels_setup():
anndata_label_remover = AnnDataLabelRemover(adata_with_labels.copy())
adata_no_labels = adata
return anndata_label_remover, adata_no_labels


def test_remove_labels(self):
self.anndata_label_remover.adata.raw = adata_with_labels
self.anndata_label_remover.adata.raw.var.drop("feature_is_filtered", axis=1, inplace=True)
self.anndata_label_remover.remove_labels()
adata_labels_removed = self.anndata_label_remover.adata
assert_frame_equal(adata_labels_removed.obs, self.adata_no_labels.obs)
assert_frame_equal(adata_labels_removed.var, self.adata_no_labels.var)
assert_frame_equal(adata_labels_removed.raw.var, self.adata_no_labels.raw.var)
self.assertDictEqual(dict(adata_labels_removed.uns), dict(self.adata_no_labels.uns))
class TestRemoveLabels:
def test_remove_labels(self, remove_labels_setup):
anndata_label_remover, adata_no_labels = remove_labels_setup
anndata_label_remover.adata.raw = adata_with_labels
anndata_label_remover.adata.raw.var.drop("feature_is_filtered", axis=1, inplace=True)
anndata_label_remover.remove_labels()
adata_labels_removed = anndata_label_remover.adata
assert_frame_equal(adata_labels_removed.obs, adata_no_labels.obs)
assert_frame_equal(adata_labels_removed.var, adata_no_labels.var)
assert_frame_equal(adata_labels_removed.raw.var, adata_no_labels.raw.var)
assert dict(adata_labels_removed.uns) == dict(adata_no_labels.uns)

def test_remove_labels_no_raw(self):
self.anndata_label_remover.remove_labels()
adata_labels_removed = self.anndata_label_remover.adata
def test_remove_labels_no_raw(self, remove_labels_setup):
anndata_label_remover, adata_no_labels = remove_labels_setup
anndata_label_remover.remove_labels()
adata_labels_removed = anndata_label_remover.adata

self.assertIsNone(adata_labels_removed.raw)
assert_frame_equal(adata_labels_removed.obs, self.adata_no_labels.obs)
assert_frame_equal(adata_labels_removed.var, self.adata_no_labels.var)
self.assertDictEqual(dict(adata_labels_removed.uns), dict(self.adata_no_labels.uns))
assert adata_labels_removed.raw is None
assert_frame_equal(adata_labels_removed.obs, adata_no_labels.obs)
assert_frame_equal(adata_labels_removed.var, adata_no_labels.var)
assert dict(adata_labels_removed.uns) == dict(adata_no_labels.uns)
Loading

0 comments on commit 63dbc3b

Please sign in to comment.