From 77a2d25335b21a54ca139ba4d20d057dd948e25f Mon Sep 17 00:00:00 2001 From: Daniel Hegeman Date: Thu, 28 Sep 2023 13:16:52 -0700 Subject: [PATCH] fix: rename all instances of 'format' variable to 'matrix_format' (#653) --- cellxgene_schema_cli/cellxgene_schema/utils.py | 12 ++++++------ .../cellxgene_schema/validate.py | 16 ++++++++-------- cellxgene_schema_cli/tests/test_validate.py | 12 ++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/utils.py b/cellxgene_schema_cli/cellxgene_schema/utils.py index 459e36f0d..b409927bf 100644 --- a/cellxgene_schema_cli/cellxgene_schema/utils.py +++ b/cellxgene_schema_cli/cellxgene_schema/utils.py @@ -67,18 +67,18 @@ def get_matrix_format(adata: ad.AnnData, matrix: Union[np.ndarray, sparse.spmatr # # >>> return getattr(matrix, "format_str", "dense) # - format = "unknown" + matrix_format = "unknown" if adata.n_obs == 0 or adata.n_vars == 0: - format = "dense" + matrix_format = "dense" else: matrix_slice = matrix[0:1, 0:1] if isinstance(matrix_slice, sparse.spmatrix): - format = matrix_slice.format + matrix_format = matrix_slice.format elif isinstance(matrix_slice, np.ndarray): - format = "dense" + matrix_format = "dense" - assert format in ["unknown", "csr", "csc", "coo", "dense"] - return format + assert matrix_format in ["unknown", "csr", "csc", "coo", "dense"] + return matrix_format def getattr_anndata(adata: ad.AnnData, attr: str = None): diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index d63fc2a05..12f1513cd 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -291,9 +291,9 @@ def _count_matrix_nonzero(self, matrix_name: str, matrix: Union[np.ndarray, spar logger.debug(f"Counting non-zero values in {matrix_name}") nnz = 0 - format = get_matrix_format(self.adata, matrix) + matrix_format = get_matrix_format(self.adata, matrix) for matrix_chunk, _, _ in self._chunk_matrix(matrix): - nnz += matrix_chunk.count_nonzero() if format != "dense" else np.count_nonzero(matrix_chunk) + nnz += matrix_chunk.count_nonzero() if matrix_format != "dense" else np.count_nonzero(matrix_chunk) self.number_non_zero[matrix_name] = nnz return nnz @@ -898,7 +898,7 @@ def _validate_sparsity(self): matrix_format = get_matrix_format(self.adata, x) if matrix_format == "csr": continue - assert format != "unknown" + assert matrix_format != "unknown" # It seems silly to perform this test for 'coo' and 'csc' formats, # which are, by definition, already sparse. But the old code @@ -929,11 +929,11 @@ def _validate_seurat_convertibility(self): to_validate.append((self.adata.raw.X, "raw.X")) # Check length of component arrays for matrix, matrix_name in to_validate: - format = get_matrix_format(self.adata, matrix) - if format in ["csc", "csr", "coo"]: + matrix_format = get_matrix_format(self.adata, matrix) + if matrix_format in ["csc", "csr", "coo"]: effective_r_array_size = self._count_matrix_nonzero(matrix_name, matrix) is_sparse = True - elif format == "dense": + elif matrix_format == "dense": effective_r_array_size = max(matrix.shape) is_sparse = False else: @@ -1097,8 +1097,8 @@ def _is_raw(self, force: bool = False) -> bool: raw_loc = self._get_raw_x_loc() x = self.adata.raw.X if raw_loc == "raw.X" else self.adata.X - format = get_matrix_format(self.adata, x) - assert format != "unknown" + matrix_format = get_matrix_format(self.adata, x) + assert matrix_format != "unknown" self._raw_layer_exists = True for matrix_chunk, _, _ in self._chunk_matrix(x): data = matrix_chunk if isinstance(matrix_chunk, np.ndarray) else matrix_chunk.data diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index d033fb02f..3db22eb97 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -394,25 +394,25 @@ def test_fail_mixed_column_types(self): class TestIsRaw: @staticmethod - def create_validator(data: Union[ndarray, spmatrix], format: str) -> Validator: + def create_validator(data: Union[ndarray, spmatrix], matrix_format: str) -> Validator: """ Create a sample AnnData instance with the given data and format. :param data: The data matrix. - :param format: The format of the data matrix (e.g., "dense", "csr", "csc"). + :param matrix_format: The format of the data matrix (e.g., "dense", "csr", "csc"). :return anndata.AnnData: An AnnData instance with the specified data and format. """ validator = Validator() adata = anndata.AnnData(X=data) - adata.obsm["X_" + format] = data + adata.obsm["X_" + matrix_format] = data validator.adata = adata return validator @pytest.mark.parametrize( - "data, format, expected_result", + "data, matrix_format, expected_result", [ # Test case with integer values in a dense matrix (np.array([[1, 2, 3], [4, 5, 6]], dtype=int), "dense", True), @@ -426,8 +426,8 @@ def create_validator(data: Union[ndarray, spmatrix], format: str) -> Validator: (np.array([[1, 2.2, 3], [4.4, 5, 6.6]]), "dense", False), ], ) - def test_is_raw(self, data, format, expected_result): - validator = self.create_validator(data, format) + def test_is_raw(self, data, matrix_format, expected_result): + validator = self.create_validator(data, matrix_format) assert validator._is_raw() == expected_result @mock.patch("cellxgene_schema.validate.get_matrix_format", return_value="unknown")