add testing for run_pca and run_palantir

settylab · Oct 3, 2023 · 0d8d4f8 · 0d8d4f8
1 parent b761d64
commit 0d8d4f8
Show file tree

Hide file tree

Showing 3 changed files with 187 additions and 0 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -0,0 +1,42 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python package
+
+on:
+  push:
+    branches: [ "main", "dev" ]
+  pull_request:
+    branches: [ "main", "dev" ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false  # keep the other matrix jobs running when one Python version fails
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 pytest coverage typing-extensions
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest  # files are passed by glob: core_*.py / util_*.py do not match pytest's default test_*.py pattern
+      run: |
+        coverage run -m pytest tests/*.py
+    - name: Upload coverage reports to Codecov
+      uses: codecov/codecov-action@v3
diff --git a/tests/core_run_palantir.py b/tests/core_run_palantir.py
@@ -0,0 +1,86 @@
+import pytest
+import pandas as pd
+import scanpy as sc
+import numpy as np
+
+from palantir.presults import PResults
+from palantir.core import run_palantir
+
+
+@pytest.fixture
+def mock_data():  # 50 x 10 DataFrame of unseeded random values, rows cell_0.. and columns gene_0..
+    n_cells = 50
+    n_genes = 10
+    return pd.DataFrame(
+        np.random.rand(n_cells, n_genes),
+        columns=[f"gene_{i}" for i in range(n_genes)],
+        index=[f"cell_{i}" for i in range(n_cells)],
+    )
+
+
+@pytest.fixture
+def mock_anndata(mock_data):  # AnnData built from mock_data, with the same frame also stored in obsm
+    ad = sc.AnnData(X=mock_data)
+    ad.obsm["DM_EigenVectors_multiscaled"] = mock_data  # NOTE(review): presumably the obsm key run_palantir reads by default - confirm
+    return ad
+
+
+# run_palantir on a plain DataFrame, started from "cell_0", must return a PResults object
+@pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
+@pytest.mark.filterwarnings(
+    "ignore:Changing the sparsity structure of a csr_matrix is expensive."
+)
+def test_palantir_dataframe(mock_data):
+    result = run_palantir(mock_data, "cell_0")
+    assert isinstance(result, PResults), "Should return a PResults object"
+
+
+# run_palantir on AnnData must leave pseudotime/entropy in obs, fate probabilities in obsm and waypoints in uns
+@pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
+@pytest.mark.filterwarnings(
+    "ignore:Changing the sparsity structure of a csr_matrix is expensive."
+)
+def test_palantir_anndata(mock_anndata):
+    run_palantir(mock_anndata, "cell_0")  # return value ignored: only the AnnData object is inspected below
+    assert (
+        "palantir_pseudotime" in mock_anndata.obs.keys()
+    ), "Pseudotime key missing in AnnData object"
+    assert (
+        "palantir_entropy" in mock_anndata.obs.keys()
+    ), "Entropy key missing in AnnData object"
+    assert (
+        "palantir_fate_probabilities" in mock_anndata.obsm.keys()
+    ), "Fate probability key missing in AnnData object"
+    assert (
+        "palantir_waypoints" in mock_anndata.uns.keys()
+    ), "Waypoint key missing in AnnData object"
+
+
+# Each cell passed via terminal_states must appear as a column of result.branch_probs
+@pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
+@pytest.mark.filterwarnings(
+    "ignore:Changing the sparsity structure of a csr_matrix is expensive."
+)
+def test_palantir_terminal_states(mock_data):
+    result = run_palantir(mock_data, "cell_0", terminal_states=["cell_1", "cell_2"])
+    assert "cell_1" in result.branch_probs.columns, "Terminal state cell_1 missing"
+    assert "cell_2" in result.branch_probs.columns, "Terminal state cell_2 missing"
+
+
+# Toggling scale_components on the same input must change the resulting pseudotime
+@pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated.")
+@pytest.mark.filterwarnings(
+    "ignore:Changing the sparsity structure of a csr_matrix is expensive."
+)
+def test_scaling_components(mock_data):
+    result1 = run_palantir(mock_data, "cell_0", scale_components=True)
+    result2 = run_palantir(mock_data, "cell_0", scale_components=False)  # same fixture frame as result1
+    assert not np.array_equal(
+        result1.pseudotime, result2.pseudotime
+    ), "Scaling components should affect pseudotime"
+
+
+# knn=0 must be rejected by run_palantir with a ValueError
+def test_invalid_knn(mock_data):
+    with pytest.raises(ValueError):
+        run_palantir(mock_data, "cell_0", knn=0)
diff --git a/tests/util_run_pca.py b/tests/util_run_pca.py
@@ -0,0 +1,59 @@
+import pytest
+import pandas as pd
+import scanpy as sc
+import numpy as np
+
+from palantir.utils import run_pca
+
+
+@pytest.fixture
+def mock_data():  # 50 x 500 DataFrame of unseeded random values, rows cell_0.. and columns gene_0..
+    n_cells = 50
+    n_genes = 500
+    return pd.DataFrame(
+        np.random.rand(n_cells, n_genes),
+        columns=[f"gene_{i}" for i in range(n_genes)],
+        index=[f"cell_{i}" for i in range(n_cells)],
+    )
+
+
+@pytest.fixture
+def mock_anndata(mock_data):  # AnnData built from mock_data, plus a random per-gene highly_variable flag
+    ad = sc.AnnData(X=mock_data)
+    ad.obsm["DM_EigenVectors_multiscaled"] = mock_data  # NOTE(review): not read by any test in this file - confirm whether run_pca needs it
+    ad.var["highly_variable"] = np.random.choice([True, False], size=mock_data.shape[1])  # one boolean per gene column
+    return ad
+
+
+# run_pca on a DataFrame (use_hvg=False) must return a DataFrame of components and an ndarray of variance ratios
+def test_run_pca_dataframe(mock_data):
+    pca_results, var_ratio = run_pca(mock_data, use_hvg=False)
+    assert isinstance(pca_results, pd.DataFrame)
+    assert isinstance(var_ratio, np.ndarray)
+    assert pca_results.shape[1] <= 300  # NOTE(review): 300 is presumably run_pca's default n_components - confirm
+
+
+# run_pca on AnnData must store its projection under obsm["X_pca"] with at most 300 columns
+def test_run_pca_anndata(mock_anndata):
+    pca_results, var_ratio = run_pca(mock_anndata)  # returned values are unused; only the obsm side effect is checked
+    assert "X_pca" in mock_anndata.obsm.keys()
+    assert mock_anndata.obsm["X_pca"].shape[1] <= 300
+
+
+# An explicit n_components=5 must yield exactly 5 columns in the returned projection
+def test_run_pca_components(mock_data):
+    pca_results, _ = run_pca(mock_data, n_components=5, use_hvg=False)
+    assert pca_results.shape[1] == 5
+
+
+# run_pca with use_hvg=True on AnnData carrying var["highly_variable"] must return at most 300 columns
+def test_run_pca_hvg(mock_anndata):
+    pca_results, _ = run_pca(mock_anndata, use_hvg=True)
+    assert pca_results.shape[1] <= 300
+
+
+# A custom pca_key must be the obsm key under which run_pca stores the projection
+def test_run_pca_pca_key(mock_anndata):
+    run_pca(mock_anndata, pca_key="custom_key")
+    assert "custom_key" in mock_anndata.obsm.keys()
+    assert mock_anndata.obsm["custom_key"].shape[1] <= 300