Merge pull request #75 from zprobot/master

update: test
bigbio · Nov 24, 2024 · 2b0d621
2 parents 96892e6 + b5e324e
commit 2b0d621
Show file tree

Hide file tree

Showing 14 changed files with 141 additions and 136 deletions.
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -28,7 +28,6 @@ jobs:
         python -m pip install --upgrade pip
         pip install flake8 pytest
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-        python setup.py install
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
@@ -37,6 +36,6 @@ jobs:
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
     - name: Test with unittest
       run: |
-        cd tests/
+        python setup.py install
         python -m unittest
 
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -29,7 +29,6 @@ jobs:
         python -m pip install --upgrade pip
         python -m pip install flake8 pytest
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-        python setup.py install
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
@@ -38,13 +37,13 @@ jobs:
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
     - name: Test with unittest
       run: |
-        cd tests/
+        python setup.py install
         python -m unittest
     - name: Test commandline tool help
       run: |
         ibaqpy --help
     - name: Test commandline normalization features
       run: |
-        rm -rfv tests/PXD003947/PXD003947-peptides-norm.csv
-        rm -rfv tests/PXD003947/PXD003947-peptides-norm.parquet
-        ibaqpy features2peptides --parquet tests/PXD003947/PXD003947-feature.parquet --sdrf tests/PXD003947/PXD003947.sdrf.tsv --min_aa 7 --min_unique 2 --remove_ids data/contaminants_ids.tsv --remove_decoy_contaminants --remove_low_frequency_peptides --output tests/PXD003947/PXD003947-peptides-norm.csv --skip_normalization --nmethod median --pnmethod max_min --log2 --save_parquet
+        rm -rfv tests/example/PXD017834-ibaq-norm.csv
+        rm -rfv tests/example/PXD017834-peptides-norm.csv
+        ibaqpy features2peptides --parquet tests/example/feature.parquet --sdrf tests/example/PXD017834-TMT.sdrf.tsv --min_aa 7 --min_unique 2 --remove_ids data/contaminants_ids.tsv --remove_decoy_contaminants --remove_low_frequency_peptides --output tests/example/PXD017834-peptides-norm.csv --skip_normalization --nmethod median --pnmethod max_min --log2 --save_parquet
diff --git a/.gitignore b/.gitignore
@@ -15,4 +15,5 @@ venv
 /tests/PXD003947/PXD003947-peptides-norm.csv
 /tests/PXD003947/PXD003947-peptides-norm.parquet
 /build/
-/dist/
+/dist/
+/**/__pycache__/
diff --git a/data/__init__.py b/data/__init__.py
diff --git a/ibaqpy/ibaq/peptide_normalization.py b/ibaqpy/ibaq/peptide_normalization.py
@@ -327,30 +327,30 @@ def low_frequency_peptides(self, percentage=0.2) -> tuple:
         """Return peptides with low frequency"""
         f_table = self.parquet_db.sql(
             """
-                SELECT "sequence","protein_accessions",COUNT(DISTINCT sample_accession) as "count" from parquet_db
-                GROUP BY "sequence","protein_accessions"
+                SELECT "sequence","pg_accessions",COUNT(DISTINCT sample_accession) as "count" from parquet_db
+                GROUP BY "sequence","pg_accessions"
                 """
         ).df()
         try:
-            f_table["protein_accessions"] = f_table["protein_accessions"].apply(
+            f_table["pg_accessions"] = f_table["pg_accessions"].apply(
                 lambda x: x[0].split("|")[1]
             )
         except IndexError:
-            f_table["protein_accessions"] = f_table["protein_accessions"].apply(
+            f_table["pg_accessions"] = f_table["pg_accessions"].apply(
                 lambda x: x[0]
             )
         except Exception as e:
             print(e)
             exit(
-                "Some errors occurred when parsing protein_accessions column in feature parquet!"
+                "Some errors occurred when parsing pg_accessions column in feature parquet!"
             )
-        f_table.set_index(["sequence", "protein_accessions"], inplace=True)
+        f_table.set_index(["sequence", "pg_accessions"], inplace=True)
         f_table.drop(
             f_table[f_table["count"] >= (percentage * len(self.samples))].index,
             inplace=True,
         )
         f_table.reset_index(inplace=True)
-        return tuple(zip(f_table["protein_accessions"], f_table["sequence"]))
+        return tuple(zip(f_table["pg_accessions"], f_table["sequence"]))
 
     @staticmethod
     def csv2parquet(csv):

diff --git a/ibaqpy/ibaq/utils.py b/ibaqpy/ibaq/utils.py
@@ -10,7 +10,7 @@
 from sklearn.decomposition import PCA
 from sklearn.impute import KNNImputer
 
-#from combat.pycombat import pycombat
+from combat.pycombat import pycombat
 
 logging.basicConfig(
     format="%(asctime)s [%(funcName)s] - %(message)s", level=logging.DEBUG

diff --git a/tests/PXD003947/PXD003947-feature.parquet b/tests/PXD003947/PXD003947-feature.parquet
diff --git a/tests/PXD003947/PXD003947.sdrf.tsv b/tests/PXD003947/PXD003947.sdrf.tsv
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/common.py b/tests/common.py
@@ -0,0 +1,7 @@
+from pathlib import Path
+
+TEST_DATA_ROOT = Path(__file__).parent / "example"
+
+def datafile(path: str):
+    path = str(path)
+    return str(TEST_DATA_ROOT / path)
diff --git a/...-reviewed-contaminants-decoy-202210.fasta → ...-reviewed-contaminants-decoy-202210.fasta b/...-reviewed-contaminants-decoy-202210.fasta → ...-reviewed-contaminants-decoy-202210.fasta