Skip to content

Commit

Permalink
Merge pull request #75 from zprobot/master
Browse files (browse the repository at this point in the history)
update: test
  • Loading branch information
ypriverol authored Nov 24, 2024
2 parents 96892e6 + b5e324e commit 2b0d621
Show file tree
Hide file tree
Showing 14 changed files with 141 additions and 136 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ jobs:
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
python setup.py install
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
Expand All @@ -37,6 +36,6 @@ jobs:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with unittest
run: |
cd tests/
python setup.py install
python -m unittest
9 changes: 4 additions & 5 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ jobs:
python -m pip install --upgrade pip
python -m pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
python setup.py install
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
Expand All @@ -38,13 +37,13 @@ jobs:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with unittest
run: |
cd tests/
python setup.py install
python -m unittest
- name: Test commandline tool help
run: |
ibaqpy --help
- name: Test commandline normalization features
run: |
rm -rfv tests/PXD003947/PXD003947-peptides-norm.csv
rm -rfv tests/PXD003947/PXD003947-peptides-norm.parquet
ibaqpy features2peptides --parquet tests/PXD003947/PXD003947-feature.parquet --sdrf tests/PXD003947/PXD003947.sdrf.tsv --min_aa 7 --min_unique 2 --remove_ids data/contaminants_ids.tsv --remove_decoy_contaminants --remove_low_frequency_peptides --output tests/PXD003947/PXD003947-peptides-norm.csv --skip_normalization --nmethod median --pnmethod max_min --log2 --save_parquet
rm -rfv tests/example/PXD017834-ibaq-norm.csv
rm -rfv tests/example/PXD017834-peptides-norm.csv
ibaqpy features2peptides --parquet tests/example/feature.parquet --sdrf tests/example/PXD017834-TMT.sdrf.tsv --min_aa 7 --min_unique 2 --remove_ids data/contaminants_ids.tsv --remove_decoy_contaminants --remove_low_frequency_peptides --output tests/example/PXD017834-peptides-norm.csv --skip_normalization --nmethod median --pnmethod max_min --log2 --save_parquet
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ venv
/tests/PXD003947/PXD003947-peptides-norm.csv
/tests/PXD003947/PXD003947-peptides-norm.parquet
/build/
/dist/
/dist/
/**/__pycache__/
Empty file added data/__init__.py
Empty file.
14 changes: 7 additions & 7 deletions ibaqpy/ibaq/peptide_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,30 +327,30 @@ def low_frequency_peptides(self, percentage=0.2) -> tuple:
"""Return peptides with low frequency"""
f_table = self.parquet_db.sql(
"""
SELECT "sequence","protein_accessions",COUNT(DISTINCT sample_accession) as "count" from parquet_db
GROUP BY "sequence","protein_accessions"
SELECT "sequence","pg_accessions",COUNT(DISTINCT sample_accession) as "count" from parquet_db
GROUP BY "sequence","pg_accessions"
"""
).df()
try:
f_table["protein_accessions"] = f_table["protein_accessions"].apply(
f_table["pg_accessions"] = f_table["pg_accessions"].apply(
lambda x: x[0].split("|")[1]
)
except IndexError:
f_table["protein_accessions"] = f_table["protein_accessions"].apply(
f_table["pg_accessions"] = f_table["pg_accessions"].apply(
lambda x: x[0]
)
except Exception as e:
print(e)
exit(
"Some errors occurred when parsing protein_accessions column in feature parquet!"
"Some errors occurred when parsing pg_accessions column in feature parquet!"
)
f_table.set_index(["sequence", "protein_accessions"], inplace=True)
f_table.set_index(["sequence", "pg_accessions"], inplace=True)
f_table.drop(
f_table[f_table["count"] >= (percentage * len(self.samples))].index,
inplace=True,
)
f_table.reset_index(inplace=True)
return tuple(zip(f_table["protein_accessions"], f_table["sequence"]))
return tuple(zip(f_table["pg_accessions"], f_table["sequence"]))

@staticmethod
def csv2parquet(csv):
Expand Down
2 changes: 1 addition & 1 deletion ibaqpy/ibaq/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from sklearn.decomposition import PCA
from sklearn.impute import KNNImputer

#from combat.pycombat import pycombat
from combat.pycombat import pycombat

logging.basicConfig(
format="%(asctime)s [%(funcName)s] - %(message)s", level=logging.DEBUG
Expand Down
Binary file removed tests/PXD003947/PXD003947-feature.parquet
Binary file not shown.
109 changes: 0 additions & 109 deletions tests/PXD003947/PXD003947.sdrf.tsv

This file was deleted.

Empty file added tests/__init__.py
Empty file.
7 changes: 7 additions & 0 deletions tests/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Shared helpers for locating the test fixture files."""

from pathlib import Path

# Directory holding the example fixtures, resolved relative to this file
# so lookups do not depend on the current working directory.
TEST_DATA_ROOT = Path(__file__).parent / "example"


def datafile(path: str) -> str:
    """Return the location of a fixture under ``TEST_DATA_ROOT`` as a string.

    Args:
        path: Fixture path relative to ``TEST_DATA_ROOT``; it is passed
            through ``str()`` first, so path-like objects work too.

    Returns:
        ``TEST_DATA_ROOT`` joined with ``path``, converted to ``str``.
    """
    return str(TEST_DATA_ROOT / str(path))
Loading

0 comments on commit 2b0d621

Please sign in to comment.