From 835677dc3c77a54284f0a09ce3fa1e8eaf71cb34 Mon Sep 17 00:00:00 2001 From: JLSteenwyk Date: Sun, 3 Dec 2023 08:14:43 -0800 Subject: [PATCH] fixed link to publication pdf --- README.md | 6 ++- tests/integration/samples/simple.fa.clipkit | 10 ++-- tests/unit/test_msa.py | 59 +++++++++++++-------- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index fc6337d..0cc78c1 100644 --- a/README.md +++ b/README.md @@ -40,10 +40,12 @@

-

Run ClipKIT in the browser and leave the computing up to us!

+

Run ClipKIT in the browser and leave the computing up to us!


+ ClipKIT is a fast and flexible alignment trimming tool that keeps phylogenetically informative sites and removes others.

-If you found clipkit useful, please cite *ClipKIT: a multiple sequence alignment trimming software for accurate phylogenomic inference*. Steenwyk et al. 2020, PLoS Biology. doi: [10.1371/journal.pbio.3001007](https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3001007). +If you found ClipKIT useful, please cite *ClipKIT: a multiple sequence alignment trimming software for accurate phylogenomic inference*. Steenwyk et al. 2020, PLOS Biology. doi: [10.1371/journal.pbio.3001007](https://jlsteenwyk.com/publication_pdfs/2020_Steenwyk_etal_PLOS_Biology.pdf). +

--- diff --git a/tests/integration/samples/simple.fa.clipkit b/tests/integration/samples/simple.fa.clipkit index 0cef391..da4f9f8 100644 --- a/tests/integration/samples/simple.fa.clipkit +++ b/tests/integration/samples/simple.fa.clipkit @@ -1,10 +1,10 @@ >1 -A-GTAT +A-GAT >2 -A-G-AT +A-GAT >3 -A-G-TA +A-GTA >4 -AGA-TA +AGATA >5 -ACa-T- +ACaT- diff --git a/tests/unit/test_msa.py b/tests/unit/test_msa.py index 7649c93..0276978 100644 --- a/tests/unit/test_msa.py +++ b/tests/unit/test_msa.py @@ -4,6 +4,7 @@ from Bio import AlignIO from clipkit.msa import MSA + def get_biopython_msa(file_path, file_format="fasta"): return AlignIO.read(open(file_path), file_format) @@ -12,14 +13,22 @@ class TestMSA(object): def test_clipkit_msa_from_bio_msa(self): bio_msa = get_biopython_msa("tests/unit/examples/simple.fa") msa = MSA.from_bio_msa(bio_msa) - assert msa.header_info == [{'id': '1', 'name': '1', 'description': '1'}, {'id': '2', 'name': '2', 'description': '2'}, {'id': '3', 'name': '3', 'description': '3'}, {'id': '4', 'name': '4', 'description': '4'}, {'id': '5', 'name': '5', 'description': '5'}] - expected_seq_records = np.array([ - ['A', '-', 'G', 'T', 'A', 'T'], - ['A', '-', 'G', '-', 'A', 'T'], - ['A', '-', 'G', '-', 'T', 'A'], - ['A', 'G', 'A', '-', 'T', 'A'], - ['A', 'C', 'a', '-', 'T', '-'] - ]) + assert msa.header_info == [ + {"id": "1", "name": "1", "description": "1"}, + {"id": "2", "name": "2", "description": "2"}, + {"id": "3", "name": "3", "description": "3"}, + {"id": "4", "name": "4", "description": "4"}, + {"id": "5", "name": "5", "description": "5"}, + ] + expected_seq_records = np.array( + [ + ["A", "-", "G", "T", "A", "T"], + ["A", "-", "G", "-", "A", "T"], + ["A", "-", "G", "-", "T", "A"], + ["A", "G", "A", "-", "T", "A"], + ["A", "C", "a", "-", "T", "-"], + ] + ) np.testing.assert_equal(msa.seq_records, expected_seq_records) def test_trim_by_provided_site_positions_np_array(self): @@ -27,13 +36,15 @@ def 
test_trim_by_provided_site_positions_np_array(self): msa = MSA.from_bio_msa(bio_msa) sites_to_trim = np.array([1, 4]) msa.trim(site_positions_to_trim=sites_to_trim) - expected_sites_kept = np.array([ - ['A', 'G', 'T', 'T'], - ['A', 'G', '-', 'T'], - ['A', 'G', '-', 'A'], - ['A', 'A', '-', 'A'], - ['A', 'a', '-', '-'] - ]) + expected_sites_kept = np.array( + [ + ["A", "G", "T", "T"], + ["A", "G", "-", "T"], + ["A", "G", "-", "A"], + ["A", "A", "-", "A"], + ["A", "a", "-", "-"], + ] + ) np.testing.assert_equal(msa.sites_kept, expected_sites_kept) def test_trim_by_provided_site_positions_list(self): @@ -41,11 +52,15 @@ def test_trim_by_provided_site_positions_list(self): msa = MSA.from_bio_msa(bio_msa) sites_to_trim = [1, 4] msa.trim(site_positions_to_trim=sites_to_trim) - expected_sites_kept = np.array([ - ['A', 'G', 'T', 'T'], - ['A', 'G', '-', 'T'], - ['A', 'G', '-', 'A'], - ['A', 'A', '-', 'A'], - ['A', 'a', '-', '-'] - ]) + expected_sites_kept = np.array( + [ + ["A", "G", "T", "T"], + ["A", "G", "-", "T"], + ["A", "G", "-", "A"], + ["A", "A", "-", "A"], + ["A", "a", "-", "T"], + ] + ) + msa = msa.to_bio_msa() + print(vars(msa)) np.testing.assert_equal(msa.sites_kept, expected_sites_kept)