From 835677dc3c77a54284f0a09ce3fa1e8eaf71cb34 Mon Sep 17 00:00:00 2001
From: JLSteenwyk
Date: Sun, 3 Dec 2023 08:14:43 -0800
Subject: [PATCH] Fix link to publication PDF; update MSA unit tests and sample
---
README.md | 6 ++-
tests/integration/samples/simple.fa.clipkit | 10 ++--
tests/unit/test_msa.py | 59 +++++++++++++--------
3 files changed, 46 insertions(+), 29 deletions(-)
diff --git a/README.md b/README.md
index fc6337d..0cc78c1 100644
--- a/README.md
+++ b/README.md
@@ -40,10 +40,12 @@
-
+
+
ClipKIT is a fast and flexible alignment trimming tool that keeps phylogenetically informative sites and removes others.
-If you found clipkit useful, please cite *ClipKIT: a multiple sequence alignment trimming software for accurate phylogenomic inference*. Steenwyk et al. 2020, PLoS Biology. doi: [10.1371/journal.pbio.3001007](https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3001007).
+If you found clipkit useful, please cite *ClipKIT: a multiple sequence alignment trimming software for accurate phylogenomic inference*. Steenwyk et al. 2020, PLOS Biology. doi: [10.1371/journal.pbio.3001007](https://jlsteenwyk.com/publication_pdfs/2020_Steenwyk_etal_PLOS_Biology.pdf).
+
---
diff --git a/tests/integration/samples/simple.fa.clipkit b/tests/integration/samples/simple.fa.clipkit
index 0cef391..da4f9f8 100644
--- a/tests/integration/samples/simple.fa.clipkit
+++ b/tests/integration/samples/simple.fa.clipkit
@@ -1,10 +1,10 @@
>1
-A-GTAT
+A-GAT
>2
-A-G-AT
+A-GAT
>3
-A-G-TA
+A-GTA
>4
-AGA-TA
+AGATA
>5
-ACa-T-
+ACaT-
diff --git a/tests/unit/test_msa.py b/tests/unit/test_msa.py
index 7649c93..0276978 100644
--- a/tests/unit/test_msa.py
+++ b/tests/unit/test_msa.py
@@ -4,6 +4,7 @@
from Bio import AlignIO
from clipkit.msa import MSA
+
def get_biopython_msa(file_path, file_format="fasta"):
return AlignIO.read(open(file_path), file_format)
@@ -12,14 +13,22 @@ class TestMSA(object):
def test_clipkit_msa_from_bio_msa(self):
bio_msa = get_biopython_msa("tests/unit/examples/simple.fa")
msa = MSA.from_bio_msa(bio_msa)
- assert msa.header_info == [{'id': '1', 'name': '1', 'description': '1'}, {'id': '2', 'name': '2', 'description': '2'}, {'id': '3', 'name': '3', 'description': '3'}, {'id': '4', 'name': '4', 'description': '4'}, {'id': '5', 'name': '5', 'description': '5'}]
- expected_seq_records = np.array([
- ['A', '-', 'G', 'T', 'A', 'T'],
- ['A', '-', 'G', '-', 'A', 'T'],
- ['A', '-', 'G', '-', 'T', 'A'],
- ['A', 'G', 'A', '-', 'T', 'A'],
- ['A', 'C', 'a', '-', 'T', '-']
- ])
+ assert msa.header_info == [
+ {"id": "1", "name": "1", "description": "1"},
+ {"id": "2", "name": "2", "description": "2"},
+ {"id": "3", "name": "3", "description": "3"},
+ {"id": "4", "name": "4", "description": "4"},
+ {"id": "5", "name": "5", "description": "5"},
+ ]
+ expected_seq_records = np.array(
+ [
+ ["A", "-", "G", "T", "A", "T"],
+ ["A", "-", "G", "-", "A", "T"],
+ ["A", "-", "G", "-", "T", "A"],
+ ["A", "G", "A", "-", "T", "A"],
+ ["A", "C", "a", "-", "T", "-"],
+ ]
+ )
np.testing.assert_equal(msa.seq_records, expected_seq_records)
def test_trim_by_provided_site_positions_np_array(self):
@@ -27,13 +36,15 @@ def test_trim_by_provided_site_positions_np_array(self):
msa = MSA.from_bio_msa(bio_msa)
sites_to_trim = np.array([1, 4])
msa.trim(site_positions_to_trim=sites_to_trim)
- expected_sites_kept = np.array([
- ['A', 'G', 'T', 'T'],
- ['A', 'G', '-', 'T'],
- ['A', 'G', '-', 'A'],
- ['A', 'A', '-', 'A'],
- ['A', 'a', '-', '-']
- ])
+ expected_sites_kept = np.array(
+ [
+ ["A", "G", "T", "T"],
+ ["A", "G", "-", "T"],
+ ["A", "G", "-", "A"],
+ ["A", "A", "-", "A"],
+ ["A", "a", "-", "-"],
+ ]
+ )
np.testing.assert_equal(msa.sites_kept, expected_sites_kept)
def test_trim_by_provided_site_positions_list(self):
@@ -41,11 +52,13 @@ def test_trim_by_provided_site_positions_list(self):
msa = MSA.from_bio_msa(bio_msa)
sites_to_trim = [1, 4]
msa.trim(site_positions_to_trim=sites_to_trim)
- expected_sites_kept = np.array([
- ['A', 'G', 'T', 'T'],
- ['A', 'G', '-', 'T'],
- ['A', 'G', '-', 'A'],
- ['A', 'A', '-', 'A'],
- ['A', 'a', '-', '-']
- ])
+ expected_sites_kept = np.array(
+ [
+ ["A", "G", "T", "T"],
+ ["A", "G", "-", "T"],
+ ["A", "G", "-", "A"],
+ ["A", "A", "-", "A"],
+ ["A", "a", "-", "-"],
+ ]
+ )
np.testing.assert_equal(msa.sites_kept, expected_sites_kept)