From 835677dc3c77a54284f0a09ce3fa1e8eaf71cb34 Mon Sep 17 00:00:00 2001
From: JLSteenwyk <jlsteenwyk@gmail.com>
Date: Sun, 3 Dec 2023 08:14:43 -0800
Subject: [PATCH] Fix publication PDF link; reformat MSA unit tests

Also updates tests/integration/samples/simple.fa.clipkit.

---
 README.md                                   |  6 ++-
 tests/integration/samples/simple.fa.clipkit | 10 ++--
 tests/unit/test_msa.py                      | 59 +++++++++++++--------
 3 files changed, 46 insertions(+), 29 deletions(-)
diff --git a/README.md b/README.md
index fc6337d..0cc78c1 100644
--- a/README.md
+++ b/README.md
@@ -40,10 +40,12 @@
 </p>
 
 
-<center><h1>Run <a href="https://clipkit.genomelybio.com/">ClipKIT in the browser</a> and leave the computing up to <a href="https://www.genomelybio.com/">us</a>!</h1></center>
+<center><h2>Run <a href="https://clipkit.genomelybio.com/">ClipKIT in the browser</a> and leave the computing up to <a href="https://www.genomelybio.com/">us</a>!</h2></center>
 <br />
+
 ClipKIT is a fast and flexible alignment trimming tool that keeps phylogenetically informative sites and removes others.<br /><br />
-If you found clipkit useful, please cite *ClipKIT: a multiple sequence alignment trimming software for accurate phylogenomic inference*. Steenwyk et al. 2020, PLoS Biology. doi: [10.1371/journal.pbio.3001007](https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3001007).
+If you found ClipKIT useful, please cite *ClipKIT: a multiple sequence alignment trimming software for accurate phylogenomic inference*. Steenwyk et al. 2020, PLOS Biology. doi: [10.1371/journal.pbio.3001007](https://doi.org/10.1371/journal.pbio.3001007) ([PDF](https://jlsteenwyk.com/publication_pdfs/2020_Steenwyk_etal_PLOS_Biology.pdf)).
+
 <br /><br />
 
 ---
diff --git a/tests/integration/samples/simple.fa.clipkit b/tests/integration/samples/simple.fa.clipkit
index 0cef391..da4f9f8 100644
--- a/tests/integration/samples/simple.fa.clipkit
+++ b/tests/integration/samples/simple.fa.clipkit
@@ -1,10 +1,10 @@
 >1
-A-GTAT
+A-GAT
 >2
-A-G-AT
+A-GAT
 >3
-A-G-TA
+A-GTA
 >4
-AGA-TA
+AGATA
 >5
-ACa-T-
+ACaT-
diff --git a/tests/unit/test_msa.py b/tests/unit/test_msa.py
index 7649c93..0276978 100644
--- a/tests/unit/test_msa.py
+++ b/tests/unit/test_msa.py
@@ -4,6 +4,7 @@
 from Bio import AlignIO
 from clipkit.msa import MSA
 
+
 def get_biopython_msa(file_path, file_format="fasta"):
     return AlignIO.read(open(file_path), file_format)
 
@@ -12,14 +13,22 @@ class TestMSA(object):
     def test_clipkit_msa_from_bio_msa(self):
         bio_msa = get_biopython_msa("tests/unit/examples/simple.fa")
         msa = MSA.from_bio_msa(bio_msa)
-        assert msa.header_info == [{'id': '1', 'name': '1', 'description': '1'}, {'id': '2', 'name': '2', 'description': '2'}, {'id': '3', 'name': '3', 'description': '3'}, {'id': '4', 'name': '4', 'description': '4'}, {'id': '5', 'name': '5', 'description': '5'}]
-        expected_seq_records = np.array([
-            ['A', '-', 'G', 'T', 'A', 'T'],
-            ['A', '-', 'G', '-', 'A', 'T'],
-            ['A', '-', 'G', '-', 'T', 'A'],
-            ['A', 'G', 'A', '-', 'T', 'A'],
-            ['A', 'C', 'a', '-', 'T', '-']
-        ])
+        assert msa.header_info == [
+            {"id": "1", "name": "1", "description": "1"},
+            {"id": "2", "name": "2", "description": "2"},
+            {"id": "3", "name": "3", "description": "3"},
+            {"id": "4", "name": "4", "description": "4"},
+            {"id": "5", "name": "5", "description": "5"},
+        ]
+        expected_seq_records = np.array(
+            [
+                ["A", "-", "G", "T", "A", "T"],
+                ["A", "-", "G", "-", "A", "T"],
+                ["A", "-", "G", "-", "T", "A"],
+                ["A", "G", "A", "-", "T", "A"],
+                ["A", "C", "a", "-", "T", "-"],
+            ]
+        )
         np.testing.assert_equal(msa.seq_records, expected_seq_records)
 
     def test_trim_by_provided_site_positions_np_array(self):
@@ -27,13 +36,15 @@ def test_trim_by_provided_site_positions_np_array(self):
         msa = MSA.from_bio_msa(bio_msa)
         sites_to_trim = np.array([1, 4])
         msa.trim(site_positions_to_trim=sites_to_trim)
-        expected_sites_kept = np.array([
-            ['A', 'G', 'T', 'T'],
-            ['A', 'G', '-', 'T'],
-            ['A', 'G', '-', 'A'],
-            ['A', 'A', '-', 'A'],
-            ['A', 'a', '-', '-']
-        ])
+        expected_sites_kept = np.array(
+            [
+                ["A", "G", "T", "T"],
+                ["A", "G", "-", "T"],
+                ["A", "G", "-", "A"],
+                ["A", "A", "-", "A"],
+                ["A", "a", "-", "-"],
+            ]
+        )
         np.testing.assert_equal(msa.sites_kept, expected_sites_kept)
 
     def test_trim_by_provided_site_positions_list(self):
@@ -41,11 +52,15 @@ def test_trim_by_provided_site_positions_list(self):
         msa = MSA.from_bio_msa(bio_msa)
         sites_to_trim = [1, 4]
         msa.trim(site_positions_to_trim=sites_to_trim)
-        expected_sites_kept = np.array([
-            ['A', 'G', 'T', 'T'],
-            ['A', 'G', '-', 'T'],
-            ['A', 'G', '-', 'A'],
-            ['A', 'A', '-', 'A'],
-            ['A', 'a', '-', '-']
-        ])
+        expected_sites_kept = np.array(
+            [
+                ["A", "G", "T", "T"],
+                ["A", "G", "-", "T"],
+                ["A", "G", "-", "A"],
+                ["A", "A", "-", "A"],
+                ["A", "a", "-", "-"],
+            ]
+        )
+        # Passing a plain list must trim exactly like the np.array variant,
+        # so the expected matrix is identical to the one in that test.
         np.testing.assert_equal(msa.sites_kept, expected_sites_kept)