Update alignment.py and running_jobs_complexes.ipynb #1

Open · wants to merge 22 commits into base: develop

6 changes: 3 additions & 3 deletions .github/workflows/build_and_test.yml
@@ -11,9 +11,9 @@ jobs:
python-version: [3.8]

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install and configure conda
@@ -37,6 +37,6 @@ jobs:
wget https://marks.hms.harvard.edu/evcouplings_test_cases/data/evcouplings_test_cases.tar.gz
tar -xf evcouplings_test_cases.tar.gz -C $HOME/
- name: Run tests in headless xvfb environment
uses: GabrielBB/xvfb-action@v1
uses: coactions/setup-xvfb@v1
with:
run: coverage run -m unittest discover -s test -p "Test*.py"
2 changes: 1 addition & 1 deletion evcouplings/align/alignment.py
@@ -422,7 +422,7 @@ def sequences_to_matrix(sequences):

N = len(sequences)
L = len(next(iter(sequences)))
matrix = np.empty((N, L), dtype=np.str)
matrix = np.empty((N, L), dtype=np.str_)

for i, seq in enumerate(sequences):
if len(seq) != L:
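
The one-line change in alignment.py swaps the removed NumPy alias `np.str` for `np.str_` when allocating the character matrix in `sequences_to_matrix`. A minimal standalone sketch of the same dtype swap (the sequences below are made up, not taken from the test data):

```python
import numpy as np

# Hypothetical aligned sequences of equal length
sequences = ["MKV-LA", "MRVQLA", "MKVQL-"]

N = len(sequences)
L = len(sequences[0])

# np.str was only an alias for the builtin str; it was deprecated in NumPy 1.20
# and removed in 1.24, so np.str_ (the actual NumPy string scalar type) is used instead.
matrix = np.empty((N, L), dtype=np.str_)

for i, seq in enumerate(sequences):
    matrix[i] = list(seq)  # one single-character cell per alignment column

print(matrix.shape)  # (3, 6)
```
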
222 changes: 112 additions & 110 deletions evcouplings/couplings/model.py

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions evcouplings/utils/config.py
@@ -10,7 +10,7 @@
Thomas A. Hopf
"""

import ruamel.yaml as yaml
from ruamel.yaml import YAML


class MissingParameterError(Exception):
@@ -44,9 +44,11 @@ def parse_config(config_str, preserve_order=False):
"""
try:
if preserve_order:
return yaml.load(config_str, Loader=yaml.RoundTripLoader)
yaml = YAML(typ="base")
return yaml.load(config_str)
else:
return yaml.safe_load(config_str)
yaml = YAML(typ="safe")
return yaml.load(config_str)
except yaml.parser.ParserError as e:
raise InvalidParameterError(
"Could not parse input configuration. "
Expand Down Expand Up @@ -90,9 +92,7 @@ def write_config_file(out_filename, config):
dumper = yaml.Dumper

with open(out_filename, "w") as f:
f.write(
yaml.dump(config, Dumper=dumper, default_flow_style=False)
)
f.write(yaml.dump(config, Dumper=dumper, default_flow_style=False))


def check_required(params, keys):
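
The config module drops the deprecated module-level `ruamel.yaml` calls (`yaml.safe_load`, `RoundTripLoader`) in favour of the instance-based `YAML` API that current ruamel.yaml releases support. A small self-contained sketch of that API (the YAML content here is invented for illustration):

```python
import sys
from ruamel.yaml import YAML

config_str = """
protocol: standard
theta: 0.8
stages:
  - align
  - couplings
"""

# typ="safe" restricts loading to plain YAML types, like the old yaml.safe_load();
# typ="rt" (round-trip) would preserve key order and comments, replacing RoundTripLoader.
yaml = YAML(typ="safe")
config = yaml.load(config_str)
print(config["protocol"], config["stages"])

# Dumping also goes through an instance; dump() writes to a stream rather than
# returning a string.
yaml.default_flow_style = False
yaml.dump(config, sys.stdout)
```
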
47 changes: 22 additions & 25 deletions evcouplings/visualize/parameters.py
@@ -10,9 +10,14 @@
from evcouplings.couplings.pairs import add_mixture_probability


def evzoom_data(model, ec_threshold=0.9, freq_threshold=0.01,
Jij_threshold=10, score="cn",
reorder=None):
def evzoom_data(
model,
ec_threshold=0.9,
freq_threshold=0.01,
Jij_threshold=10,
score="cn",
reorder=None,
):
"""
Generate data for EVzoom visualization. Use evzoom_json()
to get final JSON string to use with EVzoom.
Expand Down Expand Up @@ -61,7 +66,7 @@ def evzoom_data(model, ec_threshold=0.9, freq_threshold=0.01,
ecs = add_mixture_probability(ecs, score=score)
ecs_sel = ecs.loc[ecs.probability >= ec_threshold]
else:
ecs_sel = ecs.iloc[:int(ec_threshold)]
ecs_sel = ecs.iloc[: int(ec_threshold)]

# if cutoff for couplings is given as int, interpret
# as percentage of biggest absolute coupling value
@@ -71,14 +76,10 @@

if reorder is not None:
alphabet = np.array(list(reorder))
alphabet_order = [
model.alphabet_map[c] for c in reorder
]
alphabet_order = [model.alphabet_map[c] for c in reorder]
else:
alphabet = model.alphabet
alphabet_order = sorted(
model.alphabet_map.values()
)
alphabet_order = sorted(model.alphabet_map.values())

# Map containing sequence and indeces
map_ = {
@@ -92,21 +93,15 @@ def evzoom_data(model, ec_threshold=0.9, freq_threshold=0.01,
for idx, r in ecs_sel.iterrows():
i, j, score_ij = r["i"], r["j"], r[score]
Jij = model.Jij(i, j)[alphabet_order, :][:, alphabet_order]
ai_set = np.where(
np.max(np.abs(Jij), axis=1) > Jij_threshold
)[0]
aj_set = np.where(
np.max(np.abs(Jij), axis=0) > Jij_threshold
)[0]
ai_set = np.where(np.max(np.abs(Jij), axis=1) > Jij_threshold)[0]
aj_set = np.where(np.max(np.abs(Jij), axis=0) > Jij_threshold)[0]

cur_matrix = [
[round(Jij[ai, aj], DIGITS) for aj in list(aj_set)]
for ai in list(ai_set)
[round(Jij[ai, aj], DIGITS) for aj in list(aj_set)] for ai in list(ai_set)
]

cur_matrix_T = [
[round(Jij[ai, aj], DIGITS) for ai in list(ai_set)]
for aj in list(aj_set)
[round(Jij[ai, aj], DIGITS) for ai in list(ai_set)] for aj in list(aj_set)
]

cur_row = {
@@ -134,7 +129,7 @@ def evzoom_data(model, ec_threshold=0.9, freq_threshold=0.01,
fi = model.fi()
q = model.num_symbols

B = -fi * np.log2(fi)
B = -fi * np.log2(fi, where=fi > 0)
B[fi <= 0] = 0
R = np.log2(q) - B.sum(axis=1)

@@ -146,10 +141,12 @@ def evzoom_data(model, ec_threshold=0.9, freq_threshold=0.01,
symbols = model.alphabet[frequent]
fi_row = fi[i, frequent] * R[i]

logo.append([
{"code": s, "bits": round(float(h), DIGITS_LOGO)}
for s, h in zip(symbols, fi_row)
])
logo.append(
[
{"code": s, "bits": round(float(h), DIGITS_LOGO)}
for s, h in zip(symbols, fi_row)
]
)

return map_, logo, matrix

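The sequence-logo code in this file derives per-column information content from the single-site frequencies, guarding `np.log2` with `where=fi > 0` so zero frequencies are never passed to the logarithm (those entries are then zeroed out explicitly). A standalone sketch of the same pattern with made-up frequencies:

```python
import numpy as np

# Hypothetical single-site frequencies: 2 positions x 4 symbols, rows sum to 1
fi = np.array([
    [0.70, 0.30, 0.00, 0.00],
    [0.25, 0.25, 0.25, 0.25],
])
q = fi.shape[1]

# Evaluate log2 only where fi > 0; entries where the condition is False are
# left uninitialised by np.log2, so they must be overwritten with 0 afterwards.
B = -fi * np.log2(fi, where=fi > 0)
B[fi <= 0] = 0

# Information content per position: log2(q) minus the per-column entropy
R = np.log2(q) - B.sum(axis=1)
print(R)  # roughly [1.12, 0.0]: position 1 is conserved, position 2 is uniform
```
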
22 changes: 11 additions & 11 deletions notebooks/output_files_tutorial.ipynb

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions notebooks/running_jobs_complexes.ipynb
@@ -63,18 +63,18 @@
"1) In \"align_1\" and \"align_2\" section:\n",
"* __alignment_protocol__: choose either 'existing' to use an input alignment or 'standard' to generate an alignment\n",
"using the monomer alignment protocol.\n",
"* __input_alignment__: input alignment file, requried for 'existing' alignment protocol\n",
"* __input_alignment__: input alignment file, required for 'existing' alignment protocol\n",
"* __override_annotation_file__: input annotation file, required for 'existing' alignment protocol when using best_hit concatenation. This is the \\_annotations.csv file from the same monomer pipeline run used to generate the input alignment, OR a user-generated .csv file with columns \"id\" containing the full sequence ids from the alignment and column \"OS\" that contains annotation information for each id. This will override the incomplete annotations generated when postprocessing the input_alignment.\n",
"\n",
"3) in \"concatenate\" section:\n",
"* __protocol__: currently two procols are available. Genome_distance will pair sequences by closest reciprocal distance on the genome. Best_hit will pair by best hit to the target sequence for each genome. \n",
"* __protocol__: currently two protocols are available. Genome_distance will pair sequences by closest reciprocal distance on the genome. Best_hit will pair by best hit to the target sequence for each genome. \n",
"* __use_best_reciprocal__: if using the best_hit protocol, use_best_reciprocal specifies whether to only take the best reciprocal hit in each genome\n",
"* __minimum_sequence_coverage__: After concatenation, only keep concatenated sequences that align to at least x% of the target concatenated sequence \n",
"* __minimum_column_coverage__: After concatenation, only include alignment columns with at least x% residues (rather than gaps) during model inference. \n",
"\n",
"4) in \"couplings\" section:\n",
"* __scoring__: Options are skewnormal, lognormal, and evcomplex. Scoring model to assess confidence in computed ECs\n",
"* __use_all_ecs_for_scoring__: if True, will run the scoring model on the ECs, both inter and intra, simulataneously. If false, scoring will be done for monomer 1, monomer 2, and inter-protein ECs independently. \n",
"* __use_all_ecs_for_scoring__: if True, will run the scoring model on the ECs, both inter and intra, simultaneously. If false, scoring will be done for monomer 1, monomer 2, and inter-protein ECs independently. \n",
"\n",
"5) in \"compare\" section:\n",
"\n",
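
For orientation, the complex-pipeline options described in the notebook text above could be collected roughly as follows once a configuration is loaded into Python; the keys mirror the option names in the text, while the values are arbitrary illustrations, not recommended defaults:

```python
# Illustrative fragment only; a real EVcouplings complex configuration contains
# many more sections and settings than shown here.
concatenate_section = {
    "protocol": "best_hit",            # or "genome_distance"
    "use_best_reciprocal": True,       # only meaningful for the best_hit protocol
    "minimum_sequence_coverage": 50,   # keep pairs covering >= 50% of the target
    "minimum_column_coverage": 70,     # keep columns with >= 70% residues (not gaps)
}

couplings_section = {
    "scoring": "skewnormal",           # or "lognormal" / "evcomplex"
    "use_all_ecs_for_scoring": False,  # score intra- and inter-protein ECs separately
}
```
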
4 changes: 2 additions & 2 deletions setup.py
@@ -96,9 +96,9 @@
#setup_requires=['setuptools>=18.2', 'numpy'],

install_requires=['setuptools>=18.2', 'numpy',
'pandas', 'scipy', 'numba', 'ruamel.yaml', 'matplotlib', 'requests',
'pandas', 'scipy', 'numba', 'matplotlib', 'requests',
'mmtf-python', 'click', 'filelock', 'psutil', 'bokeh', 'jinja2',
'biopython', 'seaborn', 'billiard', 'scikit-learn',
'biopython', 'seaborn', 'billiard', 'scikit-learn', 'ruamel.yaml'
],

)
16 changes: 10 additions & 6 deletions test/TestComplex.py
@@ -11,7 +11,7 @@
import pandas as pd
from unittest import TestCase
from copy import deepcopy
import ruamel.yaml as yaml
from ruamel.yaml import YAML
from evcouplings.complex.alignment import *
from evcouplings.complex.distance import *
from evcouplings.complex.similarity import *
@@ -72,10 +72,12 @@ def __init__(self, *args, **kwargs):

# input and output configuration
with open("{}/concatenate/test_new_concatenate.incfg".format(TRAVIS_PATH)) as inf:
self.incfg = yaml.safe_load(inf)
yaml = YAML(typ='safe')
self.incfg = yaml.load(inf)

with open("{}/concatenate/test_new_concatenate.outcfg".format(TRAVIS_PATH)) as inf:
self.outcfg = yaml.safe_load(inf)
yaml = YAML(typ='safe')
self.outcfg = yaml.load(inf)

def test_genome_distance(self):
"""
@@ -145,7 +147,8 @@ def test_best_hit_normal(self):
temporary_incfg["paralog_identity_threshold"] = 0.9

with open("{}/concatenate/test_new_best_hit_concatenate.outcfg".format(TRAVIS_PATH)) as inf:
_outcfg = yaml.safe_load(inf)
yaml = YAML(typ='safe')
_outcfg = yaml.load(inf)

outcfg = best_hit(**temporary_incfg)

@@ -195,7 +198,8 @@ def test_best_hit_reciprocal(self):
temporary_incfg["paralog_identity_threshold"] = 0.9

with open("{}/concatenate/test_new_best_reciprocal_concatenate.outcfg".format(TRAVIS_PATH)) as inf:
_outcfg = yaml.safe_load(inf)
yaml = YAML(typ='safe')
_outcfg = yaml.load(inf)

outcfg = best_hit(**temporary_incfg)

@@ -402,7 +406,7 @@ def test_find_possible_partners(self):

pd.testing.assert_frame_equal(
self.possible_partners, _possible_partners,
check_less_precise=True, check_like=True,
check_exact=False, check_like=True,
check_names=False
)

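The comparison in `test_find_possible_partners` also moves from `check_less_precise=True`, which pandas deprecated and later removed, to `check_exact=False` in `pd.testing.assert_frame_equal`. A minimal sketch of what the replacement flag does (the frames are made up):

```python
import pandas as pd

a = pd.DataFrame({"score": [0.123456, 0.654321]})
b = pd.DataFrame({"score": [0.123457, 0.654322]})  # tiny numerical differences

# check_exact=False compares floating-point values within a tolerance
# (rtol/atol can also be passed explicitly on recent pandas versions)
# instead of requiring bit-for-bit equality.
pd.testing.assert_frame_equal(a, b, check_exact=False)

# The strict comparison would fail on the same frames:
# pd.testing.assert_frame_equal(a, b, check_exact=True)  # raises AssertionError
```
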
43 changes: 25 additions & 18 deletions test/TestFold.py
@@ -10,16 +10,17 @@

import unittest
import os
import ruamel.yaml as yaml
from ruamel.yaml import YAML
from unittest import TestCase
from evcouplings.fold import haddock_dist_restraint
from evcouplings.fold.protocol import complex_dock

TRAVIS_PATH = os.getenv('HOME') + "/evcouplings_test_cases"
TRAVIS_PATH = os.getenv("HOME") + "/evcouplings_test_cases"
# TRAVIS_PATH = "/home/travis/evcouplings_test_cases"
#TRAVIS_PATH = "/Users/AG/Dropbox/evcouplings_dev/test_cases/for_B"
# TRAVIS_PATH = "/Users/AG/Dropbox/evcouplings_dev/test_cases/for_B"
COMPLEX_PATH = "{}/complex_test".format(TRAVIS_PATH)


class TestComplexDock(TestCase):
"""
NOTE: not explicitly testing evcouplings.fold.restraints.docking_restraints
@@ -32,7 +33,9 @@ def __init__(self, *args, **kwargs):
super(TestComplexDock, self).__init__(*args, **kwargs)

config_file = COMPLEX_PATH + "/couplings/test_new_couplings.outcfg"
config = yaml.safe_load(open(config_file, "r"))
with open(config_file, "r") as inf:
yaml = YAML(typ="safe")
config = yaml.load(inf)

self.ec_file = COMPLEX_PATH + "/couplings/test_new_CouplingScores.csv"
self.segments = config["segments"]
@@ -44,8 +47,13 @@ def test_haddock_restraint_no_comment(self):
:return:
"""
r = haddock_dist_restraint(
"10", "A", "11", "B",
"1.0", "0.0", "2.0",
"10",
"A",
"11",
"B",
"1.0",
"0.0",
"2.0",
)

desired_output = (
@@ -65,8 +73,7 @@ def test_haddock_restraint_with_comment(self):
:return:
"""
r = haddock_dist_restraint(
"10", "A", "11", "B",
"1.0", "0.0", "2.0", comment = "COMMENT"
"10", "A", "11", "B", "1.0", "0.0", "2.0", comment="COMMENT"
)

desired_output = (
@@ -101,7 +108,6 @@ def test_haddock_restraint_with_comment(self):
#
# self.assertEqual(r, desired_output)


def test_protocol(self):
"""
test whether evcouplings.fold.protocol.complex_dock writes the correct
@@ -113,26 +119,27 @@ def test_protocol(self):
tmp_prefix = "tmp_"

outcfg = complex_dock(
prefix = tmp_prefix,
ec_file = self.ec_file,
segments = self.segments,
dock_probability_cutoffs = [0.9, 0.99],
dock_lowest_count = 5,
dock_highest_count = 10,
dock_increase = 5
prefix=tmp_prefix,
ec_file=self.ec_file,
segments=self.segments,
dock_probability_cutoffs=[0.9, 0.99],
dock_lowest_count=5,
dock_highest_count=10,
dock_increase=5,
)

file_output_keys = [
"tmp__significant_ECs_0.9_restraints.tbl",
"tmp__significant_ECs_0.99_restraints.tbl",
"tmp__5_restraints.tbl",
"tmp__10_restraints.tbl"
"tmp__10_restraints.tbl",
]

for _file in file_output_keys:
self.assertTrue(os.path.isfile(_file))
self.assertTrue(os.path.getsize(_file) > 0)
os.unlink(_file)

if __name__ == '__main__':

if __name__ == "__main__":
unittest.main()