Update alignment.py and running_jobs_complexes.ipynb #1

Open · wants to merge 22 commits into base: develop

6 changes: 3 additions & 3 deletions .github/workflows/build_and_test.yml
@@ -11,9 +11,9 @@ jobs:
python-version: [3.8]

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install and configure conda
@@ -37,6 +37,6 @@ jobs:
wget https://marks.hms.harvard.edu/evcouplings_test_cases/data/evcouplings_test_cases.tar.gz
tar -xf evcouplings_test_cases.tar.gz -C $HOME/
- name: Run tests in headless xvfb environment
uses: GabrielBB/xvfb-action@v1
uses: coactions/setup-xvfb@v1
with:
run: coverage run -m unittest discover -s test -p "Test*.py"
2 changes: 1 addition & 1 deletion evcouplings/align/alignment.py
@@ -422,7 +422,7 @@ def sequences_to_matrix(sequences):

N = len(sequences)
L = len(next(iter(sequences)))
matrix = np.empty((N, L), dtype=np.str)
matrix = np.empty((N, L), dtype=np.str_)

for i, seq in enumerate(sequences):
if len(seq) != L:
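
The one-line change in alignment.py swaps the removed NumPy alias `np.str` for `np.str_` when allocating the character matrix in `sequences_to_matrix`. A minimal standalone sketch of the same dtype swap (the sequences below are made up, not taken from the test data):

```python
import numpy as np

# Hypothetical aligned sequences of equal length
sequences = ["MKV-LA", "MRVQLA", "MKVQL-"]

N = len(sequences)
L = len(sequences[0])

# np.str was only an alias for the builtin str; it was deprecated in NumPy 1.20
# and removed in 1.24, so np.str_ (the actual NumPy string scalar type) is used instead.
matrix = np.empty((N, L), dtype=np.str_)

for i, seq in enumerate(sequences):
    matrix[i] = list(seq)  # one single-character cell per alignment column

print(matrix.shape)  # (3, 6)
```
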
222 changes: 112 additions & 110 deletions evcouplings/couplings/model.py

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions evcouplings/utils/config.py
@@ -10,7 +10,7 @@
Thomas A. Hopf
"""

import ruamel.yaml as yaml
from ruamel.yaml import YAML


class MissingParameterError(Exception):
@@ -44,9 +44,11 @@ def parse_config(config_str, preserve_order=False):
"""
try:
if preserve_order:
return yaml.load(config_str, Loader=yaml.RoundTripLoader)
yaml = YAML(typ="base")
return yaml.load(config_str)
else:
return yaml.safe_load(config_str)
yaml = YAML(typ="safe")
return yaml.load(config_str)
except yaml.parser.ParserError as e:
raise InvalidParameterError(
"Could not parse input configuration. "
Expand Down Expand Up @@ -90,9 +92,7 @@ def write_config_file(out_filename, config):
dumper = yaml.Dumper

with open(out_filename, "w") as f:
f.write(
yaml.dump(config, Dumper=dumper, default_flow_style=False)
)
f.write(yaml.dump(config, Dumper=dumper, default_flow_style=False))


def check_required(params, keys):
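
The config module drops the deprecated module-level `ruamel.yaml` calls (`yaml.safe_load`, `RoundTripLoader`) in favour of the instance-based `YAML` API that current ruamel.yaml releases support. A small self-contained sketch of that API (the YAML content here is invented for illustration):

```python
import sys
from ruamel.yaml import YAML

config_str = """
protocol: standard
theta: 0.8
stages:
  - align
  - couplings
"""

# typ="safe" restricts loading to plain YAML types, like the old yaml.safe_load();
# typ="rt" (round-trip) would preserve key order and comments, replacing RoundTripLoader.
yaml = YAML(typ="safe")
config = yaml.load(config_str)
print(config["protocol"], config["stages"])

# Dumping also goes through an instance; dump() writes to a stream rather than
# returning a string.
yaml.default_flow_style = False
yaml.dump(config, sys.stdout)
```
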
47 changes: 22 additions & 25 deletions evcouplings/visualize/parameters.py
@@ -10,9 +10,14 @@
from evcouplings.couplings.pairs import add_mixture_probability


def evzoom_data(model, ec_threshold=0.9, freq_threshold=0.01,
Jij_threshold=10, score="cn",
reorder=None):
def evzoom_data(
model,
ec_threshold=0.9,
freq_threshold=0.01,
Jij_threshold=10,
score="cn",
reorder=None,
):
"""
Generate data for EVzoom visualization. Use evzoom_json()
to get final JSON string to use with EVzoom.
Expand Down Expand Up @@ -61,7 +66,7 @@ def evzoom_data(model, ec_threshold=0.9, freq_threshold=0.01,
ecs = add_mixture_probability(ecs, score=score)
ecs_sel = ecs.loc[ecs.probability >= ec_threshold]
else:
ecs_sel = ecs.iloc[:int(ec_threshold)]
ecs_sel = ecs.iloc[: int(ec_threshold)]

# if cutoff for couplings is given as int, interpret
# as percentage of biggest absolute coupling value
@@ -71,14 +76,10 @@

if reorder is not None:
alphabet = np.array(list(reorder))
alphabet_order = [
model.alphabet_map[c] for c in reorder
]
alphabet_order = [model.alphabet_map[c] for c in reorder]
else:
alphabet = model.alphabet
alphabet_order = sorted(
model.alphabet_map.values()
)
alphabet_order = sorted(model.alphabet_map.values())

# Map containing sequence and indeces
map_ = {
@@ -92,21 +93,15 @@ def evzoom_data(model, ec_threshold=0.9, freq_threshold=0.01,
for idx, r in ecs_sel.iterrows():
i, j, score_ij = r["i"], r["j"], r[score]
Jij = model.Jij(i, j)[alphabet_order, :][:, alphabet_order]
ai_set = np.where(
np.max(np.abs(Jij), axis=1) > Jij_threshold
)[0]
aj_set = np.where(
np.max(np.abs(Jij), axis=0) > Jij_threshold
)[0]
ai_set = np.where(np.max(np.abs(Jij), axis=1) > Jij_threshold)[0]
aj_set = np.where(np.max(np.abs(Jij), axis=0) > Jij_threshold)[0]

cur_matrix = [
[round(Jij[ai, aj], DIGITS) for aj in list(aj_set)]
for ai in list(ai_set)
[round(Jij[ai, aj], DIGITS) for aj in list(aj_set)] for ai in list(ai_set)
]

cur_matrix_T = [
[round(Jij[ai, aj], DIGITS) for ai in list(ai_set)]
for aj in list(aj_set)
[round(Jij[ai, aj], DIGITS) for ai in list(ai_set)] for aj in list(aj_set)
]

cur_row = {
@@ -134,7 +129,7 @@ def evzoom_data(model, ec_threshold=0.9, freq_threshold=0.01,
fi = model.fi()
q = model.num_symbols

B = -fi * np.log2(fi)
B = -fi * np.log2(fi, where=fi > 0)
B[fi <= 0] = 0
R = np.log2(q) - B.sum(axis=1)

@@ -146,10 +141,12 @@ def evzoom_data(model, ec_threshold=0.9, freq_threshold=0.01,
symbols = model.alphabet[frequent]
fi_row = fi[i, frequent] * R[i]

logo.append([
{"code": s, "bits": round(float(h), DIGITS_LOGO)}
for s, h in zip(symbols, fi_row)
])
logo.append(
[
{"code": s, "bits": round(float(h), DIGITS_LOGO)}
for s, h in zip(symbols, fi_row)
]
)

return map_, logo, matrix

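The sequence-logo code in this file derives per-column information content from the single-site frequencies, guarding `np.log2` with `where=fi > 0` so zero frequencies are never passed to the logarithm (those entries are then zeroed out explicitly). A standalone sketch of the same pattern with made-up frequencies:

```python
import numpy as np

# Hypothetical single-site frequencies: 2 positions x 4 symbols, rows sum to 1
fi = np.array([
    [0.70, 0.30, 0.00, 0.00],
    [0.25, 0.25, 0.25, 0.25],
])
q = fi.shape[1]

# Evaluate log2 only where fi > 0; entries where the condition is False are
# left uninitialised by np.log2, so they must be overwritten with 0 afterwards.
B = -fi * np.log2(fi, where=fi > 0)
B[fi <= 0] = 0

# Information content per position: log2(q) minus the per-column entropy
R = np.log2(q) - B.sum(axis=1)
print(R)  # roughly [1.12, 0.0]: position 1 is conserved, position 2 is uniform
```
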
22 changes: 11 additions & 11 deletions notebooks/output_files_tutorial.ipynb

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions notebooks/running_jobs_complexes.ipynb
@@ -63,18 +63,18 @@
"1) In \"align_1\" and \"align_2\" section:\n",
"* __alignment_protocol__: choose either 'existing' to use an input alignment or 'standard' to generate an alignment\n",
"using the monomer alignment protocol.\n",
"* __input_alignment__: input alignment file, requried for 'existing' alignment protocol\n",
"* __input_alignment__: input alignment file, required for 'existing' alignment protocol\n",
"* __override_annotation_file__: input annotation file, required for 'existing' alignment protocol when using best_hit concatenation. This is the \\_annotations.csv file from the same monomer pipeline run used to generate the input alignment, OR a user-generated .csv file with columns \"id\" containing the full sequence ids from the alignment and column \"OS\" that contains annotation information for each id. This will override the incomplete annotations generated when postprocessing the input_alignment.\n",
"\n",
"3) in \"concatenate\" section:\n",
"* __protocol__: currently two procols are available. Genome_distance will pair sequences by closest reciprocal distance on the genome. Best_hit will pair by best hit to the target sequence for each genome. \n",
"* __protocol__: currently two protocols are available. Genome_distance will pair sequences by closest reciprocal distance on the genome. Best_hit will pair by best hit to the target sequence for each genome. \n",
"* __use_best_reciprocal__: if using the best_hit protocol, use_best_reciprocal specifies whether to only take the best reciprocal hit in each genome\n",
"* __minimum_sequence_coverage__: After concatenation, only keep concatenated sequences that align to at least x% of the target concatenated sequence \n",
"* __minimum_column_coverage__: After concatenation, only include alignment columns with at least x% residues (rather than gaps) during model inference. \n",
"\n",
"4) in \"couplings\" section:\n",
"* __scoring__: Options are skewnormal, lognormal, and evcomplex. Scoring model to assess confidence in computed ECs\n",
"* __use_all_ecs_for_scoring__: if True, will run the scoring model on the ECs, both inter and intra, simulataneously. If false, scoring will be done for monomer 1, monomer 2, and inter-protein ECs independently. \n",
"* __use_all_ecs_for_scoring__: if True, will run the scoring model on the ECs, both inter and intra, simultaneously. If false, scoring will be done for monomer 1, monomer 2, and inter-protein ECs independently. \n",
"\n",
"5) in \"compare\" section:\n",
"\n",
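
For orientation, the complex-pipeline options described in the notebook text above could be collected roughly as follows once a configuration is loaded into Python; the keys mirror the option names in the text, while the values are arbitrary illustrations, not recommended defaults:

```python
# Illustrative fragment only; a real EVcouplings complex configuration contains
# many more sections and settings than shown here.
concatenate_section = {
    "protocol": "best_hit",            # or "genome_distance"
    "use_best_reciprocal": True,       # only meaningful for the best_hit protocol
    "minimum_sequence_coverage": 50,   # keep pairs covering >= 50% of the target
    "minimum_column_coverage": 70,     # keep columns with >= 70% residues (not gaps)
}

couplings_section = {
    "scoring": "skewnormal",           # or "lognormal" / "evcomplex"
    "use_all_ecs_for_scoring": False,  # score intra- and inter-protein ECs separately
}
```
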
4 changes: 2 additions & 2 deletions setup.py
@@ -96,9 +96,9 @@
#setup_requires=['setuptools>=18.2', 'numpy'],

install_requires=['setuptools>=18.2', 'numpy',
'pandas', 'scipy', 'numba', 'ruamel.yaml', 'matplotlib', 'requests',
'pandas', 'scipy', 'numba', 'matplotlib', 'requests',
'mmtf-python', 'click', 'filelock', 'psutil', 'bokeh', 'jinja2',
'biopython', 'seaborn', 'billiard', 'scikit-learn',
'biopython', 'seaborn', 'billiard', 'scikit-learn', 'ruamel.yaml'
],

)
16 changes: 10 additions & 6 deletions test/TestComplex.py
@@ -11,7 +11,7 @@
import pandas as pd
from unittest import TestCase
from copy import deepcopy
import ruamel.yaml as yaml
from ruamel.yaml import YAML
from evcouplings.complex.alignment import *
from evcouplings.complex.distance import *
from evcouplings.complex.similarity import *
@@ -72,10 +72,12 @@ def __init__(self, *args, **kwargs):

# input and output configuration
with open("{}/concatenate/test_new_concatenate.incfg".format(TRAVIS_PATH)) as inf:
self.incfg = yaml.safe_load(inf)
yaml = YAML(typ='safe')
self.incfg = yaml.load(inf)

with open("{}/concatenate/test_new_concatenate.outcfg".format(TRAVIS_PATH)) as inf:
self.outcfg = yaml.safe_load(inf)
yaml = YAML(typ='safe')
self.outcfg = yaml.load(inf)

def test_genome_distance(self):
"""
@@ -145,7 +147,8 @@ def test_best_hit_normal(self):
temporary_incfg["paralog_identity_threshold"] = 0.9

with open("{}/concatenate/test_new_best_hit_concatenate.outcfg".format(TRAVIS_PATH)) as inf:
_outcfg = yaml.safe_load(inf)
yaml = YAML(typ='safe')
_outcfg = yaml.load(inf)

outcfg = best_hit(**temporary_incfg)

@@ -195,7 +198,8 @@ def test_best_hit_reciprocal(self):
temporary_incfg["paralog_identity_threshold"] = 0.9

with open("{}/concatenate/test_new_best_reciprocal_concatenate.outcfg".format(TRAVIS_PATH)) as inf:
_outcfg = yaml.safe_load(inf)
yaml = YAML(typ='safe')
_outcfg = yaml.load(inf)

outcfg = best_hit(**temporary_incfg)

@@ -402,7 +406,7 @@ def test_find_possible_partners(self):

pd.testing.assert_frame_equal(
self.possible_partners, _possible_partners,
check_less_precise=True, check_like=True,
check_exact=False, check_like=True,
check_names=False
)

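The comparison in `test_find_possible_partners` also moves from `check_less_precise=True`, which pandas deprecated and later removed, to `check_exact=False` in `pd.testing.assert_frame_equal`. A minimal sketch of what the replacement flag does (the frames are made up):

```python
import pandas as pd

a = pd.DataFrame({"score": [0.123456, 0.654321]})
b = pd.DataFrame({"score": [0.123457, 0.654322]})  # tiny numerical differences

# check_exact=False compares floating-point values within a tolerance
# (rtol/atol can also be passed explicitly on recent pandas versions)
# instead of requiring bit-for-bit equality.
pd.testing.assert_frame_equal(a, b, check_exact=False)

# The strict comparison would fail on the same frames:
# pd.testing.assert_frame_equal(a, b, check_exact=True)  # raises AssertionError
```
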
43 changes: 25 additions & 18 deletions test/TestFold.py
@@ -10,16 +10,17 @@

import unittest
import os
import ruamel.yaml as yaml
from ruamel.yaml import YAML
from unittest import TestCase
from evcouplings.fold import haddock_dist_restraint
from evcouplings.fold.protocol import complex_dock

TRAVIS_PATH = os.getenv('HOME') + "/evcouplings_test_cases"
TRAVIS_PATH = os.getenv("HOME") + "/evcouplings_test_cases"
# TRAVIS_PATH = "/home/travis/evcouplings_test_cases"
#TRAVIS_PATH = "/Users/AG/Dropbox/evcouplings_dev/test_cases/for_B"
# TRAVIS_PATH = "/Users/AG/Dropbox/evcouplings_dev/test_cases/for_B"
COMPLEX_PATH = "{}/complex_test".format(TRAVIS_PATH)


class TestComplexDock(TestCase):
"""
NOTE: not explicitly testing evcouplings.fold.restraints.docking_restraints
@@ -32,7 +33,9 @@ def __init__(self, *args, **kwargs):
super(TestComplexDock, self).__init__(*args, **kwargs)

config_file = COMPLEX_PATH + "/couplings/test_new_couplings.outcfg"
config = yaml.safe_load(open(config_file, "r"))
with open(config_file, "r") as inf:
yaml = YAML(typ="safe")
config = yaml.load(inf)

self.ec_file = COMPLEX_PATH + "/couplings/test_new_CouplingScores.csv"
self.segments = config["segments"]
@@ -44,8 +47,13 @@ def test_haddock_restraint_no_comment(self):
:return:
"""
r = haddock_dist_restraint(
"10", "A", "11", "B",
"1.0", "0.0", "2.0",
"10",
"A",
"11",
"B",
"1.0",
"0.0",
"2.0",
)

desired_output = (
@@ -65,8 +73,7 @@ def test_haddock_restraint_with_comment(self):
:return:
"""
r = haddock_dist_restraint(
"10", "A", "11", "B",
"1.0", "0.0", "2.0", comment = "COMMENT"
"10", "A", "11", "B", "1.0", "0.0", "2.0", comment="COMMENT"
)

desired_output = (
@@ -101,7 +108,6 @@ def test_haddock_restraint_with_comment(self):
#
# self.assertEqual(r, desired_output)


def test_protocol(self):
"""
test whether evcouplings.fold.protocol.complex_dock writes the correct
@@ -113,26 +119,27 @@ def test_protocol(self):
tmp_prefix = "tmp_"

outcfg = complex_dock(
prefix = tmp_prefix,
ec_file = self.ec_file,
segments = self.segments,
dock_probability_cutoffs = [0.9, 0.99],
dock_lowest_count = 5,
dock_highest_count = 10,
dock_increase = 5
prefix=tmp_prefix,
ec_file=self.ec_file,
segments=self.segments,
dock_probability_cutoffs=[0.9, 0.99],
dock_lowest_count=5,
dock_highest_count=10,
dock_increase=5,
)

file_output_keys = [
"tmp__significant_ECs_0.9_restraints.tbl",
"tmp__significant_ECs_0.99_restraints.tbl",
"tmp__5_restraints.tbl",
"tmp__10_restraints.tbl"
"tmp__10_restraints.tbl",
]

for _file in file_output_keys:
self.assertTrue(os.path.isfile(_file))
self.assertTrue(os.path.getsize(_file) > 0)
os.unlink(_file)

if __name__ == '__main__':

if __name__ == "__main__":
unittest.main()