Skip to content

Commit

Permalink
Merge pull request #840 from choderalab/support-rst7
Browse files Browse the repository at this point in the history
Add support for amber rst7 files and make jobid 1-based
  • Loading branch information
andrrizzi authored Dec 4, 2017
2 parents 462231a + ed4c039 commit 1a80cd5
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 101 deletions.
2 changes: 1 addition & 1 deletion Yank/commands/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
Optional Arguments:
--jobid=INTEGER You can run only a subset of the experiments by specifying jobid and njobs, where
0 <= job_id <= n_jobs-1. In this case, njobs must be specified as well and YANK will
1 <= job_id <= n_jobs. In this case, njobs must be specified as well and YANK will
run only 1/n_jobs of the experiments. This can be used to run several separate YANK
executions in parallel starting from the same script.
--njobs=INTEGER Specify the total number of parallel executions. jobid has to be specified too.
Expand Down
31 changes: 17 additions & 14 deletions Yank/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ class ExperimentBuilder(object):
can load it later by using :func:`parse` (default is None).
job_id : None or int
If you want to split the experiments among different executions,
you can set this to an integer 0 <= job_id <= n_jobs-1, and this
you can set this to an integer 1 <= job_id <= n_jobs, and this
:class:`ExperimentBuilder` will run only 1/n_jobs of the experiments.
n_jobs : None or int
If ``job_id`` is specified, this is the total number of jobs that
Expand Down Expand Up @@ -606,8 +606,8 @@ def __init__(self, script=None, job_id=None, n_jobs=None):
if job_id is not None:
if n_jobs is None:
raise ValueError('n_jobs must be specified together with job_id')
if not 0 <= job_id <= n_jobs:
raise ValueError('job_id must be between 0 and n_jobs ({})'.format(n_jobs))
if not 1 <= job_id <= n_jobs:
raise ValueError('job_id must be between 1 and n_jobs ({})'.format(n_jobs))

self._job_id = job_id
self._n_jobs = n_jobs
Expand Down Expand Up @@ -1021,7 +1021,10 @@ def _expand_experiments(self):

# Loop over all combinations
for name, combination in experiment.named_combinations(separator='_', max_name_length=50):
if self._job_id is None or experiment_id % self._n_jobs == self._job_id:
# Both self._job_id and self._job_id-1 work (self._job_id is 1-based),
# but we use the latter just because it makes it easier to identify in
# advance which job ids are associated to which experiments.
if self._job_id is None or experiment_id % self._n_jobs == self._job_id-1:
yield os.path.join(output_dir, name), combination
experiment_id += 1

Expand Down Expand Up @@ -1437,7 +1440,6 @@ def _to_unit_unless_none(input_quantity):
for solvent_id, solvent_descr in solvents_description.items():
if solvent_validator.validate(solvent_descr):
validated_solvents[solvent_id] = solvent_validator.document
print(solvent_validator.document)
else:
error = "Solvent '{}' did not validate! Check the schema error below for details\n{}"
raise YamlParseError(error.format(solvent_id, yaml.dump(solvent_validator.errors)))
Expand Down Expand Up @@ -1658,15 +1660,21 @@ def _is_pipeline_solvent_with_receptor(field, solvent_id, error):
type: string
validator: file_exists
dependencies: phase2_path
coerce: sort_alphabetically_by_extension
validator: supported_system_files
phase2_path:
required: no
type: list
schema:
type: string
validator: file_exists
validator: is_system_files_matching_phase1
coerce: sort_alphabetically_by_extension
validator: supported_system_files
gromacs_include_dir:
required: no
type: string
dependencies: [phase1_path, phase2_path]
validator: directory_exists
# Solvents
solvent:
required: no
type: string
Expand Down Expand Up @@ -1694,13 +1702,8 @@ def _is_pipeline_solvent_with_receptor(field, solvent_id, error):
oneof:
- dependencies: [phase1_path, phase2_path, solvent1]
- dependencies: [solute, solvent1]
gromacs_include_dir:
required: no
type: string
dependencies: [phase1_path, phase2_path]
validator: directory_exists
# Non-Prebuilt setup
# Automatic pipeline
receptor:
required: no
type: string
Expand Down
22 changes: 15 additions & 7 deletions Yank/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,7 @@ def get_system_files_paths(self, system_id):
"""
Paths = collections.namedtuple('Paths', ['position_path', 'parameters_path'])
system_dir = os.path.join(self.setup_dir, self.SYSTEMS_DIR, system_id)

if 'receptor' in self.systems[system_id]:
system_files_paths = [
Paths(position_path=os.path.join(system_dir, 'complex.inpcrd'),
Expand All @@ -872,13 +873,20 @@ def get_system_files_paths(self, system_id):
parameters_path=os.path.join(system_dir, 'solvent2.prmtop'))
]
else:
system_files_paths = [
Paths(position_path=self.systems[system_id]['phase1_path'][0],
parameters_path=self.systems[system_id]['phase1_path'][1]),
Paths(position_path=self.systems[system_id]['phase2_path'][0],
parameters_path=self.systems[system_id]['phase2_path'][1])
]

parameter_file_extensions = {'prmtop', 'top', 'xml'}
system_files_paths = []
for phase_path_name in ['phase1_path', 'phase2_path']:
file_paths = self.systems[system_id][phase_path_name]
assert len(file_paths) == 2

# Make sure that the position file is first.
first_file_extension = os.path.splitext(file_paths[0])[1][1:]
if first_file_extension in parameter_file_extensions:
file_paths = list(reversed(file_paths))

# Append Paths object.
system_files_paths.append(Paths(position_path=file_paths[0],
parameters_path=file_paths[1]))
return system_files_paths

def is_molecule_setup(self, molecule_id):
Expand Down
68 changes: 26 additions & 42 deletions Yank/schema/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,19 @@ class YANKCerberusValidator(cerberus.Validator):
Custom cerberus.Validator class for YANK extending the Validator to include all the methods needed
by YANK
"""

# ====================================================
# DATA COERCION
# ====================================================

def _normalize_coerce_single_str_to_list(self, value):
"""Cast a single string to a list of string"""
return [value] if isinstance(value, str) else value

def _normalize_coerce_sort_alphabetically_by_extension(self, files):
provided_extensions = [os.path.splitext(filepath)[1][1:] for filepath in files]
return [filepath for (ext, filepath) in sorted(zip(provided_extensions, files))]

# ====================================================
# DATA VALIDATORS
# ====================================================

def _validator_file_exists(self, field, filepath):
"""Assert that the file is in fact, a file!"""
if not os.path.isfile(filepath):
Expand Down Expand Up @@ -61,47 +60,32 @@ def _validator_int_or_all_string(self, field, value):
if value != 'all' and not isinstance(value, int):
self._error(field, "{} must be an int or the string 'all'".format(value))

def _validator_supported_system_files(self, field, file_paths):
    """Ensure the input system files belong to a supported setup type.

    Parameters
    ----------
    field : str
        Name of the schema field being validated (e.g. 'phase1_path').
    file_paths : list of str
        The system file paths provided for this field.

    The files' extensions must match one of the supported pairs:
    AMBER (inpcrd/prmtop or rst7/prmtop), GROMACS (gro/top) or
    OpenMM (pdb/xml). On mismatch, a validation error is recorded
    through ``self._error``.
    """
    # Obtain the extension (without the leading dot) of the system files.
    file_extensions = {os.path.splitext(file_path)[1][1:] for file_path in file_paths}

    # Supported (format, extension set) pairs. 'amber' appears twice
    # because both inpcrd and rst7 are valid AMBER position files.
    expected_extensions = [
        ('amber', {'inpcrd', 'prmtop'}),
        ('amber', {'rst7', 'prmtop'}),
        ('gromacs', {'gro', 'top'}),
        ('openmm', {'pdb', 'xml'})
    ]

    # Find a match for the extensions.
    file_extension_type = None
    for extension_type, valid_extensions in expected_extensions:
        if file_extensions == valid_extensions:
            file_extension_type = extension_type
            break

    # Verify we found a match.
    if file_extension_type is None:
        self._error(field, '{} must have file extensions matching one of the '
                           'following types: {}'.format(field, expected_extensions))
        return

    # Fixed message formatting: original read 'recognized {}files',
    # which rendered without a space after the field name.
    logger.debug('Correctly recognized {} files ({}) as {} '
                 'files'.format(field, file_paths, file_extension_type))

# ====================================================
# DATA TYPES
Expand Down
96 changes: 59 additions & 37 deletions Yank/tests/test_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import mdtraj
import numpy as np

from nose.tools import assert_raises, assert_equal
from nose.tools import assert_raises, assert_equal, assert_raises_regexp
from nose.plugins.attrib import attr

from yank.experiment import *
Expand Down Expand Up @@ -594,48 +594,70 @@ def test_validation_wrong_systems():
""".format(data_paths['lysozyme'])
basic_script = yaml.load(textwrap.dedent(basic_script))

# Each test case is a pair (regexp_error, system_description).
systems = [
{'receptor': 'rec', 'ligand': 'lig'},
{'receptor': 'rec_region', 'ligand': 'lig_region', 'solvent': 'solv'},
{'receptor': 'rec', 'ligand': 1, 'solvent': 'solv'},
{'receptor': 'rec', 'ligand': 'lig', 'solvent': ['solv', 'solv']},
{'receptor': 'rec', 'ligand': 'lig', 'solvent': 'unknown'},
{'receptor': 'rec', 'ligand': 'lig', 'solvent': 'solv4',
'leap': {'parameters': ['leaprc.gaff', 'leaprc.ff14SB']}},
{'receptor': 'rec', 'ligand': 'lig', 'solvent': 'solv3',
'parameters': 'leaprc.ff14SB'},
("'solvent' is required",
{'receptor': 'rec', 'ligand': 'lig'}),

{'phase1_path': data_paths['bentol-complex'][0],
'phase2_path': data_paths['bentol-solvent'],
'ligand_dsl': 'resname BEN', 'solvent': 'solv'},
{'phase1_path': ['nonexistingpath.prmtop', 'nonexistingpath.inpcrd'],
'phase2_path': data_paths['bentol-solvent'],
'ligand_dsl': 'resname BEN', 'solvent': 'solv'},
{'phase1_path': data_paths['bentol-complex'],
'phase2_path': data_paths['bentol-solvent'],
'ligand_dsl': 3.4, 'solvent': 'solv'},
{'phase1_path': data_paths['bentol-complex'],
'phase2_path': data_paths['bentol-solvent'],
'ligand_dsl': 'resname BEN', 'solvent1': 'unknown',
'solvent2': 'solv2'},
("regions\(s\) clashing",
{'receptor': 'rec_region', 'ligand': 'lig_region', 'solvent': 'solv'}),

{'phase1_path': data_paths['bentol-complex'],
'phase2_path': data_paths['pxylene-solvent'],
'ligand_dsl': 'resname p-xylene', 'solvent': 'solv',
'gromacs_include_dir': data_paths['pxylene-gro-include']},
("ligand: \[must be of string type\]",
{'receptor': 'rec', 'ligand': 1, 'solvent': 'solv'}),

{'phase1_path': data_paths['toluene-solvent'],
'phase2_path': data_paths['toluene-vacuum'],
'ligand_dsl': 'resname TOL', 'solvent': 'cantbespecified'},
("solvent: \[must be of string type\]",
{'receptor': 'rec', 'ligand': 'lig', 'solvent': ['solv', 'solv']}),

("unallowed value unknown",
{'receptor': 'rec', 'ligand': 'lig', 'solvent': 'unknown'}),

("solv4 does not specify clearance",
{'receptor': 'rec', 'ligand': 'lig', 'solvent': 'solv4',
'leap': {'parameters': ['leaprc.gaff', 'leaprc.ff14SB']}}),

("parameters: \[unknown field\]",
{'receptor': 'rec', 'ligand': 'lig', 'solvent': 'solv3',
'parameters': 'leaprc.ff14SB'}),

("phase1_path: \[must be of list type\]",
{'phase1_path': data_paths['bentol-complex'][0],
'phase2_path': data_paths['bentol-solvent'],
'ligand_dsl': 'resname BEN', 'solvent': 'solv'}),

("File path nonexistingpath.prmtop does not exist.",
{'phase1_path': ['nonexistingpath.prmtop', 'nonexistingpath.inpcrd'],
'phase2_path': data_paths['bentol-solvent'],
'ligand_dsl': 'resname BEN', 'solvent': 'solv'}),

("ligand_dsl: \[must be of string type\]",
{'phase1_path': data_paths['bentol-complex'],
'phase2_path': data_paths['bentol-solvent'],
'ligand_dsl': 3.4, 'solvent': 'solv'}),

("unallowed value unknown",
{'phase1_path': data_paths['bentol-complex'],
'phase2_path': data_paths['bentol-solvent'],
'ligand_dsl': 'resname BEN', 'solvent1': 'unknown',
'solvent2': 'solv2'}),

{'receptor': 'rec', 'solute': 'lig', 'solvent1': 'solv', 'solvent2': 'solv'},
{'ligand': 'lig', 'solute': 'lig', 'solvent1': 'solv', 'solvent2': 'solv'},
{'solute': 'lig', 'solvent1': 'solv', 'solvent2': 'solv', 'leap': 'leaprc.gaff'}
("unallowed value cantbespecified",
{'phase1_path': data_paths['toluene-solvent'],
'phase2_path': data_paths['toluene-vacuum'],
'ligand_dsl': 'resname TOL', 'solvent': 'cantbespecified'}),

("field 'ligand' is required",
{'receptor': 'rec', 'solute': 'lig', 'solvent1': 'solv', 'solvent2': 'solv'}),

("''ligand'' must not be present with ''solute''",
{'ligand': 'lig', 'solute': 'lig', 'solvent1': 'solv', 'solvent2': 'solv'}),

("leap: \[must be of dict type\]",
{'solute': 'lig', 'solvent1': 'solv', 'solvent2': 'solv', 'leap': 'leaprc.gaff'})
]
for system in systems:
for regexp, system in systems:
modified_script = basic_script.copy()
modified_script['systems'] = {'sys': system}
yield assert_raises, YamlParseError, exp_builder.parse, modified_script
yield assert_raises_regexp, YamlParseError, regexp, exp_builder.parse, modified_script


def test_order_phases():
Expand Down Expand Up @@ -1853,11 +1875,11 @@ def test_expand_experiments():
experiment_systems = utils.CombinatorialLeaf(['explicit-system', 'implicit-system', 'hydration-system'])
template_script['experiments']['system'] = experiment_systems

exp_builder = ExperimentBuilder(script=template_script, job_id=0, n_jobs=2)
exp_builder = ExperimentBuilder(script=template_script, job_id=1, n_jobs=2)
experiments = list(exp_builder._expand_experiments())
assert len(experiments) == 2

exp_builder = ExperimentBuilder(script=template_script, job_id=1, n_jobs=2)
exp_builder = ExperimentBuilder(script=template_script, job_id=2, n_jobs=2)
experiments = list(exp_builder._expand_experiments())
assert len(experiments) == 1

Expand Down

0 comments on commit 1a80cd5

Please sign in to comment.