From ae76a558f4bab11e2adc3dbb847ec6b5238572d2 Mon Sep 17 00:00:00 2001 From: kt Date: Thu, 22 Jun 2023 22:31:02 -0400 Subject: [PATCH 1/6] parameterize protein with espaloma --- perses/app/relative_setup.py | 75 ++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/perses/app/relative_setup.py b/perses/app/relative_setup.py index d95bd99c6..09cd21627 100644 --- a/perses/app/relative_setup.py +++ b/perses/app/relative_setup.py @@ -381,6 +381,81 @@ def __init__(self, self._complex_topology_old, self._complex_positions_old,phase='complex',box_dimensions=self._complex_box_dimensions, ionic_strength=self._ionic_strength) _logger.info(f"successfully generated complex topology, positions, system") + + # Save and serialize system + from openmm import XmlSerializer + with open(f"{self._trajectory_directory}/out-complex.xml", "w") as wf: + xml = XmlSerializer.serialize(self._complex_system_old_solvated) + wf.write(xml) + + + # + # ESPALOMA + # + _logger.info(f"{type(self._complex_topology_old_solvated)}, {type(self._complex_positions_old_solvated)}, {type(self._complex_system_old_solvated)}") + # Update toplogy and system if protein parameterization with espaloma is requested + chain_ids = [ chain.id for chain in self._complex_topology_old_solvated.chains() ] + #_logger.info(f"Detected {len(chain_ids)} chains ({chain_ids})") + + # Initialize + new_complex_topology_old_solvated = app.Topology() + new_complex_topology_old_solvated.setPeriodicBoxVectors(self._complex_topology_old_solvated.getPeriodicBoxVectors()) + new_atoms = {} + + # Regenerate protein topology as a single residue assuming the first chain is a protein. + _logger.info(f"Regenerating protein topology...") + for chain in self._complex_topology_old_solvated.chains(): + new_chain = new_complex_topology_old_solvated.addChain(chain.id) + if chain.id == chain_ids[0]: + resname = 'ESP' + resid = '1' + new_residue = new_complex_topology_old_solvated.addResidue(resname, new_chain, resid) + for i, residue in enumerate(chain.residues()): + if residue.chain.id != chain_ids[0]: + new_residue = new_complex_topology_old_solvated.addResidue(residue.name, new_chain, residue.id) + for atom in residue.atoms(): + new_atom = new_complex_topology_old_solvated.addAtom(atom.name, atom.element, new_residue, atom.id) + new_atoms[atom] = new_atom + #_logger.info(f"{new_atom.residue.name}, {new_atom.residue.chain.id}, {new_atom.residue.id}") + #_logger.info(f"{new_residue.chain.id}, {new_residue.id}") + #_logger.info(f"{new_residue} {new_atom}") + + # Regenerate bond information + for bond in self._complex_topology_old_solvated.bonds(): + if bond[0] in new_atoms and bond[1] in new_atoms: + new_complex_topology_old_solvated.addBond(new_atoms[bond[0]], new_atoms[bond[1]]) + + pdb_filename = f"{self._trajectory_directory}/out-complex-esplaoma.pdb" + with open(pdb_filename, 'w') as outfile: + app.PDBFile.writeFile(new_complex_topology_old_solvated, self._complex_positions_old_solvated, outfile) + + # EspalomaTemplateGenerator to build Espaloma system + t = md.load_pdb(f'{pdb_filename}') + indices = t.topology.select('resname ESP') + pdb_filename = "target-espaloma.pdb" + t.atom_slice(indices).save_pdb(f'{pdb_filename}') + protein_molecule = Molecule.from_file(f'{pdb_filename}', file_format='pdb') + self._system_generator.template_generator.add_molecules(protein_molecule) + new_complex_system_old_solvated = self._system_generator.create_system(new_complex_topology_old_solvated) + + _logger.info(f"{type(new_complex_system_old_solvated)}") + + # Save and serialize system + #new_complex_system_old_solvated.topology = new_complex_topology_old_solvated + #new_complex_system_old_solvated.positions = self._complex_positions_old_solvated + with open(f"{self._trajectory_directory}/out-complex-espaloma.xml", "w") as wf: + xml = XmlSerializer.serialize(new_complex_system_old_solvated) + wf.write(xml) + + # Update + self._complex_topology_old_solvated = new_complex_topology_old_solvated + self._complex_system_old_solvated = new_complex_system_old_solvated + + # + # END + # + + self._complex_md_topology_old_solvated = md.Topology.from_openmm(self._complex_topology_old_solvated) _logger.info(f"creating TopologyProposal...") From 59b78be9d26dc60d36e11de4b91ad4f01601d8a0 Mon Sep 17 00:00:00 2001 From: kt Date: Fri, 23 Jun 2023 18:57:23 -0400 Subject: [PATCH 2/6] add use_protein_espaloma option and _regenerate_espaloma_system func --- perses/app/relative_setup.py | 162 ++++++++++++----------- perses/app/setup_relative_calculation.py | 7 + 2 files changed, 95 insertions(+), 74 deletions(-) diff --git a/perses/app/relative_setup.py b/perses/app/relative_setup.py index 09cd21627..71e82f9ed 100644 --- a/perses/app/relative_setup.py +++ b/perses/app/relative_setup.py @@ -62,6 +62,7 @@ def __init__(self, anneal_14s=False, small_molecule_forcefield='gaff-2.11', small_molecule_parameters_cache=None, + use_protein_espaloma=False, trajectory_directory=None, trajectory_prefix=None, spectator_filenames=None, @@ -110,6 +111,10 @@ def __init__(self, ['gaff-1.81', 'gaff-2.11', 'smirnoff99Frosst-1.1.0', 'openff-2.0.0'] small_molecule_parameters_cache : str, optional, default=None If specified, this filename will be used for a small molecule parameter cache by the SystemGenerator. + use_protein_espaloma : bool, default False + Whether to parameterize protein with epaloma force field. + If True, complex topology and system created by protein force fields in ``forcefield_files`` will be regenerated + using espaloma force field specified in ``small_molecule_forcefield``. trajectory_directory : str, default None Where to write out trajectories resulting from the calculation. If none, no writing is done. trajectory_prefix : str, default None @@ -160,6 +165,7 @@ def __init__(self, self._use_given_geometries = use_given_geometries self._given_geometries_tolerance = given_geometries_tolerance self._ligand_input = ligand_input + self._use_protein_espaloma if self._use_given_geometries: assert self._ligand_input[-3:] == 'sdf' or self._ligand_input[-4:] == 'mol2', f"cannot use deterministic atom placement if the ligand input files do not contain geometry information (e.g. in .sdf or .mol2 format)" @@ -381,80 +387,17 @@ def __init__(self, self._complex_topology_old, self._complex_positions_old,phase='complex',box_dimensions=self._complex_box_dimensions, ionic_strength=self._ionic_strength) _logger.info(f"successfully generated complex topology, positions, system") - - # Save and serialize system - from openmm import XmlSerializer - with open(f"{self._trajectory_directory}/out-complex.xml", "w") as wf: - xml = XmlSerializer.serialize(self._complex_system_old_solvated) - wf.write(xml) - - - # - # ESPALOMA - # - _logger.info(f"{type(self._complex_topology_old_solvated)}, {type(self._complex_positions_old_solvated)}, {type(self._complex_system_old_solvated)}") - # Update toplogy and system if protein parameterization with espaloma is requested - chain_ids = [ chain.id for chain in self._complex_topology_old_solvated.chains() ] - #_logger.info(f"Detected {len(chain_ids)} chains ({chain_ids})") - - # Initialize - new_complex_topology_old_solvated = app.Topology() - new_complex_topology_old_solvated.setPeriodicBoxVectors(self._complex_topology_old_solvated.getPeriodicBoxVectors()) - new_atoms = {} - - # Regenerate protein topology as a single residue assuming the first chain is a protein. - _logger.info(f"Regenerating protein topology...") - for chain in self._complex_topology_old_solvated.chains(): - new_chain = new_complex_topology_old_solvated.addChain(chain.id) - if chain.id == chain_ids[0]: - resname = 'ESP' - resid = '1' - new_residue = new_complex_topology_old_solvated.addResidue(resname, new_chain, resid) - for i, residue in enumerate(chain.residues()): - if residue.chain.id != chain_ids[0]: - new_residue = new_complex_topology_old_solvated.addResidue(residue.name, new_chain, residue.id) - for atom in residue.atoms(): - new_atom = new_complex_topology_old_solvated.addAtom(atom.name, atom.element, new_residue, atom.id) - new_atoms[atom] = new_atom - #_logger.info(f"{new_atom.residue.name}, {new_atom.residue.chain.id}, {new_atom.residue.id}") - #_logger.info(f"{new_residue.chain.id}, {new_residue.id}") - #_logger.info(f"{new_residue} {new_atom}") - - # Regenerate bond information - for bond in self._complex_topology_old_solvated.bonds(): - if bond[0] in new_atoms and bond[1] in new_atoms: - new_complex_topology_old_solvated.addBond(new_atoms[bond[0]], new_atoms[bond[1]]) - - pdb_filename = f"{self._trajectory_directory}/out-complex-esplaoma.pdb" - with open(pdb_filename, 'w') as outfile: - app.PDBFile.writeFile(new_complex_topology_old_solvated, self._complex_positions_old_solvated, outfile) - - # EspalomaTemplateGenerator to build Espaloma system - t = md.load_pdb(f'{pdb_filename}') - indices = t.topology.select('resname ESP') - pdb_filename = "target-espaloma.pdb" - t.atom_slice(indices).save_pdb(f'{pdb_filename}') - protein_molecule = Molecule.from_file(f'{pdb_filename}', file_format='pdb') - self._system_generator.template_generator.add_molecules(protein_molecule) - new_complex_system_old_solvated = self._system_generator.create_system(new_complex_topology_old_solvated) - - _logger.info(f"{type(new_complex_system_old_solvated)}") - - # Save and serialize system - #new_complex_system_old_solvated.topology = new_complex_topology_old_solvated - #new_complex_system_old_solvated.positions = self._complex_positions_old_solvated - with open(f"{self._trajectory_directory}/out-complex-espaloma.xml", "w") as wf: - xml = XmlSerializer.serialize(new_complex_system_old_solvated) - wf.write(xml) - - # Update - self._complex_topology_old_solvated = new_complex_topology_old_solvated - self._complex_system_old_solvated = new_complex_system_old_solvated - - # - # END - # - + # Assign espaloma to protein. Topology and system will be regenerated. + if self._use_protein_espaloma: + _logger.info(f"Regenerating toplogy and system with espaloma...") + # Save and serialize current system + from openmm import XmlSerializer + with open(f"{self._trajectory_directory}/out-complex.xml", "w") as wf: + xml = XmlSerializer.serialize(self._complex_system_old_solvated) + wf.write(xml) + assert 'espaloma' in small_molecule_forcefield, "Espaloma force field not defined in ``small_molecule_forcefield``" + self._complex_topology_old_solvated, self._complex_system_old_solvated = self._regenerate_espaloma_system() + _logger.info(f"Regenerated toplogy and system with espaloma") self._complex_md_topology_old_solvated = md.Topology.from_openmm(self._complex_topology_old_solvated) @@ -938,6 +881,77 @@ def _solvate_system(self, topology, positions, model='tip3p',phase='complex', bo return solvated_topology, solvated_positions, solvated_system + def _regenerate_espaloma_system(self): + """ + Regnerate topology and system with espaloma force field. Protein topology will be updated with + espaloma force field specified by ``small_molecule_forcefield``. + + Returns + ------- + new_complex_topology_old_solvated : app.Topology + Topology of the system with added waters. + new_complex_system_old_solvated : openmm.System + The parameterized system with the same kwargs used to generate + ``self._system_generator`` with ``openmmforcefields.generators.SystemGenerator``. + """ + chain_ids = [ chain.id for chain in self._complex_topology_old_solvated.chains() ] + new_complex_topology_old_solvated = app.Topology() + new_complex_topology_old_solvated.setPeriodicBoxVectors(self._complex_topology_old_solvated.getPeriodicBoxVectors()) + new_atoms = {} + + # Regenerate protein topology as a single residue assuming the first chain is a protein. + _logger.info(f"Regenerating protein topology...") + for chain in self._complex_topology_old_solvated.chains(): + new_chain = new_complex_topology_old_solvated.addChain(chain.id) + if chain.id == chain_ids[0]: + resname = 'ESP' + resid = '1' + new_residue = new_complex_topology_old_solvated.addResidue(resname, new_chain, resid) + for i, residue in enumerate(chain.residues()): + if residue.chain.id != chain_ids[0]: + new_residue = new_complex_topology_old_solvated.addResidue(residue.name, new_chain, residue.id) + for atom in residue.atoms(): + new_atom = new_complex_topology_old_solvated.addAtom(atom.name, atom.element, new_residue, atom.id) + new_atoms[atom] = new_atom + #_logger.info(f"{new_atom.residue.name}, {new_atom.residue.chain.id}, {new_atom.residue.id}") + #_logger.info(f"{new_residue.chain.id}, {new_residue.id}") + #_logger.info(f"{new_residue} {new_atom}") + + # Regenerate bond information + for bond in self._complex_topology_old_solvated.bonds(): + if bond[0] in new_atoms and bond[1] in new_atoms: + new_complex_topology_old_solvated.addBond(new_atoms[bond[0]], new_atoms[bond[1]]) + + # TODO: Is there a better way to handle this? + # Currently, the updated complex openmm.app.topology is saved as a new pdb and reloaded with mdtraj. + # Proteins are selected and saved as pdb and loaded as openff.toolkit.topology.Molecule. + + # Save updated complex topology as pdb + pdb_filename = f"{self._trajectory_directory}/out-complex-esplaoma.pdb" + with open(pdb_filename, 'w') as outfile: + app.PDBFile.writeFile(new_complex_topology_old_solvated, self._complex_positions_old_solvated, outfile) + + # Load protein structure converted into a single residue as openff.toolkit.topology.Molecule + t = md.load_pdb(f'{pdb_filename}') + indices = t.topology.select('resname ESP') + pdb_filename = "target-espaloma.pdb" + t.atom_slice(indices).save_pdb(f'{pdb_filename}') + protein_molecule = Molecule.from_file(f'{pdb_filename}', file_format='pdb') + + # We already added small molecules to template generator when we first created ``self._system_generator``. + # So we only need to add protein molecule to template generator (EspalomaTemplateGenerator). + self._system_generator.template_generator.add_molecules(protein_molecule) + # Regenerate system with system generator + new_complex_system_old_solvated = self._system_generator.create_system(new_complex_topology_old_solvated) + + # DEBUG + from openmm import XmlSerializer + with open(f"{self._trajectory_directory}/out-complex-espaloma.xml", "w") as wf: + xml = XmlSerializer.serialize(new_complex_system_old_solvated) + wf.write(xml) + + return new_complex_topology_old_solvated, new_complex_system_old_solvated + def _handle_charge_changes(self, topology_proposal, new_positions): """ modifies the atom mapping in the topology proposal and the new system parameters to handle the transformation of diff --git a/perses/app/setup_relative_calculation.py b/perses/app/setup_relative_calculation.py index 9efe7b7b1..51597e2e1 100644 --- a/perses/app/setup_relative_calculation.py +++ b/perses/app/setup_relative_calculation.py @@ -100,6 +100,7 @@ def getSetupOptions(filename, override_string=None): if 'small_molecule_parameters_cache' not in setup_options: setup_options['small_molecule_parameters_cache'] = None + if 'remove_constraints' not in setup_options: setup_options['remove_constraints'] = False @@ -394,6 +395,11 @@ def run_setup(setup_options, serialize_systems=True, build_samplers=True): # such as deferring to defaults for modules we call unless the user # chooses to override them. + if 'use_protein_espaloma' not in list(setup_options.keys()): + use_protein_espaloma = False + else: + assert type(setup_options['use_protein_espaloma']) == type(True) + use_protein_espaloma = setup_options['use_protein_espaloma'] if 'use_given_geometries' not in list(setup_options.keys()): use_given_geometries = False @@ -520,6 +526,7 @@ def run_setup(setup_options, serialize_systems=True, build_samplers=True): atom_expr=setup_options['atom_expr'], bond_expr=setup_options['bond_expr'], neglect_angles = setup_options['neglect_angles'], anneal_14s = setup_options['anneal_1,4s'], small_molecule_forcefield=setup_options['small_molecule_forcefield'], small_molecule_parameters_cache=setup_options['small_molecule_parameters_cache'], + use_protein_espaloma=use_protein_espaloma, trajectory_directory=trajectory_directory, trajectory_prefix=setup_options['trajectory_prefix'], nonbonded_method=setup_options['nonbonded_method'], complex_box_dimensions=setup_options['complex_box_dimensions'],solvent_box_dimensions=setup_options['solvent_box_dimensions'], ionic_strength=ionic_strength, remove_constraints=setup_options['remove_constraints'], use_given_geometries=use_given_geometries, given_geometries_tolerance=given_geometries_tolerance, From 7164eae6b03650b8fe4cbe254761f3ebde167586 Mon Sep 17 00:00:00 2001 From: kt Date: Fri, 23 Jun 2023 20:13:27 -0400 Subject: [PATCH 3/6] minor update --- perses/app/relative_setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/perses/app/relative_setup.py b/perses/app/relative_setup.py index 71e82f9ed..89251a762 100644 --- a/perses/app/relative_setup.py +++ b/perses/app/relative_setup.py @@ -165,7 +165,6 @@ def __init__(self, self._use_given_geometries = use_given_geometries self._given_geometries_tolerance = given_geometries_tolerance self._ligand_input = ligand_input - self._use_protein_espaloma if self._use_given_geometries: assert self._ligand_input[-3:] == 'sdf' or self._ligand_input[-4:] == 'mol2', f"cannot use deterministic atom placement if the ligand input files do not contain geometry information (e.g. in .sdf or .mol2 format)" @@ -388,7 +387,7 @@ def __init__(self, _logger.info(f"successfully generated complex topology, positions, system") # Assign espaloma to protein. Topology and system will be regenerated. - if self._use_protein_espaloma: + if use_protein_espaloma: _logger.info(f"Regenerating toplogy and system with espaloma...") # Save and serialize current system from openmm import XmlSerializer From 45ce1258868bf3c48e347c29b3328a823c90e8de Mon Sep 17 00:00:00 2001 From: kt Date: Fri, 23 Jun 2023 20:30:46 -0400 Subject: [PATCH 4/6] fix import error --- perses/app/relative_setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/perses/app/relative_setup.py b/perses/app/relative_setup.py index 89251a762..4b3e4ddc7 100644 --- a/perses/app/relative_setup.py +++ b/perses/app/relative_setup.py @@ -935,6 +935,8 @@ def _regenerate_espaloma_system(self): indices = t.topology.select('resname ESP') pdb_filename = "target-espaloma.pdb" t.atom_slice(indices).save_pdb(f'{pdb_filename}') + + from openff.toolkit.topology import Molecule protein_molecule = Molecule.from_file(f'{pdb_filename}', file_format='pdb') # We already added small molecules to template generator when we first created ``self._system_generator``. From ac0a48036405f1923beb71e08631b9889fa9344a Mon Sep 17 00:00:00 2001 From: kt Date: Fri, 23 Jun 2023 23:01:40 -0400 Subject: [PATCH 5/6] clean up _regenerate_espaloma_system --- perses/app/relative_setup.py | 44 +++++++++++++++++------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/perses/app/relative_setup.py b/perses/app/relative_setup.py index 4b3e4ddc7..f5971fa82 100644 --- a/perses/app/relative_setup.py +++ b/perses/app/relative_setup.py @@ -389,11 +389,11 @@ def __init__(self, # Assign espaloma to protein. Topology and system will be regenerated. if use_protein_espaloma: _logger.info(f"Regenerating toplogy and system with espaloma...") - # Save and serialize current system - from openmm import XmlSerializer - with open(f"{self._trajectory_directory}/out-complex.xml", "w") as wf: - xml = XmlSerializer.serialize(self._complex_system_old_solvated) - wf.write(xml) + # DEBUG: Save and serialize current system + #from openmm import XmlSerializer + #with open(f"{self._trajectory_directory}/out-complex.xml", "w") as wf: + # xml = XmlSerializer.serialize(self._complex_system_old_solvated) + # wf.write(xml) assert 'espaloma' in small_molecule_forcefield, "Espaloma force field not defined in ``small_molecule_forcefield``" self._complex_topology_old_solvated, self._complex_system_old_solvated = self._regenerate_espaloma_system() _logger.info(f"Regenerated toplogy and system with espaloma") @@ -924,20 +924,18 @@ def _regenerate_espaloma_system(self): # TODO: Is there a better way to handle this? # Currently, the updated complex openmm.app.topology is saved as a new pdb and reloaded with mdtraj. # Proteins are selected and saved as pdb and loaded as openff.toolkit.topology.Molecule. - - # Save updated complex topology as pdb - pdb_filename = f"{self._trajectory_directory}/out-complex-esplaoma.pdb" - with open(pdb_filename, 'w') as outfile: - app.PDBFile.writeFile(new_complex_topology_old_solvated, self._complex_positions_old_solvated, outfile) - - # Load protein structure converted into a single residue as openff.toolkit.topology.Molecule - t = md.load_pdb(f'{pdb_filename}') - indices = t.topology.select('resname ESP') - pdb_filename = "target-espaloma.pdb" - t.atom_slice(indices).save_pdb(f'{pdb_filename}') - + protein_espaloma_filename = "protein-espaloma.pdb" + if not os.path.exists(protein_espaloma_filename): + # Save updated complex topology as pdb + complex_espaloma_filename = f"{self._trajectory_directory}/out-complex-espaloma.pdb" + with open(complex_espaloma_filename, 'w') as outfile: + app.PDBFile.writeFile(new_complex_topology_old_solvated, self._complex_positions_old_solvated, outfile) + # Load protein structure converted into a single residue as openff.toolkit.topology.Molecule + t = md.load_pdb(complex_espaloma_filename) + indices = t.topology.select('resname ESP') + t.atom_slice(indices).save_pdb(protein_espaloma_filename) from openff.toolkit.topology import Molecule - protein_molecule = Molecule.from_file(f'{pdb_filename}', file_format='pdb') + protein_molecule = Molecule.from_file(protein_espaloma_filename, file_format='pdb') # We already added small molecules to template generator when we first created ``self._system_generator``. # So we only need to add protein molecule to template generator (EspalomaTemplateGenerator). @@ -945,11 +943,11 @@ def _regenerate_espaloma_system(self): # Regenerate system with system generator new_complex_system_old_solvated = self._system_generator.create_system(new_complex_topology_old_solvated) - # DEBUG - from openmm import XmlSerializer - with open(f"{self._trajectory_directory}/out-complex-espaloma.xml", "w") as wf: - xml = XmlSerializer.serialize(new_complex_system_old_solvated) - wf.write(xml) + # DEBUG: Save updated system + #from openmm import XmlSerializer + #with open(f"{self._trajectory_directory}/out-complex-espaloma.xml", "w") as wf: + # xml = XmlSerializer.serialize(new_complex_system_old_solvated) + # wf.write(xml) return new_complex_topology_old_solvated, new_complex_system_old_solvated From 0d069fc1cf31b8cce1ae7a1482c3fa46bc1382d2 Mon Sep 17 00:00:00 2001 From: kt Date: Wed, 28 Jun 2023 11:49:19 -0400 Subject: [PATCH 6/6] change keyword for assigning epaloma to proteins support protein subunits for espaloma --- perses/annihilation/relative.py | 9 ++- perses/app/relative_setup.py | 82 +++++++++++++++++------- perses/app/setup_relative_calculation.py | 10 +-- 3 files changed, 71 insertions(+), 30 deletions(-) diff --git a/perses/annihilation/relative.py b/perses/annihilation/relative.py index bb6ccb3a3..46b56d30c 100644 --- a/perses/annihilation/relative.py +++ b/perses/annihilation/relative.py @@ -2049,7 +2049,14 @@ def _impose_virtual_bonds(self): core_heavy_atoms = [ int(index) for index in set(core_atoms).intersection(set(heavy_atoms)) ] # Determine protein CA atoms - protein_atoms = [ int(index) for index in self._hybrid_topology.select('protein and name CA') ] + # NOTE: Residue name starting with XX is a special residue name for proteins parameterized with espaloma. + # The special residue name is also defined in app/relative_setup.py#L929 and should be changed jointly + # when chaning the residue name. + x = self._hybrid_topology.select("resname =~ 'XX.*'") + if x.any(): + protein_atoms = [ int(index) for index in self._hybrid_topology.select("resname =~ 'XX.*'" and "name CA") ] + else: + protein_atoms = [ int(index) for index in self._hybrid_topology.select('protein and name CA') ] if len(core_heavy_atoms)==0 or len(protein_atoms)==0: # No restraint to be added diff --git a/perses/app/relative_setup.py b/perses/app/relative_setup.py index f5971fa82..e79a0d944 100644 --- a/perses/app/relative_setup.py +++ b/perses/app/relative_setup.py @@ -62,7 +62,7 @@ def __init__(self, anneal_14s=False, small_molecule_forcefield='gaff-2.11', small_molecule_parameters_cache=None, - use_protein_espaloma=False, + use_espaloma_for_protein=False, trajectory_directory=None, trajectory_prefix=None, spectator_filenames=None, @@ -111,7 +111,7 @@ def __init__(self, ['gaff-1.81', 'gaff-2.11', 'smirnoff99Frosst-1.1.0', 'openff-2.0.0'] small_molecule_parameters_cache : str, optional, default=None If specified, this filename will be used for a small molecule parameter cache by the SystemGenerator. - use_protein_espaloma : bool, default False + use_espaloma_for_protein : bool, default False Whether to parameterize protein with epaloma force field. If True, complex topology and system created by protein force fields in ``forcefield_files`` will be regenerated using espaloma force field specified in ``small_molecule_forcefield``. @@ -386,8 +386,8 @@ def __init__(self, self._complex_topology_old, self._complex_positions_old,phase='complex',box_dimensions=self._complex_box_dimensions, ionic_strength=self._ionic_strength) _logger.info(f"successfully generated complex topology, positions, system") - # Assign espaloma to protein. Topology and system will be regenerated. - if use_protein_espaloma: + # Apply espaloma model to protein. Topology and system will be regenerated. + if use_espaloma_for_protein: _logger.info(f"Regenerating toplogy and system with espaloma...") # DEBUG: Save and serialize current system #from openmm import XmlSerializer @@ -396,7 +396,7 @@ def __init__(self, # wf.write(xml) assert 'espaloma' in small_molecule_forcefield, "Espaloma force field not defined in ``small_molecule_forcefield``" self._complex_topology_old_solvated, self._complex_system_old_solvated = self._regenerate_espaloma_system() - _logger.info(f"Regenerated toplogy and system with espaloma") + _logger.info(f"Toplogy and system regenerated with espaloma") self._complex_md_topology_old_solvated = md.Topology.from_openmm(self._complex_topology_old_solvated) @@ -883,7 +883,7 @@ def _solvate_system(self, topology, positions, model='tip3p',phase='complex', bo def _regenerate_espaloma_system(self): """ Regnerate topology and system with espaloma force field. Protein topology will be updated with - espaloma force field specified by ``small_molecule_forcefield``. + espaloma model specified in ``small_molecule_forcefield``. Returns ------- @@ -893,21 +893,46 @@ def _regenerate_espaloma_system(self): The parameterized system with the same kwargs used to generate ``self._system_generator`` with ``openmmforcefields.generators.SystemGenerator``. """ - chain_ids = [ chain.id for chain in self._complex_topology_old_solvated.chains() ] + # Check protein chains + mdtop = md.Topology.from_openmm(self._complex_topology_old_solvated) + chain_indices = [ chain.index for chain in self._complex_topology_old_solvated.chains() ] + + # DEBUG + #for chainid in chain_indices: + # indice = mdtop.select(f"protein and chainid == {chainid}") + # _logger.info(f"Protein chain ID {chainid}: {indice}") + + protein_chain_indices = [ chain_index for chain_index in chain_indices if mdtop.select(f"protein and chainid == {chain_index}").any() ] + assert len(protein_chain_indices) != 0, "Could not detect protein" + assert len(protein_chain_indices) <= 10, "Only supports up to 10 chains for proteins" + _logger.info(f"Protein chain indices: {protein_chain_indices}") + + # Check conflicting residue names + conflict_resnames = [ residue.name for residue in mdtop.residues if residue.name.startswith("XX") ] + if conflict_resnames: + raise Exception('Found conflict residue name in protein.') + + # Initialize new_complex_topology_old_solvated = app.Topology() new_complex_topology_old_solvated.setPeriodicBoxVectors(self._complex_topology_old_solvated.getPeriodicBoxVectors()) new_atoms = {} - # Regenerate protein topology as a single residue assuming the first chain is a protein. + # Regenerate protein topology + chain_counter = 0 _logger.info(f"Regenerating protein topology...") for chain in self._complex_topology_old_solvated.chains(): new_chain = new_complex_topology_old_solvated.addChain(chain.id) - if chain.id == chain_ids[0]: - resname = 'ESP' + # Convert protein into a single residue + # NOTE: ``resname`` is used in ``annihilation/relative.py#L2051.`` to deterimine the protein CA atoms to + # impose virtual bonds to keep the core atoms closeby with the protein subunits. If ``resname`` is changed, + # then protein CA atom selection expression in ``annihilation/relative.py#L2051.`` should also be changed. + if chain.index in protein_chain_indices: + resname = f'XX{chain_counter:01d}' resid = '1' + chain_counter += 1 new_residue = new_complex_topology_old_solvated.addResidue(resname, new_chain, resid) for i, residue in enumerate(chain.residues()): - if residue.chain.id != chain_ids[0]: + if residue.chain.index not in protein_chain_indices: new_residue = new_complex_topology_old_solvated.addResidue(residue.name, new_chain, residue.id) for atom in residue.atoms(): new_atom = new_complex_topology_old_solvated.addAtom(atom.name, atom.element, new_residue, atom.id) @@ -922,26 +947,35 @@ def _regenerate_espaloma_system(self): new_complex_topology_old_solvated.addBond(new_atoms[bond[0]], new_atoms[bond[1]]) # TODO: Is there a better way to handle this? - # Currently, the updated complex openmm.app.topology is saved as a new pdb and reloaded with mdtraj. - # Proteins are selected and saved as pdb and loaded as openff.toolkit.topology.Molecule. - protein_espaloma_filename = "protein-espaloma.pdb" - if not os.path.exists(protein_espaloma_filename): - # Save updated complex topology as pdb - complex_espaloma_filename = f"{self._trajectory_directory}/out-complex-espaloma.pdb" + # Currently, the new solvated complex openmm.app.topology ``new_complex_topology_old_solvated`` + # saved as a pdb (self._trajectory_directory/out-complex-espaloma.pdb) is loaded with mdtraj. + # Proteins are selected and saved as pdb files and loaded as openff.toolkit.topology.Molecule. + + # Save the updated complex model as pdb + complex_espaloma_filename = f"{self._trajectory_directory}/out-complex-espaloma.pdb" + if not os.path.exists(complex_espaloma_filename): with open(complex_espaloma_filename, 'w') as outfile: app.PDBFile.writeFile(new_complex_topology_old_solvated, self._complex_positions_old_solvated, outfile) - # Load protein structure converted into a single residue as openff.toolkit.topology.Molecule - t = md.load_pdb(complex_espaloma_filename) - indices = t.topology.select('resname ESP') - t.atom_slice(indices).save_pdb(protein_espaloma_filename) + # Seperate proteins into indivdual pdb files according to chain ID. + import glob + protein_espaloma_filenames = glob.glob("protein-espaloma-*.pdb") + if not protein_espaloma_filenames: + for chain_index in protein_chain_indices: + t = md.load_pdb(complex_espaloma_filename) + indices = t.topology.select(f"chainid == {chain_index}") + # Should this be saved all trajectory directories? We are assuming all trajectory directories share + # the same input protein structure. + t.atom_slice(indices).save_pdb(f"protein-espaloma-{chain_index}.pdb") + protein_espaloma_filenames = glob.glob("protein-espaloma-*.pdb") + # Load individual protein structure into openff.toolkit.topology.Molecule from openff.toolkit.topology import Molecule - protein_molecule = Molecule.from_file(protein_espaloma_filename, file_format='pdb') + protein_molecules = [ Molecule.from_file(protein_filename) for protein_filename in protein_espaloma_filenames ] # We already added small molecules to template generator when we first created ``self._system_generator``. # So we only need to add protein molecule to template generator (EspalomaTemplateGenerator). - self._system_generator.template_generator.add_molecules(protein_molecule) + self._system_generator.template_generator.add_molecules(protein_molecules) # Regenerate system with system generator - new_complex_system_old_solvated = self._system_generator.create_system(new_complex_topology_old_solvated) + new_complex_system_old_solvated = self._system_generator.create_system(new_complex_topology_old_solvated) # DEBUG: Save updated system #from openmm import XmlSerializer diff --git a/perses/app/setup_relative_calculation.py b/perses/app/setup_relative_calculation.py index 51597e2e1..717c1a1fa 100644 --- a/perses/app/setup_relative_calculation.py +++ b/perses/app/setup_relative_calculation.py @@ -395,11 +395,11 @@ def run_setup(setup_options, serialize_systems=True, build_samplers=True): # such as deferring to defaults for modules we call unless the user # chooses to override them. - if 'use_protein_espaloma' not in list(setup_options.keys()): - use_protein_espaloma = False + if 'use_espaloma_for_protein' not in list(setup_options.keys()): + use_espaloma_for_protein = False else: - assert type(setup_options['use_protein_espaloma']) == type(True) - use_protein_espaloma = setup_options['use_protein_espaloma'] + assert type(setup_options['use_espaloma_for_protein']) == type(True) + use_espaloma_for_protein = setup_options['use_espaloma_for_protein'] if 'use_given_geometries' not in list(setup_options.keys()): use_given_geometries = False @@ -526,7 +526,7 @@ def run_setup(setup_options, serialize_systems=True, build_samplers=True): atom_expr=setup_options['atom_expr'], bond_expr=setup_options['bond_expr'], neglect_angles = setup_options['neglect_angles'], anneal_14s = setup_options['anneal_1,4s'], small_molecule_forcefield=setup_options['small_molecule_forcefield'], small_molecule_parameters_cache=setup_options['small_molecule_parameters_cache'], - use_protein_espaloma=use_protein_espaloma, + use_espaloma_for_protein=use_espaloma_for_protein, trajectory_directory=trajectory_directory, trajectory_prefix=setup_options['trajectory_prefix'], nonbonded_method=setup_options['nonbonded_method'], complex_box_dimensions=setup_options['complex_box_dimensions'],solvent_box_dimensions=setup_options['solvent_box_dimensions'], ionic_strength=ionic_strength, remove_constraints=setup_options['remove_constraints'], use_given_geometries=use_given_geometries, given_geometries_tolerance=given_geometries_tolerance,