Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Draft] Self-consistently parameterize protein-ligand with espaloma-0.3.0 #1215

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion perses/annihilation/relative.py
Original file line number Diff line number Diff line change
Expand Up @@ -2049,7 +2049,14 @@ def _impose_virtual_bonds(self):
core_heavy_atoms = [ int(index) for index in set(core_atoms).intersection(set(heavy_atoms)) ]

# Determine protein CA atoms
protein_atoms = [ int(index) for index in self._hybrid_topology.select('protein and name CA') ]
# NOTE: Residue name starting with XX is a special residue name for proteins parameterized with espaloma.
# The special residue name is also defined in app/relative_setup.py#L929 and should be changed jointly
# when chaning the residue name.
x = self._hybrid_topology.select("resname =~ 'XX.*'")
if x.any():
protein_atoms = [ int(index) for index in self._hybrid_topology.select("resname =~ 'XX.*'" and "name CA") ]
else:
protein_atoms = [ int(index) for index in self._hybrid_topology.select('protein and name CA') ]

if len(core_heavy_atoms)==0 or len(protein_atoms)==0:
# No restraint to be added
Expand Down
122 changes: 122 additions & 0 deletions perses/app/relative_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def __init__(self,
anneal_14s=False,
small_molecule_forcefield='gaff-2.11',
small_molecule_parameters_cache=None,
use_espaloma_for_protein=False,
trajectory_directory=None,
trajectory_prefix=None,
spectator_filenames=None,
Expand Down Expand Up @@ -110,6 +111,10 @@ def __init__(self,
['gaff-1.81', 'gaff-2.11', 'smirnoff99Frosst-1.1.0', 'openff-2.0.0']
small_molecule_parameters_cache : str, optional, default=None
If specified, this filename will be used for a small molecule parameter cache by the SystemGenerator.
use_espaloma_for_protein : bool, default False
Whether to parameterize protein with epaloma force field.
If True, complex topology and system created by protein force fields in ``forcefield_files`` will be regenerated
using espaloma force field specified in ``small_molecule_forcefield``.
trajectory_directory : str, default None
Where to write out trajectories resulting from the calculation. If none, no writing is done.
trajectory_prefix : str, default None
Expand Down Expand Up @@ -381,6 +386,18 @@ def __init__(self,
self._complex_topology_old, self._complex_positions_old,phase='complex',box_dimensions=self._complex_box_dimensions, ionic_strength=self._ionic_strength)
_logger.info(f"successfully generated complex topology, positions, system")

# Apply espaloma model to protein. Topology and system will be regenerated.
if use_espaloma_for_protein:
_logger.info(f"Regenerating toplogy and system with espaloma...")
# DEBUG: Save and serialize current system
#from openmm import XmlSerializer
#with open(f"{self._trajectory_directory}/out-complex.xml", "w") as wf:
# xml = XmlSerializer.serialize(self._complex_system_old_solvated)
# wf.write(xml)
assert 'espaloma' in small_molecule_forcefield, "Espaloma force field not defined in ``small_molecule_forcefield``"
self._complex_topology_old_solvated, self._complex_system_old_solvated = self._regenerate_espaloma_system()
_logger.info(f"Toplogy and system regenerated with espaloma")

self._complex_md_topology_old_solvated = md.Topology.from_openmm(self._complex_topology_old_solvated)

_logger.info(f"creating TopologyProposal...")
Expand Down Expand Up @@ -863,6 +880,111 @@ def _solvate_system(self, topology, positions, model='tip3p',phase='complex', bo

return solvated_topology, solvated_positions, solvated_system

def _regenerate_espaloma_system(self):
"""
Regnerate topology and system with espaloma force field. Protein topology will be updated with
espaloma model specified in ``small_molecule_forcefield``.

Returns
-------
new_complex_topology_old_solvated : app.Topology
Topology of the system with added waters.
new_complex_system_old_solvated : openmm.System
The parameterized system with the same kwargs used to generate
``self._system_generator`` with ``openmmforcefields.generators.SystemGenerator``.
"""
# Check protein chains
mdtop = md.Topology.from_openmm(self._complex_topology_old_solvated)
chain_indices = [ chain.index for chain in self._complex_topology_old_solvated.chains() ]

# DEBUG
#for chainid in chain_indices:
# indice = mdtop.select(f"protein and chainid == {chainid}")
# _logger.info(f"Protein chain ID {chainid}: {indice}")

protein_chain_indices = [ chain_index for chain_index in chain_indices if mdtop.select(f"protein and chainid == {chain_index}").any() ]
assert len(protein_chain_indices) != 0, "Could not detect protein"
assert len(protein_chain_indices) <= 10, "Only supports up to 10 chains for proteins"
_logger.info(f"Protein chain indices: {protein_chain_indices}")

# Check conflicting residue names
conflict_resnames = [ residue.name for residue in mdtop.residues if residue.name.startswith("XX") ]
if conflict_resnames:
raise Exception('Found conflict residue name in protein.')

# Initialize
new_complex_topology_old_solvated = app.Topology()
new_complex_topology_old_solvated.setPeriodicBoxVectors(self._complex_topology_old_solvated.getPeriodicBoxVectors())
new_atoms = {}

# Regenerate protein topology
chain_counter = 0
_logger.info(f"Regenerating protein topology...")
for chain in self._complex_topology_old_solvated.chains():
new_chain = new_complex_topology_old_solvated.addChain(chain.id)
# Convert protein into a single residue
# NOTE: ``resname`` is used in ``annihilation/relative.py#L2051.`` to deterimine the protein CA atoms to
# impose virtual bonds to keep the core atoms closeby with the protein subunits. If ``resname`` is changed,
# then protein CA atom selection expression in ``annihilation/relative.py#L2051.`` should also be changed.
if chain.index in protein_chain_indices:
resname = f'XX{chain_counter:01d}'
resid = '1'
chain_counter += 1
new_residue = new_complex_topology_old_solvated.addResidue(resname, new_chain, resid)
for i, residue in enumerate(chain.residues()):
if residue.chain.index not in protein_chain_indices:
new_residue = new_complex_topology_old_solvated.addResidue(residue.name, new_chain, residue.id)
for atom in residue.atoms():
new_atom = new_complex_topology_old_solvated.addAtom(atom.name, atom.element, new_residue, atom.id)
new_atoms[atom] = new_atom
#_logger.info(f"{new_atom.residue.name}, {new_atom.residue.chain.id}, {new_atom.residue.id}")
#_logger.info(f"{new_residue.chain.id}, {new_residue.id}")
#_logger.info(f"{new_residue} {new_atom}")

# Regenerate bond information
for bond in self._complex_topology_old_solvated.bonds():
if bond[0] in new_atoms and bond[1] in new_atoms:
new_complex_topology_old_solvated.addBond(new_atoms[bond[0]], new_atoms[bond[1]])

# TODO: Is there a better way to handle this?
# Currently, the new solvated complex openmm.app.topology ``new_complex_topology_old_solvated``
# saved as a pdb (self._trajectory_directory/out-complex-espaloma.pdb) is loaded with mdtraj.
# Proteins are selected and saved as pdb files and loaded as openff.toolkit.topology.Molecule.

# Save the updated complex model as pdb
complex_espaloma_filename = f"{self._trajectory_directory}/out-complex-espaloma.pdb"
if not os.path.exists(complex_espaloma_filename):
with open(complex_espaloma_filename, 'w') as outfile:
app.PDBFile.writeFile(new_complex_topology_old_solvated, self._complex_positions_old_solvated, outfile)
# Seperate proteins into indivdual pdb files according to chain ID.
import glob
protein_espaloma_filenames = glob.glob("protein-espaloma-*.pdb")
if not protein_espaloma_filenames:
for chain_index in protein_chain_indices:
t = md.load_pdb(complex_espaloma_filename)
indices = t.topology.select(f"chainid == {chain_index}")
# Should this be saved all trajectory directories? We are assuming all trajectory directories share
# the same input protein structure.
t.atom_slice(indices).save_pdb(f"protein-espaloma-{chain_index}.pdb")
protein_espaloma_filenames = glob.glob("protein-espaloma-*.pdb")
# Load individual protein structure into openff.toolkit.topology.Molecule
from openff.toolkit.topology import Molecule
protein_molecules = [ Molecule.from_file(protein_filename) for protein_filename in protein_espaloma_filenames ]

# We already added small molecules to template generator when we first created ``self._system_generator``.
# So we only need to add protein molecule to template generator (EspalomaTemplateGenerator).
self._system_generator.template_generator.add_molecules(protein_molecules)
# Regenerate system with system generator
new_complex_system_old_solvated = self._system_generator.create_system(new_complex_topology_old_solvated)

# DEBUG: Save updated system
#from openmm import XmlSerializer
#with open(f"{self._trajectory_directory}/out-complex-espaloma.xml", "w") as wf:
# xml = XmlSerializer.serialize(new_complex_system_old_solvated)
# wf.write(xml)

return new_complex_topology_old_solvated, new_complex_system_old_solvated

def _handle_charge_changes(self, topology_proposal, new_positions):
"""
modifies the atom mapping in the topology proposal and the new system parameters to handle the transformation of
Expand Down
7 changes: 7 additions & 0 deletions perses/app/setup_relative_calculation.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def getSetupOptions(filename, override_string=None):

if 'small_molecule_parameters_cache' not in setup_options:
setup_options['small_molecule_parameters_cache'] = None


if 'remove_constraints' not in setup_options:
setup_options['remove_constraints'] = False
Expand Down Expand Up @@ -394,6 +395,11 @@ def run_setup(setup_options, serialize_systems=True, build_samplers=True):
# such as deferring to defaults for modules we call unless the user
# chooses to override them.

if 'use_espaloma_for_protein' not in list(setup_options.keys()):
use_espaloma_for_protein = False
else:
assert type(setup_options['use_espaloma_for_protein']) == type(True)
use_espaloma_for_protein = setup_options['use_espaloma_for_protein']

if 'use_given_geometries' not in list(setup_options.keys()):
use_given_geometries = False
Expand Down Expand Up @@ -520,6 +526,7 @@ def run_setup(setup_options, serialize_systems=True, build_samplers=True):
atom_expr=setup_options['atom_expr'], bond_expr=setup_options['bond_expr'],
neglect_angles = setup_options['neglect_angles'], anneal_14s = setup_options['anneal_1,4s'],
small_molecule_forcefield=setup_options['small_molecule_forcefield'], small_molecule_parameters_cache=setup_options['small_molecule_parameters_cache'],
use_espaloma_for_protein=use_espaloma_for_protein,
trajectory_directory=trajectory_directory, trajectory_prefix=setup_options['trajectory_prefix'], nonbonded_method=setup_options['nonbonded_method'],
complex_box_dimensions=setup_options['complex_box_dimensions'],solvent_box_dimensions=setup_options['solvent_box_dimensions'], ionic_strength=ionic_strength, remove_constraints=setup_options['remove_constraints'],
use_given_geometries=use_given_geometries, given_geometries_tolerance=given_geometries_tolerance,
Expand Down