Skip to content

Commit

Permalink
Add support for non-deposited templates
Browse files Browse the repository at this point in the history
Add a new class CustomTemplate to handle templates
that have not been deposited. This takes a list
of objects which specify the coordinates
of the template atoms. Read and write the
corresponding mmCIF tables `ma_template_customized`
and `ma_template_coord`. Closes #1.
  • Loading branch information
benmwebb committed Dec 6, 2024
1 parent e92bb5b commit b68638a
Show file tree
Hide file tree
Showing 6 changed files with 391 additions and 33 deletions.
3 changes: 3 additions & 0 deletions docs/main.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,8 @@ The :mod:`modelcif` Python module
.. autoclass:: Template
:members:

.. autoclass:: CustomTemplate
:members:

.. autoclass:: ReferenceDatabase
:members:
132 changes: 110 additions & 22 deletions modelcif/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,8 +475,42 @@ def __init__(self, template, gapped_sequence, seq_id_begin, seq_id_end):
self.seq_id_range = (seq_id_begin, seq_id_end)


class Template(modelcif.data.Data):
"""A single chain that was used as a template structure for modeling.
class _TemplateBase(modelcif.data.Data):
    """Functionality common to every kind of template.

    This class is not meant to be instantiated directly; create a
    :class:`Template` or a :class:`CustomTemplate` instead.
    """

    data_content_type = "template structure"

    def __init__(self, entity, asym_id, model_num, transformation,
                 name=None, strand_id=None, entity_id=None):
        super(_TemplateBase, self).__init__(name)
        self.entity = entity
        self.asym_id = asym_id
        self.model_num = model_num
        self.transformation = transformation
        # Explicitly-given strand ID (may be None); read it back through
        # the strand_id property, which falls back to asym_id.
        self._strand_id = strand_id
        self.entity_id = entity_id

    def segment(self, gapped_sequence, seq_id_begin, seq_id_end):
        """Get an object representing the alignment of part of this sequence.

        :param str gapped_sequence: Sequence of the segment, including gaps.
        :param int seq_id_begin: Start of the segment.
        :param int seq_id_end: End of the segment.
        :return: A new :class:`TemplateSegment` that refers to this template.
        """
        # todo: cache so we return the same object for same parameters
        aligned_part = TemplateSegment(self, gapped_sequence,
                                       seq_id_begin, seq_id_end)
        return aligned_part

    @property
    def seq_id_range(self):
        """Sequence range"""
        return self.entity.seq_id_range

    @property
    def template(self):
        # A template is its own template; no docstring here so that the
        # property's __doc__ stays unset.
        return self

    @property
    def strand_id(self):
        """PDB or author-provided strand/chain ID"""
        return self._strand_id or self.asym_id


class Template(_TemplateBase):
"""A single database chain that was used as a template structure
for modeling.
After creating a polymer template, use :meth:`segment` to denote the
part of its sequence used in any modeling alignments
Expand All @@ -488,6 +522,10 @@ class Template(modelcif.data.Data):
Template objects can also be used as inputs or outputs in modeling
protocol steps; see :class:`modelcif.protocol.Step`.
This class is intended for templates that were taken from reference
databases such as PDB. For a non-deposited "custom" template,
use the :class:`CustomTemplate` class instead.
:param entity: The sequence of the chain.
:type entity: :class:`Entity`
:param str asym_id: The asym or chain ID in the template structure.
Expand All @@ -505,36 +543,86 @@ class Template(modelcif.data.Data):
:param str entity_id: If known, the ID of the entity for this template
in its own mmCIF file.
"""
data_content_type = "template structure"

def __init__(self, entity, asym_id, model_num, transformation,
name=None, references=[], strand_id=None, entity_id=None):
super(Template, self).__init__(name)
self.entity = entity
self.asym_id, self.model_num = asym_id, model_num
self.transformation = transformation
super(Template, self).__init__(
entity=entity, asym_id=asym_id, model_num=model_num,
transformation=transformation, name=name, strand_id=strand_id,
entity_id=entity_id)
self.references = []
self.references.extend(references)
self._strand_id = strand_id
self.entity_id = entity_id

def segment(self, gapped_sequence, seq_id_begin, seq_id_end):
"""Get an object representing the alignment of part of this sequence.

:param str gapped_sequence: Sequence of the segment, including gaps.
:param int seq_id_begin: Start of the segment.
:param int seq_id_end: End of the segment.
"""
# todo: cache so we return the same object for same parameters
return TemplateSegment(self, gapped_sequence, seq_id_begin, seq_id_end)
class CustomTemplate(_TemplateBase):
"""A chain that was used as a template structure for modeling.
seq_id_range = property(lambda self: self.entity.seq_id_range,
doc="Sequence range")
This class is intended for templates that have not been deposited
in a database such as PDB (for deposited templates, use the
:class:`Template` class instead). The coordinates of the atoms
in these "custom" templates will be included in the mmCIF file;
see the :attr:`atoms` member.
template = property(lambda self: self)
:param str details: Information on how the template was created.
strand_id = property(lambda self: self._strand_id or self.asym_id,
doc="PDB or author-provided strand/chain ID")
See :class:`Template` for a description of the other parameters.
"""
def __init__(self, entity, asym_id, model_num, transformation,
name=None, strand_id=None, entity_id=None, details=None):
super(CustomTemplate, self).__init__(
entity=entity, asym_id=asym_id, model_num=model_num,
transformation=transformation, name=name, strand_id=strand_id,
entity_id=entity_id)
self.details = details

#: Coordinates of all atoms as :class:`TemplateAtom` objects
self.atoms = []


class TemplateAtom(object):
    """Coordinates of a single atom in a custom template.

    Custom templates have not been deposited in a database, so their
    coordinates are supplied through these objects. See
    :class:`CustomTemplate` for more information. Each atom is added to
    the :attr:`CustomTemplate.atoms` list.

    :param int seq_id: The sequence ID of the residue represented by this
           atom. This should generally be a number starting at 1 for any
           polymer chain, water, or oligosaccharide. For ligands, a seq_id
           is not needed (as a given asym can only contain a single ligand),
           so either 1 or None can be used.
    :param str atom_id: The name of the atom in the residue
    :param str type_symbol: Element name
    :param float x: x coordinate of the atom
    :param float y: y coordinate of the atom
    :param float z: z coordinate of the atom
    :param bool het: True for HETATM sites, False (default) for ATOM
    :param float biso: Temperature factor or equivalent (if applicable)
    :param float occupancy: Fraction of the atom type present
           (if applicable)
    :param float charge: Formal charge (if applicable)
    :param int auth_seq_id: Author-provided sequence ID (if applicable;
           this is optional for polymers but required for ligands).
    :param str auth_atom_id: Author-provided atom name (if needed)
    :param str auth_comp_id: Author-provided residue name (if needed)
    """

    # No per-instance __dict__, to reduce memory usage
    __slots__ = ['seq_id', 'atom_id', 'type_symbol', 'x', 'y', 'z', 'het',
                 'biso', 'occupancy', 'charge', 'auth_seq_id', 'auth_atom_id',
                 'auth_comp_id']

    def __init__(self, seq_id, atom_id, type_symbol, x, y, z,
                 het=False, biso=None, occupancy=None, charge=None,
                 auth_seq_id=None, auth_atom_id=None, auth_comp_id=None):
        # Identity of the atom
        self.seq_id = seq_id
        self.atom_id = atom_id
        self.type_symbol = type_symbol
        # Cartesian coordinates
        self.x = x
        self.y = y
        self.z = z
        # Optional per-atom properties
        self.het = het
        self.biso = biso
        self.occupancy = occupancy
        self.charge = charge
        # Author-provided identifiers
        self.auth_seq_id = auth_seq_id
        self.auth_atom_id = auth_atom_id
        self.auth_comp_id = auth_comp_id


class NonPolymerFromTemplate(AsymUnit):
Expand Down
49 changes: 47 additions & 2 deletions modelcif/dumper.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,8 @@ def dump(self, system, writer):
self.dump_template_non_poly(system, writer)
self.dump_template_ref_db(system, writer)
self.dump_target_template_poly_mapping(system, writer)
self.dump_template_customized(system, writer)
self.dump_template_coord(system, writer)
self.dump_info(system, writer)
self.dump_details(system, writer)
self.dump_sequences(system, writer)
Expand All @@ -366,8 +368,8 @@ def dump_template_details(self, system, writer):
ordinal = itertools.count(1)

def write_template(tmpl, tgt_asym, lp):
org = ("reference database" if tmpl.references
else "customized")
org = ("customized" if isinstance(tmpl, modelcif.CustomTemplate)
else "reference database")
poly = ("polymer" if tmpl.entity.is_polymeric()
else "non-polymer")
lp.write(ordinal_id=next(ordinal),
Expand Down Expand Up @@ -460,6 +462,8 @@ def dump_template_ref_db(self, system, writer):
["template_id", "db_name", "db_name_other_details",
"db_accession_code", "db_version_date"]) as lp:
for tmpl in system.templates:
if not isinstance(tmpl, modelcif.Template):
continue
for ref in tmpl.references:
lp.write(template_id=tmpl._id, db_name=ref.name,
db_name_other_details=ref.other_details,
Expand All @@ -468,6 +472,47 @@ def dump_template_ref_db(self, system, writer):
ref.db_version_date)
if ref.db_version_date else None)

def dump_template_customized(self, system, writer):
    """Write the _ma_template_customized table.

    One row (template ID plus free-text details) is written for each
    :class:`modelcif.CustomTemplate` in ``system.templates``; templates
    of any other class are skipped.
    """
    keys = ["template_id", "details"]
    with writer.loop("_ma_template_customized", keys) as lp:
        for tmpl in system.templates:
            if not isinstance(tmpl, modelcif.CustomTemplate):
                continue
            lp.write(template_id=tmpl._id, details=tmpl.details)

def dump_template_coord(self, system, writer):
    """Write atom coordinates of custom templates to _ma_template_coord.

    One row is written for every atom in the ``atoms`` list of each
    :class:`modelcif.CustomTemplate` in ``system.templates``; templates
    of any other class are skipped. Rows are numbered sequentially,
    starting at 1, across all templates.

    :param system: The system whose templates should be written.
    :param writer: Writer object providing the ``loop`` context manager.
    """
    ordinal = itertools.count(1)
    with writer.loop(
            "_ma_template_coord",
            ["template_id", "group_PDB", "ordinal_id", "type_symbol",
             "label_atom_id", "label_comp_id", "label_seq_id",
             "label_asym_id", "auth_seq_id", "auth_asym_id",
             "auth_atom_id", "auth_comp_id",
             "Cartn_x", "Cartn_y", "Cartn_z",
             "occupancy", "label_entity_id", "B_iso_or_equiv",
             "formal_charge"]) as lp:
        for tmpl in system.templates:
            if not isinstance(tmpl, modelcif.CustomTemplate):
                continue
            e = tmpl.entity
            for atom in tmpl.atoms:
                # TemplateAtom allows seq_id=None for ligands (a ligand
                # asym holds a single component), so look up the
                # component at position 1 in that case rather than
                # failing on "None - 1". The seq_id itself is still
                # written out unchanged.
                seq_id = 1 if atom.seq_id is None else atom.seq_id
                lp.write(template_id=tmpl._id,
                         group_PDB='HETATM' if atom.het else 'ATOM',
                         ordinal_id=next(ordinal),
                         type_symbol=atom.type_symbol,
                         label_atom_id=atom.atom_id,
                         label_comp_id=e.sequence[seq_id - 1].id,
                         label_seq_id=atom.seq_id,
                         label_asym_id=tmpl.asym_id,
                         auth_seq_id=atom.auth_seq_id,
                         auth_asym_id=tmpl.strand_id,
                         auth_atom_id=atom.auth_atom_id,
                         auth_comp_id=atom.auth_comp_id,
                         Cartn_x=atom.x, Cartn_y=atom.y, Cartn_z=atom.z,
                         occupancy=atom.occupancy,
                         label_entity_id=tmpl.entity_id,
                         B_iso_or_equiv=atom.biso,
                         formal_charge=atom.charge)

def dump_target_template_poly_mapping(self, system, writer):
ordinal = itertools.count(1)
with writer.loop("_ma_target_template_poly_mapping",
Expand Down
55 changes: 51 additions & 4 deletions modelcif/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,17 @@ def _make_new_object(self, newcls=None):
return newcls(*(None,) * 3)


class _TemplateIDMapper(IDMapper):
    """IDMapper with extra handling for modelcif.Template objects"""

    def _update_old_object(self, obj, newcls=None):
        super(_TemplateIDMapper, self)._update_old_object(obj, newcls)
        # Nothing more to do unless the object is now meant to be a
        # CustomTemplate and does not yet have that class's members
        if newcls is not modelcif.CustomTemplate or hasattr(obj, 'atoms'):
            return
        # Add missing members, since the wrong class was originally
        # instantiated
        obj.details = None
        obj.atoms = []


class _SystemReader(object):
def __init__(self, model_class, starting_model_class, system=None):
self.system = system or modelcif.System()
Expand Down Expand Up @@ -101,8 +112,8 @@ def __init__(self, model_class, starting_model_class, system=None):
self.transformations = IDMapper(self.system.template_transformations,
modelcif.Transformation, *(None,) * 2)

self.templates = IDMapper(self.system.templates, modelcif.Template,
*(None,) * 4)
self.templates = _TemplateIDMapper(
self.system.templates, modelcif.Template, *(None,) * 4)

self.template_segments = IDMapper(
self.system.template_segments, modelcif.TemplateSegment,
Expand Down Expand Up @@ -483,8 +494,11 @@ class _TemplateDetailsHandler(Handler):
def __call__(self, template_id, template_trans_matrix_id,
template_data_id, target_asym_id, template_label_asym_id,
template_label_entity_id, template_model_num,
template_auth_asym_id):
template = self.sysr.templates.get_by_id(template_id)
template_auth_asym_id, template_origin):
newcls = None
if template_origin and template_origin.lower() == 'customized':
newcls = modelcif.CustomTemplate
template = self.sysr.templates.get_by_id(template_id, newcls)
template.transformation = self.sysr.transformations.get_by_id(
template_trans_matrix_id)
# Add empty sequence (hopefully will fill in from _ma_template_poly
Expand Down Expand Up @@ -528,6 +542,38 @@ def __call__(self, id, template_id, residue_number_begin,
int(residue_number_end))


class _TemplateCustomizedHandler(Handler):
    """Read the _ma_template_customized table, storing each row's
       details on the corresponding CustomTemplate"""

    category = '_ma_template_customized'

    def __call__(self, template_id, details):
        # Request the template as a CustomTemplate
        templates = self.sysr.templates
        custom = templates.get_by_id(template_id, modelcif.CustomTemplate)
        custom.details = details


class _TemplateCoordHandler(Handler):
    """Read the _ma_template_coord table, adding one TemplateAtom per row
       to the atoms list of the corresponding CustomTemplate"""

    category = '_ma_template_coord'

    def __call__(self, template_id, group_pdb, type_symbol, label_atom_id,
                 label_seq_id, auth_seq_id, auth_atom_id, auth_comp_id,
                 cartn_x, cartn_y, cartn_z, occupancy, b_iso_or_equiv,
                 formal_charge):
        # Request the template as a CustomTemplate
        owner = self.sysr.templates.get_by_id(template_id,
                                              modelcif.CustomTemplate)
        # Any group other than ATOM counts as HETATM; a missing group
        # is treated as ATOM
        is_het = group_pdb is not None and group_pdb != 'ATOM'
        seq_id = self.get_int(label_seq_id)
        author_seq_id = self.get_int(auth_seq_id)
        # Coordinates are converted unconditionally, unlike the optional
        # fields below which go through get_float
        x, y, z = float(cartn_x), float(cartn_y), float(cartn_z)
        occ = self.get_float(occupancy)
        biso = self.get_float(b_iso_or_equiv)
        charge = self.get_float(formal_charge)
        owner.atoms.append(modelcif.TemplateAtom(
            seq_id=seq_id, atom_id=label_atom_id, type_symbol=type_symbol,
            x=x, y=y, z=z, het=is_het, biso=biso, occupancy=occ,
            charge=charge, auth_seq_id=author_seq_id,
            auth_atom_id=auth_atom_id, auth_comp_id=auth_comp_id))


def _get_align_class(type_class, mode_class, align_class_map):
"""Create and return a new class to represent an alignment"""
k = (type_class, mode_class)
Expand Down Expand Up @@ -945,6 +991,7 @@ class ModelCIFVariant(Variant):
ihm.reader._StructRefSeqHandler, ihm.reader._StructRefSeqDifHandler,
_TargetRefDBHandler, _TransformationHandler, _TemplateDetailsHandler,
_TemplateRefDBHandler, _TemplatePolySegmentHandler,
_TemplateCustomizedHandler, _TemplateCoordHandler,
_TemplatePolyHandler, _TemplateNonPolyHandler,
_AlignmentHandler, _AlignmentInfoHandler, _AlignmentDetailsHandler,
_TargetTemplatePolyMappingHandler,
Expand Down
Loading

0 comments on commit b68638a

Please sign in to comment.