Skip to content

Commit

Permalink
Read/write branched entity composition info
Browse files Browse the repository at this point in the history
Read information about the chemical components
that make up a branched entity (oligosaccharide)
from mmCIF, and write it back.
  • Loading branch information
benmwebb committed Sep 22, 2023
1 parent ffd2930 commit 7533fc5
Show file tree
Hide file tree
Showing 4 changed files with 243 additions and 7 deletions.
53 changes: 50 additions & 3 deletions ihm/dumper.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,29 @@ def dump(self, system, writer):
comp_id_end=entity.sequence[rng.seq_id_range[1] - 1].id)


class _EntityBranchListDumper(Dumper):
    """Output the _pdbx_entity_branch_list table.

    One row is written for every component in the sequence of each
    branched entity. The ``hetero`` keyword is declared in the loop
    but no value is passed for it here.
    """

    def dump(self, system, writer):
        keys = ["entity_id", "num", "comp_id", "hetero"]
        with writer.loop("_pdbx_entity_branch_list", keys) as lp:
            for entity in system.entities:
                if entity.is_branched():
                    # num counts from 1 along the entity's sequence
                    for num, comp in enumerate(entity.sequence, start=1):
                        lp.write(entity_id=entity._id, num=num,
                                 comp_id=comp.id)


class _EntityBranchDumper(Dumper):
    """Output the _pdbx_entity_branch table (one row per branched entity)."""

    def dump(self, system, writer):
        # todo: we currently only support branched oligosaccharides
        with writer.loop("_pdbx_entity_branch", ["entity_id", "type"]) as lp:
            for entity in system.entities:
                if entity.is_branched():
                    lp.write(entity_id=entity._id, type="oligosaccharide")


class _PolySeqSchemeDumper(Dumper):
"""Output the _pdbx_poly_seq_scheme table.
This is needed because it is a parent category of atom_site.
Expand Down Expand Up @@ -689,7 +712,7 @@ def dump(self, system, writer):
"pdb_ins_code"]) as lp:
for asym in system.asym_units:
entity = asym.entity
if entity.is_polymeric():
if entity.is_polymeric() or entity.is_branched():
continue
for num, comp in enumerate(asym.sequence):
auth_seq_num, ins = asym._get_auth_seq_id_ins_code(num + 1)
Expand All @@ -706,6 +729,28 @@ def dump(self, system, writer):
auth_mon_id=comp.id, pdb_ins_code=ins)


class _BranchSchemeDumper(Dumper):
    """Output the _pdbx_branch_scheme table.

    One row is written for every component of each asym unit whose
    entity is branched; asym units of other entities are skipped.
    """

    def dump(self, system, writer):
        keys = ["asym_id", "entity_id", "mon_id", "num",
                "pdb_seq_num", "auth_seq_num",
                "auth_mon_id", "pdb_asym_id"]
        with writer.loop("_pdbx_branch_scheme", keys) as lp:
            for asym in system.asym_units:
                entity = asym.entity
                if entity.is_branched():
                    # Assume num counts sequentially from 1 (like seq_id)
                    for num, comp in enumerate(asym.sequence, start=1):
                        # The insertion code is returned alongside the
                        # author-provided number but is not written out
                        auth_seq_num, _ins = \
                            asym._get_auth_seq_id_ins_code(num)
                        lp.write(asym_id=asym._id,
                                 pdb_asym_id=asym.strand_id,
                                 entity_id=entity._id,
                                 num=num,
                                 pdb_seq_num=auth_seq_num,
                                 auth_seq_num=auth_seq_num,
                                 mon_id=comp.id,
                                 auth_mon_id=comp.id)


class _AsymIDProvider(object):
"""Provide unique asym IDs"""
def __init__(self, seen_ids):
Expand Down Expand Up @@ -3246,8 +3291,10 @@ class IHMVariant(Variant):
_ChemDescriptorDumper, _EntityDumper, _EntitySrcGenDumper,
_EntitySrcNatDumper, _EntitySrcSynDumper, _StructRefDumper,
_EntityPolyDumper, _EntityNonPolyDumper, _EntityPolySeqDumper,
_EntityPolySegmentDumper, _StructAsymDumper, _PolySeqSchemeDumper,
_NonPolySchemeDumper, _AssemblyDumper, _ExternalReferenceDumper,
_EntityPolySegmentDumper, _EntityBranchListDumper, _EntityBranchDumper,
_StructAsymDumper, _PolySeqSchemeDumper,
_NonPolySchemeDumper, _BranchSchemeDumper,
_AssemblyDumper, _ExternalReferenceDumper,
_DatasetDumper, _ModelRepresentationDumper, _StartingModelDumper,
_ProtocolDumper, _PostProcessDumper, _PseudoSiteDumper,
_GeometricObjectDumper, _FeatureDumper, _CrossLinkDumper,
Expand Down
44 changes: 40 additions & 4 deletions ihm/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2437,13 +2437,18 @@ def finalize(self):
def _get_auth_seq_id_offset(self, asym):
"""Get the offset from seq_id to auth_seq_id. Return None if no
consistent offset exists."""
# Do nothing if the entity is not polymeric
if asym.entity is None or not asym.entity.is_polymeric():
# Do nothing if the entity is not polymeric or branched
if asym.entity is None or (not asym.entity.is_polymeric()
and not asym.entity.is_branched()):
return
# Do nothing if no map exists
if asym.auth_seq_id_map == 0:
return
rng = asym.seq_id_range
if asym.entity.is_branched():
# Hack, as branched entities don't technically have seq_ids
rng = (1, len(asym.entity.sequence))
else:
rng = asym.seq_id_range
offset = None
for seq_id in range(rng[0], rng[1] + 1):
# If a residue isn't in the map, it has an effective offset of 0,
Expand Down Expand Up @@ -2509,6 +2514,36 @@ def __call__(self, asym_id, entity_id, auth_seq_num, mon_id, pdb_ins_code,
asym.auth_seq_id_map = {1: (auth_seq_num, pdb_ins_code)}


class _BranchSchemeHandler(Handler):
    """Handle rows of the _pdbx_branch_scheme category.

    Records a strand ID on the asym unit when pdb_asym_id differs from
    asym_id, and notes every residue whose num differs from its
    auth_seq_num in the asym unit's auth_seq_id_map.
    """
    category = '_pdbx_branch_scheme'

    def __call__(self, asym_id, num, auth_seq_num, pdb_asym_id):
        asym = self.sysr.asym_units.get_by_id(asym_id)
        strand_differs = pdb_asym_id not in (None, ihm.unknown, asym_id)
        if strand_differs:
            asym._strand_id = pdb_asym_id
        auth_num = self.get_int_or_string(auth_seq_num)
        branch_num = self.get_int(num)
        # Nothing to note if either value is missing or they agree
        if branch_num is None or auth_num is None or branch_num == auth_num:
            return
        # Note any residues that have different num and auth_seq_id
        # These will be finalized by _PolySeqSchemeHandler
        if asym.auth_seq_id_map == 0:
            asym.auth_seq_id_map = {}
        asym.auth_seq_id_map[branch_num] = (auth_num, None)


class _EntityBranchListHandler(Handler):
    """Handle rows of the _pdbx_entity_branch_list category.

    Each row places one chemical component into the sequence of the
    referenced entity, padding the sequence with None if rows arrive
    out of order or leave gaps.
    """
    category = '_pdbx_entity_branch_list'

    def __call__(self, entity_id, comp_id, num):
        entity = self.sysr.entities.get_by_id(entity_id)
        # Assume num is 1-based (appears to be)
        index = int(num) - 1
        sequence = entity.sequence
        shortfall = index + 1 - len(sequence)
        if shortfall > 0:
            # Grow the sequence so that the target slot exists
            sequence.extend([None] * shortfall)
        sequence[index] = self.sysr.chem_comps.get_by_id(comp_id)


class _CrossLinkListHandler(Handler):
category = '_ihm_cross_link_list'
ignored_keywords = ['entity_description_1', 'entity_description_2',
Expand Down Expand Up @@ -3327,7 +3362,8 @@ class IHMVariant(Variant):
_CenterHandler, _TransformationHandler, _GeometricObjectHandler,
_SphereHandler, _TorusHandler, _HalfTorusHandler, _AxisHandler,
_PlaneHandler, _GeometricRestraintHandler, _PolySeqSchemeHandler,
_NonPolySchemeHandler, _CrossLinkListHandler,
_NonPolySchemeHandler, _BranchSchemeHandler, _EntityBranchListHandler,
_CrossLinkListHandler,
_CrossLinkRestraintHandler, _CrossLinkPseudoSiteHandler,
_CrossLinkResultHandler, _StartingModelSeqDifHandler,
_OrderedEnsembleHandler]
Expand Down
70 changes: 70 additions & 0 deletions test/test_dumper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4451,6 +4451,76 @@ def test_dumper_unwrapped(self):
#
""") # noqa: E501

def test_entity_branch_list_dumper(self):
    """Check _EntityBranchListDumper output for a mix of entities"""
    system = ihm.System()
    sugar = ihm.Entity([ihm.SaccharideChemComp('NAG')])
    system.entities.append(sugar)
    # This entity is not branched
    polymer = ihm.Entity('ACGT')
    system.entities.append(polymer)
    # Finalizing the entity dumper assigns IDs
    ihm.dumper._EntityDumper().finalize(system)
    dumper = ihm.dumper._EntityBranchListDumper()
    expected = """#
loop_
_pdbx_entity_branch_list.entity_id
_pdbx_entity_branch_list.num
_pdbx_entity_branch_list.comp_id
_pdbx_entity_branch_list.hetero
1 1 NAG .
#
"""
    self.assertEqual(_get_dumper_output(dumper, system), expected)

def test_entity_branch_dumper(self):
    """Check _EntityBranchDumper output for a mix of entities"""
    system = ihm.System()
    sugar = ihm.Entity([ihm.SaccharideChemComp('NAG')])
    system.entities.append(sugar)
    # This entity is not branched
    polymer = ihm.Entity('ACGT')
    system.entities.append(polymer)
    # Finalizing the entity dumper assigns IDs
    ihm.dumper._EntityDumper().finalize(system)
    dumper = ihm.dumper._EntityBranchDumper()
    expected = """#
loop_
_pdbx_entity_branch.entity_id
_pdbx_entity_branch.type
1 oligosaccharide
#
"""
    self.assertEqual(_get_dumper_output(dumper, system), expected)

def test_branch_scheme_dumper(self):
    """Check _BranchSchemeDumper output for branched and other asyms"""
    system = ihm.System()
    nag = ihm.Entity([ihm.SaccharideChemComp('NAG')])
    fuc = ihm.Entity([ihm.SaccharideChemComp('FUC')])
    # This entity is not branched
    polymer = ihm.Entity('ACT')
    system.entities.extend((nag, fuc, polymer))
    asym_foo = ihm.AsymUnit(nag, 'foo')
    system.asym_units.append(asym_foo)
    asym_bar = ihm.AsymUnit(fuc, 'bar', auth_seq_id_map=5)
    system.asym_units.append(asym_bar)
    asym_baz = ihm.AsymUnit(polymer, 'baz')
    system.asym_units.append(asym_baz)
    # Run the finalize step of the entity and asym dumpers first
    for prerequisite in (ihm.dumper._EntityDumper,
                         ihm.dumper._StructAsymDumper):
        prerequisite().finalize(system)
    dumper = ihm.dumper._BranchSchemeDumper()
    expected = """#
loop_
_pdbx_branch_scheme.asym_id
_pdbx_branch_scheme.entity_id
_pdbx_branch_scheme.mon_id
_pdbx_branch_scheme.num
_pdbx_branch_scheme.pdb_seq_num
_pdbx_branch_scheme.auth_seq_num
_pdbx_branch_scheme.auth_mon_id
_pdbx_branch_scheme.pdb_asym_id
A 1 NAG 1 1 1 NAG A
B 2 FUC 1 6 6 FUC B
#
"""
    self.assertEqual(_get_dumper_output(dumper, system), expected)


if __name__ == '__main__':
unittest.main()
83 changes: 83 additions & 0 deletions test/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4317,6 +4317,89 @@ def test_write_variant(self):
s, = ihm.reader.read(fh, variant=ihm.reader.IHMVariant())
self.assertEqual(s.id, 'testid')

def test_branch_scheme_handler(self):
    """Check how _pdbx_branch_scheme rows map onto asym units"""
    cif_text = """
loop_
_chem_comp.id
_chem_comp.type
_chem_comp.name
_chem_comp.formula
_chem_comp.formula_weight
BGC 'D-saccharide, beta linking' beta-D-glucopyranose 'C6 H12 O6' 180.156
#
loop_
_pdbx_entity_branch_list.entity_id
_pdbx_entity_branch_list.num
_pdbx_entity_branch_list.comp_id
_pdbx_entity_branch_list.hetero
1 1 BGC .
1 2 BGC .
1 3 BGC .
1 4 BGC .
#
loop_
_struct_asym.id
_struct_asym.entity_id
_struct_asym.details
A 1 foo
B 1 bar
C 1 baz
#
loop_
_pdbx_branch_scheme.asym_id
_pdbx_branch_scheme.entity_id
_pdbx_branch_scheme.mon_id
_pdbx_branch_scheme.num
_pdbx_branch_scheme.pdb_seq_num
_pdbx_branch_scheme.auth_seq_num
_pdbx_branch_scheme.auth_mon_id
_pdbx_branch_scheme.pdb_asym_id
A 1 BGC 1 5 5 BGC 0
A 1 BGC 2 6 6 BGC 0
A 1 BGC 3 7 7 BGC 0
A 1 BGC 4 8 8 BGC 0
B 1 BGC 1 1 1 BGC .
B 1 BGC 2 2 2 BGC .
B 1 BGC 3 3 3 BGC .
B 1 BGC 4 4 4 BGC .
C 1 BGC 1 2 2 BGC .
C 1 BGC 2 4 4 BGC .
C 1 BGC 3 6 6 BGC .
C 1 BGC 4 8 8 BGC .
"""
    fh = StringIO(cif_text)
    (s,) = ihm.reader.read(fh)
    asym_a, asym_b, asym_c = s.asym_units

    # Asym A: every auth_seq_num is num + 4, and a strand ID is given
    self.assertEqual(asym_a.auth_seq_id_map, 4)
    self.assertEqual(asym_a._strand_id, '0')
    self.assertEqual(asym_a.residue(1).auth_seq_id, 5)

    # Asym B: auth_seq_num equals num throughout, no strand ID
    self.assertEqual(asym_b.auth_seq_id_map, 0)
    self.assertIsNone(asym_b._strand_id)
    self.assertEqual(asym_b.residue(1).auth_seq_id, 1)

    # Asym C: no single offset fits, so a per-residue map is kept
    expected_map = {1: (2, None), 2: (4, None), 3: (6, None),
                    4: (8, None)}
    self.assertEqual(asym_c.auth_seq_id_map, expected_map)
    self.assertIsNone(asym_c._strand_id)
    self.assertEqual(asym_c.residue(1).auth_seq_id, 2)

def test_entity_branch_list_handler(self):
    """Check that _pdbx_entity_branch_list rows fill an entity sequence"""
    cif_text = """
loop_
_pdbx_entity_branch_list.entity_id
_pdbx_entity_branch_list.num
_pdbx_entity_branch_list.comp_id
_pdbx_entity_branch_list.hetero
1 1 BGC .
1 2 BGC .
1 3 BGC .
1 4 BGC .
"""
    fh = StringIO(cif_text)
    (s,) = ihm.reader.read(fh)
    (e1,) = s.entities
    # Unpacking into four names fails unless exactly four components
    # were read
    _c1, _c2, _c3, _c4 = e1.sequence
    comp_ids = [c.id for c in e1.sequence]
    self.assertEqual(comp_ids, ['BGC'] * 4)


if __name__ == '__main__':
unittest.main()

0 comments on commit 7533fc5

Please sign in to comment.