Skip to content

Commit

Permalink
Allow combining multiple mmCIF files into one
Browse files Browse the repository at this point in the history
  • Loading branch information
benmwebb committed Feb 13, 2024
1 parent a3baf8b commit 3f6543b
Show file tree
Hide file tree
Showing 3 changed files with 232 additions and 3 deletions.
55 changes: 55 additions & 0 deletions test/input/mini_add.cif
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
data_model
#
_exptl.method 'model, MODELLER Version 9.24 2020/08/21 11:54:31'
#
_modeller.version 9.24
#
loop_
_struct_asym.id
_struct_asym.entity_id
_struct_asym.details
A 1 ?
B 2 ?
#
loop_
_entity_poly_seq.entity_id
_entity_poly_seq.num
_entity_poly_seq.mon_id
1 1 VAL
1 2 GLY
1 3 GLN
1 4 GLN
1 5 TYR
1 6 SER
1 7 SER
2 1 PRO
2 2 GLU
#
loop_
_atom_site.group_PDB
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_asym_id
_atom_site.auth_asym_id
_atom_site.label_seq_id
_atom_site.auth_seq_id
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.label_entity_id
_atom_site.id
_atom_site.pdbx_PDB_model_num
ATOM C CA . VAL A A 1 2 ? 114.370 27.980 -26.088 1.000 143.490 1 2 1
ATOM C CA . GLY A A 2 3 ? 111.506 26.368 -28.075 1.000 137.530 1 9 1
ATOM C CA . GLN A A 3 4 ? 113.468 23.113 -28.639 1.000 128.420 1 13 1
ATOM C CA . GLN A A 4 5 ? 113.808 21.534 -32.168 1.000 117.620 1 22 1
ATOM C CA . TYR A A 5 6 ? 116.743 22.770 -34.259 1.000 103.700 1 31 1
ATOM C CA . SER A A 6 7 ? 116.626 25.161 -37.229 1.000 93.490 1 43 1
ATOM C CA . SER A A 7 8 ? 119.165 25.590 -40.036 1.000 87.320 1 49 1
ATOM C CA . PRO B B 1 3 ? 70.427 58.819 51.717 1.000 152.390 2 55 1
ATOM C CA . GLU B B 2 4 ? 68.584 58.274 48.425 1.000 152.090 2 63 1
44 changes: 44 additions & 0 deletions test/test_make_mmcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,50 @@ def test_pass_through(self):
self.assertEqual(s.title, 'Output from simple-docking example')
os.unlink('output.cif')

@unittest.skipIf(sys.version_info[0] < 3, "make-mmcif.py needs Python 3")
def test_add_polymers(self):
"""Check that make-mmcif combines polymer information"""
# mini.cif contains two chains A, B
incif = utils.get_input_file_name(TOPDIR, 'mini.cif')
# mini_add.cif also contains A, B; A is the same sequence as mini.cif
# but B is different (so should be renamed C when we add)
addcif = utils.get_input_file_name(TOPDIR, 'mini_add.cif')
subprocess.check_call([sys.executable, MAKE_MMCIF, incif,
'--add', addcif])
with open('output.cif') as fh:
s, = ihm.reader.read(fh)
self.assertEqual(len(s.entities), 3)
self.assertEqual(len(s.asym_units), 3)
self.assertEqual(len(s.state_groups), 2)
# Model from mini.cif
self.assertEqual(len(s.state_groups[0]), 1)
self.assertEqual(len(s.state_groups[0][0]), 1)
self.assertEqual(len(s.state_groups[0][0][0]), 1)
m = s.state_groups[0][0][0][0]
self.assertEqual(m.protocol.name, 'modeling')
self.assertEqual(m.assembly.name, 'Modeled assembly')
chain_a, chain_b, = m.representation
self.assertIs(chain_a.asym_unit.asym, s.asym_units[0])
self.assertIs(chain_b.asym_unit.asym, s.asym_units[1])
for chain in chain_a, chain_b:
self.assertIsInstance(chain, ihm.representation.AtomicSegment)
self.assertFalse(chain.rigid)
# Model from mini_add.cif
self.assertEqual(len(s.state_groups[1]), 1)
self.assertEqual(len(s.state_groups[1][0]), 1)
self.assertEqual(len(s.state_groups[1][0][0]), 1)
m = s.state_groups[1][0][0][0]
self.assertEqual(m.protocol.name, 'modeling')
self.assertEqual(m.assembly.name, 'Modeled assembly')
chain_a, chain_c, = m.representation
self.assertIs(chain_a.asym_unit.asym, s.asym_units[0])
self.assertIs(chain_c.asym_unit.asym, s.asym_units[2])
for chain in chain_a, chain_c:
self.assertIsInstance(chain, ihm.representation.AtomicSegment)
self.assertFalse(chain.rigid)
self.assertEqual(s.title, 'Auto-generated system')
os.unlink('output.cif')


if __name__ == '__main__':
unittest.main()
136 changes: 133 additions & 3 deletions util/make-mmcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
b) input files that aren't compliant with the PDBx dictionary, or that
contain syntax errors or other problems, may crash or otherwise confuse
python-ihm.
The --add option can also be used to combine multiple input mmCIF files into
one. This is typically used when the mmCIF files contain models with
differing composition. Only model (coordinate) information is combined, not
other IHM information such as starting models or restraints.
"""


Expand Down Expand Up @@ -53,13 +58,129 @@ def add_ihm_info(s):
return s


def add_ihm_info_one_system(fname):
"""Read mmCIF file `fname`, which must contain a single System, and
return it with any missing IHM data added."""
with open(fname) as fh:
systems = ihm.reader.read(fh)
if len(systems) != 1:
raise ValueError("mmCIF file %s must contain exactly 1 data block "
"(%d found)" % (fname, len(systems)))
return add_ihm_info(systems[0])


def combine(s, other_s):
"""Add models from the System `other_s` into the System `s`.
After running this function, `s` will contain all Models from both
systems. The models are added to new StateGroup(s) in `s`.
Note that this function also modifies `other_s` in place, so that
System should no longer be used after calling this function."""
# First map all Entity and AsymUnit objects in `other_s` to equivalent
# objects in `s`
entity_map = combine_entities(s, other_s)
asym_map = combine_asyms(s, other_s, entity_map)
# Now handle the Models themselves
combine_atoms(s, other_s, asym_map)


def combine_entities(s, other_s):
"""Add `other_s` entities into `s`. Returns a dict that maps Entities
in `other_s` to equivalent objects in `s`."""
entity_map = {}
sequences = dict((e.sequence, e) for e in s.entities)
for e in other_s.entities:
if e.sequence in sequences:
# If the `other_s` Entity already exists in `s`, map to it
entity_map[e] = sequences[e.sequence]
else:
# Otherwise, add the `other_s` Entity to `s`
s.entities.append(e)
entity_map[e] = e
return entity_map


def combine_asyms(s, other_s, entity_map):
"""Add `other_s` asyms into `s`. Returns a dict that maps AsymUnits
in `other_s` to equivalent objects in `s`."""
asym_map = {}
# Collect author-provided information for existing asyms. For polymers,
# we use the author-provided chain ID; for non-polymers, we also use
# the author-provided residue number of the first (only) residue
poly_asyms = dict(((a.entity, a.strand_id), a)
for a in s.asym_units if a.entity.is_polymeric())
nonpoly_asyms = dict(((a.entity, a.strand_id, a.auth_seq_id_map[1]), a)
for a in s.asym_units
if a.entity.type == 'non-polymer')

def map_asym(asym, orig_asym):
if orig_asym:
# If an equivalent asym already exists, use it (and its asym_id)
asym_map[asym] = orig_asym
else:
# Otherwise, add a new asym
asym_map[asym] = asym
asym.id = None # Assign new ID
s.asym_units.append(asym)

for asym in other_s.asym_units:
# Point to Entity in `s`, not `other_s`
asym.entity = entity_map[asym.entity]
# For polymers and non-polymers, if an asym in `other_s` has the
# same author-provided information and entity_id as an asym in `s`,
# reuse the asym_id
if asym.entity.is_polymeric():
map_asym(asym, poly_asyms.get((asym.entity, asym.strand_id)))
elif asym.entity.type == 'non-polymer':
map_asym(asym, nonpoly_asyms.get((asym.entity, asym.strand_id,
asym.auth_seq_id_map[1])))
else:
# For waters and branched entities, always assign a new asym_id
asym_map[asym] = asym
asym.id = None # Assign new ID
s.asym_units.append(asym)
return asym_map


def combine_atoms(s, other_s, asym_map):
"""Add `other_s` atoms into `s`"""
seen_asmb = set()
seen_rep = set()
for state_group in other_s.state_groups:
for state in state_group:
for model_group in state:
for model in model_group:
# Assembly, Representation and Atom and Sphere objects
# all reference `other_s` asyms. We must map these to
# asyms in `s`.
asmb = model.assembly
if id(asmb) not in seen_asmb:
seen_asmb.add(id(asmb))
# todo: also handle AsymUnitRange
asmb[:] = [asym_map[asym] for asym in asmb]
rep = model.representation
if id(rep) not in seen_rep:
seen_rep.add(id(rep))
for seg in rep:
seg.asym_unit = asym_map[seg.asym_unit]
for atom in model._atoms:
atom.asym_unit = asym_map[atom.asym_unit]
for sphere in model._spheres:
sphere.asym_unit = asym_map[sphere.asym_unit]

# Add all models as new state groups
s.state_groups.extend(other_s.state_groups)


def get_args():
p = argparse.ArgumentParser(
description="Add minimal IHM-related tables to an mmCIF file.")
p.add_argument("input", metavar="input.cif", help="input mmCIF file name")
p.add_argument("output", metavar="output.cif",
help="output mmCIF file name",
default="output.cif", nargs="?")
p.add_argument("--add", "-a", action='append', metavar="add.cif",
help="also add model information from the named mmCIF "
"file to the output file")
return p.parse_args()


Expand All @@ -69,8 +190,17 @@ def get_args():
and os.path.samefile(args.input, args.output)):
raise ValueError("Input and output are the same file")

with open(args.input) as fh:
if args.add:
s = add_ihm_info_one_system(args.input)
for other in args.add:
other_s = add_ihm_info_one_system(other)
combine(s, other_s)
with open(args.output, 'w') as fhout:
ihm.dumper.write(
fhout, [add_ihm_info(s) for s in ihm.reader.read(fh)],
variant=ihm.dumper.IgnoreVariant(['_audit_conform']))
fhout, [s], variant=ihm.dumper.IgnoreVariant(['_audit_conform']))
else:
with open(args.input) as fh:
with open(args.output, 'w') as fhout:
ihm.dumper.write(
fhout, [add_ihm_info(s) for s in ihm.reader.read(fh)],
variant=ihm.dumper.IgnoreVariant(['_audit_conform']))

0 comments on commit 3f6543b

Please sign in to comment.