-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add make_mmcif script, exclude not-modeled residues
Add a make_mmcif utility script, similar to that in python-ihm, to add minimal ModelCIF tables to an arbitrary mmCIF file. Have this script detect any non-modeled ranges, add NotModeledResidueRange objects for them, and use those objects to exclude these ranges from the pdbx_poly_seq_scheme table, in a similar fashion to python-ihm (ihmwg/python-ihm#150).
- Loading branch information
Showing
10 changed files
with
298 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pass |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
#!/usr/bin/env python3 | ||
|
||
""" | ||
Add minimal ModelCIF-related tables to an mmCIF file. | ||
Given any mmCIF file as input, this script will add any missing | ||
ModelCIF-related tables and write out a new file that is minimally compliant | ||
with the ModelCIF dictionary. | ||
This is done by simply reading in the original file with python-modelcif and | ||
then writing it out again, so | ||
a) any data in the input file that is not understood by python-modelcif | ||
will be lost on output; and | ||
b) input files that aren't compliant with the PDBx dictionary, or that | ||
contain syntax errors or other problems, may crash or otherwise confuse | ||
python-modelcif. | ||
""" | ||
|
||
|
||
import modelcif.reader | ||
import modelcif.dumper | ||
import modelcif.model | ||
import ihm.util | ||
import os | ||
import argparse | ||
|
||
|
||
def add_modelcif_info(s):
    """Fill in the minimal ModelCIF information missing from a system.

    A placeholder title is assigned if the system has none, and every
    model in every model group gains NotModeledResidueRange objects for
    the residue ranges reported by _get_not_modeled_residues().

    :param s: the system to modify in place.
    :return: the same system object, to allow use in expressions.
    """
    if not s.title:
        s.title = 'Auto-generated system'

    for group in s.model_groups:
        for mdl in group:
            # The generator is handed to extend() unevaluated, so the gaps
            # are computed while the model's list is being extended.
            gaps = _get_not_modeled_residues(mdl)
            mdl.not_modeled_residue_ranges.extend(gaps)
    return s
|
||
|
||
def _get_not_modeled_residues(model): | ||
"""Yield NotModeledResidueRange objects for all residue ranges in the | ||
Model that are not referenced by Atom, Sphere, or pre-existing | ||
NotModeledResidueRange objects""" | ||
for assem in model.assembly: | ||
asym = assem.asym if hasattr(assem, 'asym') else assem | ||
if not asym.entity.is_polymeric(): | ||
continue | ||
# Make a set of all residue indices of this asym "handled" either | ||
# by being modeled (with Atom or Sphere objects) or by being | ||
# explicitly marked as not-modeled | ||
handled_residues = set() | ||
for rr in model.not_modeled_residue_ranges: | ||
if rr.asym_unit is asym: | ||
for seq_id in range(rr.seq_id_begin, rr.seq_id_end + 1): | ||
handled_residues.add(seq_id) | ||
for atom in model._atoms: | ||
if atom.asym_unit is asym: | ||
handled_residues.add(atom.seq_id) | ||
# Convert set to a list of residue ranges | ||
handled_residues = ihm.util._make_range_from_list( | ||
sorted(handled_residues)) | ||
# Return not-modeled for each non-handled range | ||
for r in ihm.util._invert_ranges(handled_residues, | ||
end=assem.seq_id_range[1], | ||
start=assem.seq_id_range[0]): | ||
yield modelcif.model.NotModeledResidueRange(asym, r[0], r[1]) | ||
|
||
|
||
def get_args():
    """Parse the command line and return the resulting namespace.

    Two positional arguments are accepted: the input mmCIF file name
    (required) and the output mmCIF file name (optional, "output.cif" if
    omitted).
    """
    parser = argparse.ArgumentParser(
        description="Add minimal ModelCIF-related tables to an mmCIF file.")
    # Positional arguments, in command line order
    positionals = (
        ("input", dict(metavar="input.cif",
                       help="input mmCIF file name")),
        ("output", dict(metavar="output.cif",
                        help="output mmCIF file name",
                        default="output.cif", nargs="?")),
    )
    for name, options in positionals:
        parser.add_argument(name, **options)
    return parser.parse_args()
|
||
|
||
def main():
    """Read the input mmCIF file and write it back out as ModelCIF.

    The file names come from the command line (see get_args()). Every
    system read from the input is passed through add_modelcif_info()
    before being written to the output file.

    :raises ValueError: if input and output both exist and are the same
            file.
    """
    args = get_args()

    if (os.path.exists(args.input) and os.path.exists(args.output)
            and os.path.samefile(args.input, args.output)):
        raise ValueError("Input and output are the same file")

    # Read and augment everything before touching the output file, so that
    # a failure while parsing the input does not truncate an output file
    # that already exists.
    with open(args.input) as fh:
        systems = [add_modelcif_info(s) for s in modelcif.reader.read(fh)]

    with open(args.output, 'w') as fhout:
        modelcif.dumper.write(fhout, systems)
|
||
|
||
# Run the command line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,7 @@ | |
author='Ben Webb', | ||
author_email='[email protected]', | ||
url='https://github.com/ihmwg/python-modelcif', | ||
packages=['modelcif'], | ||
packages=['modelcif', 'modelcif.util'], | ||
install_requires=['ihm>=1.5'], | ||
classifiers=[ | ||
"Programming Language :: Python :: 2.7", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
data_PDBDEV_00000025 | ||
_entry.id PDBDEV_00000025 | ||
_struct.entry_id PDBDEV_00000025 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
data_model | ||
# | ||
_exptl.method 'model, MODELLER Version 9.24 2020/08/21 11:54:31' | ||
# | ||
_modeller.version 9.24 | ||
# | ||
loop_ | ||
_struct_asym.id | ||
_struct_asym.entity_id | ||
_struct_asym.details | ||
A 1 ? | ||
B 2 ? | ||
# | ||
loop_ | ||
_entity_poly_seq.entity_id | ||
_entity_poly_seq.num | ||
_entity_poly_seq.mon_id | ||
1 1 VAL | ||
1 2 GLY | ||
1 3 GLN | ||
1 4 GLN | ||
1 5 TYR | ||
1 6 SER | ||
1 7 SER | ||
2 1 ASP | ||
2 2 GLU | ||
# | ||
loop_ | ||
_atom_site.group_PDB | ||
_atom_site.type_symbol | ||
_atom_site.label_atom_id | ||
_atom_site.label_alt_id | ||
_atom_site.label_comp_id | ||
_atom_site.label_asym_id | ||
_atom_site.auth_asym_id | ||
_atom_site.label_seq_id | ||
_atom_site.auth_seq_id | ||
_atom_site.pdbx_PDB_ins_code | ||
_atom_site.Cartn_x | ||
_atom_site.Cartn_y | ||
_atom_site.Cartn_z | ||
_atom_site.occupancy | ||
_atom_site.B_iso_or_equiv | ||
_atom_site.label_entity_id | ||
_atom_site.id | ||
_atom_site.pdbx_PDB_model_num | ||
ATOM N N . VAL A A 1 2 ? 115.846 27.965 -26.370 1.000 141.830 1 1 1 | ||
ATOM C CA . VAL A A 1 2 ? 114.370 27.980 -26.088 1.000 143.490 1 2 1 | ||
ATOM C C . VAL A A 1 2 ? 113.517 27.504 -27.287 1.000 143.910 1 3 1 | ||
ATOM O O . VAL A A 1 2 ? 113.885 27.746 -28.441 1.000 146.600 1 4 1 | ||
ATOM C CB . VAL A A 1 2 ? 113.901 29.406 -25.683 1.000 143.750 1 5 1 | ||
ATOM C CG1 . VAL A A 1 2 ? 115.030 30.438 -25.931 1.000 144.590 1 6 1 | ||
ATOM C CG2 . VAL A A 1 2 ? 112.669 29.783 -26.486 1.000 144.500 1 7 1 | ||
ATOM N N . GLY A A 2 3 ? 112.371 26.869 -27.012 1.000 142.200 1 8 1 | ||
ATOM C CA . GLY A A 2 3 ? 111.506 26.368 -28.075 1.000 137.530 1 9 1 | ||
ATOM C C . GLY A A 2 3 ? 111.719 24.869 -28.275 1.000 135.820 1 10 1 | ||
ATOM O O . GLY A A 2 3 ? 110.768 24.093 -28.268 1.000 134.380 1 11 1 | ||
ATOM N N . GLN A A 3 4 ? 112.989 24.479 -28.392 1.000 134.310 1 12 1 | ||
ATOM C CA . GLN A A 3 4 ? 113.468 23.113 -28.639 1.000 128.420 1 13 1 | ||
ATOM C C . GLN A A 3 4 ? 113.556 22.956 -30.163 1.000 121.240 1 14 1 | ||
ATOM O O . GLN A A 3 4 ? 113.552 23.977 -30.840 1.000 127.090 1 15 1 | ||
ATOM C CB . GLN A A 3 4 ? 112.614 22.038 -27.919 1.000 132.340 1 16 1 | ||
ATOM C CG . GLN A A 3 4 ? 113.028 21.943 -26.407 1.000 135.370 1 17 1 | ||
ATOM C CD . GLN A A 3 4 ? 112.604 20.667 -25.677 1.000 138.260 1 18 1 | ||
ATOM O OE1 . GLN A A 3 4 ? 112.836 19.543 -26.150 1.000 141.450 1 19 1 | ||
ATOM N NE2 . GLN A A 3 4 ? 112.006 20.839 -24.497 1.000 139.310 1 20 1 | ||
ATOM N N . GLN A A 4 5 ? 113.648 21.739 -30.710 1.000 124.970 1 21 1 | ||
ATOM C CA . GLN A A 4 5 ? 113.808 21.534 -32.168 1.000 117.620 1 22 1 | ||
ATOM C C . GLN A A 4 5 ? 114.778 22.519 -32.833 1.000 112.980 1 23 1 | ||
ATOM O O . GLN A A 4 5 ? 114.677 23.727 -32.677 1.000 116.850 1 24 1 | ||
ATOM C CB . GLN A A 4 5 ? 112.456 21.545 -32.905 1.000 121.870 1 25 1 | ||
ATOM C CG . GLN A A 4 5 ? 111.763 20.153 -32.917 1.000 123.750 1 26 1 | ||
ATOM C CD . GLN A A 4 5 ? 110.863 19.874 -34.145 1.000 123.650 1 27 1 | ||
ATOM O OE1 . GLN A A 4 5 ? 110.040 20.712 -34.537 1.000 122.500 1 28 1 | ||
ATOM N NE2 . GLN A A 4 5 ? 111.008 18.674 -34.737 1.000 122.090 1 29 1 | ||
ATOM N N . SER A A 7 8 ? 117.999 25.245 -39.224 1.000 89.750 1 48 1 | ||
ATOM C CA . SER A A 7 8 ? 119.165 25.590 -40.036 1.000 87.320 1 49 1 | ||
ATOM C C . SER A A 7 8 ? 119.224 27.089 -40.277 1.000 84.820 1 50 1 | ||
ATOM O O . SER A A 7 8 ? 120.074 27.594 -41.008 1.000 84.020 1 51 1 | ||
ATOM C CB . SER A A 7 8 ? 119.112 24.859 -41.383 1.000 88.180 1 52 1 | ||
ATOM O OG . SER A A 7 8 ? 117.956 25.221 -42.117 1.000 88.850 1 53 1 | ||
ATOM N N . ASP B B 1 3 ? 71.339 57.678 52.031 1.000 152.010 2 54 1 | ||
ATOM C CA . ASP B B 1 3 ? 70.427 58.819 51.717 1.000 152.390 2 55 1 | ||
ATOM C C . ASP B B 1 3 ? 70.144 58.821 50.222 1.000 151.960 2 56 1 | ||
ATOM O O . ASP B B 1 3 ? 70.984 59.245 49.435 1.000 151.590 2 57 1 | ||
ATOM C CB . ASP B B 1 3 ? 71.083 60.142 52.119 1.000 153.250 2 58 1 | ||
ATOM C CG . ASP B B 1 3 ? 71.660 60.105 53.526 1.000 154.120 2 59 1 | ||
ATOM O OD1 . ASP B B 1 3 ? 72.652 59.371 53.741 1.000 154.200 2 60 1 | ||
ATOM O OD2 . ASP B B 1 3 ? 71.119 60.804 54.415 1.000 154.250 2 61 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
data_PDBDEV_00000025 | ||
_entry.id PDBDEV_00000025 | ||
_struct.entry_id PDBDEV_00000025 | ||
_struct.title 'Architecture of Pol II(G) and molecular mechanism of transcription regulation by Gdown1' |
Oops, something went wrong.