diff --git a/prody/proteins/ciffile.py b/prody/proteins/ciffile.py index 921736ac0..a0e310047 100644 --- a/prody/proteins/ciffile.py +++ b/prody/proteins/ciffile.py @@ -19,6 +19,8 @@ from .cifheader import getCIFHeaderDict from .header import buildBiomolecules, assignSecstr, isHelix, isSheet +from string import ascii_uppercase + __all__ = ['parseMMCIFStream', 'parseMMCIF', 'parseCIF'] @@ -300,6 +302,7 @@ def _parseMMCIFLines(atomgroup, lines, model, chain, subset, doneAtomBlock = False start = 0 stop = 0 + warnedAltloc = False while not doneAtomBlock: line = lines[i] if line[:11] == '_atom_site.': @@ -431,7 +434,7 @@ def _parseMMCIFLines(atomgroup, lines, model, chain, subset, continue alt = line.split()[fields['label_alt_id']] - if alt not in which_altlocs and which_altlocs != 'all': + if not (alt in which_altlocs or ascii_uppercase[int(alt)-1] in which_altlocs) and which_altlocs != 'all': continue if alt == '.': @@ -505,12 +508,8 @@ def _parseMMCIFLines(atomgroup, lines, model, chain, subset, anisou = None siguij = None - try: - data = parseSTARSection(lines, "_atom_site_anisotrop") - x = data[0] # check if data has anything in it - except IndexError: - LOGGER.warn("No anisotropic B factors found") - else: + data = parseSTARSection(lines, "_atom_site_anisotrop", report=False) + if len(data) > 0: anisou = np.zeros((acount, 6), dtype=float) diff --git a/prody/proteins/cifheader.py b/prody/proteins/cifheader.py index 962a86299..adf4fb99b 100644 --- a/prody/proteins/cifheader.py +++ b/prody/proteins/cifheader.py @@ -178,8 +178,8 @@ def _getBiomoltrans(lines): # 2 blocks are needed for this: # _pdbx_struct_assembly_gen: what to apply to which chains # _pdbx_struct_oper_list: everything else - data1 = parseSTARSection(lines, '_pdbx_struct_assembly_gen') - data2 = parseSTARSection(lines, '_pdbx_struct_oper_list') + data1 = parseSTARSection(lines, '_pdbx_struct_assembly_gen', report=False) + data2 = parseSTARSection(lines, '_pdbx_struct_oper_list', report=False) # extracting the data for n, item1 in enumerate(data1): @@ -225,7 +225,7 @@ def _getRelatedEntries(lines): try: key = "_pdbx_database_related" - data = parseSTARSection(lines, key) + data = parseSTARSection(lines, key, report=False) for item in data: dbref = DBRef() dbref.accession = item[key + ".db_id"] @@ -715,8 +715,8 @@ def _getReference(lines): # JRNL double block. Blocks 6 and 7 as copied from COMPND # Block 1 has most info. Block 2 has author info - items1 = parseSTARSection(lines, "_citation") - items2 = parseSTARSection(lines, "_citation_author") + items1 = parseSTARSection(lines, "_citation", report=False) + items2 = parseSTARSection(lines, "_citation_author", report=False) for row in items1: for k, value in row.items(): @@ -767,7 +767,7 @@ def _getPolymers(lines): entities = defaultdict(list) # SEQRES block - items1 = parseSTARSection(lines, '_entity_poly') + items1 = parseSTARSection(lines, '_entity_poly', report=False) for item in items1: chains = item['_entity_poly.pdbx_strand_id'] @@ -781,7 +781,7 @@ def _getPolymers(lines): '_entity_poly.pdbx_seq_one_letter_code_can'].replace(';', '').split()) # DBREF block 1 - items2 = parseSTARSection(lines, '_struct_ref') + items2 = parseSTARSection(lines, '_struct_ref', report=False) for item in items2: entity = item["_struct_ref.id"] @@ -798,7 +798,7 @@ def _getPolymers(lines): poly.dbrefs.append(dbref) # DBREF block 2 - items3 = parseSTARSection(lines, "_struct_ref_seq") + items3 = parseSTARSection(lines, "_struct_ref_seq", report=False) for i, item in enumerate(items3): i += 1 @@ -884,7 +884,7 @@ def _getPolymers(lines): last = temp # MODRES block - data4 = parseSTARSection(lines, "_pdbx_struct_mod_residue") + data4 = parseSTARSection(lines, "_pdbx_struct_mod_residue", report=False) for data in data4: ch = data["_pdbx_struct_mod_residue.label_asym_id"] @@ -904,7 +904,7 @@ def _getPolymers(lines): data["_pdbx_struct_mod_residue.details"])) # SEQADV block - data5 = parseSTARSection(lines, "_struct_ref_seq_dif") + data5 = parseSTARSection(lines, "_struct_ref_seq_dif", report=False) for i, data in enumerate(data5): ch = data["_struct_ref_seq_dif.pdbx_pdb_strand_id"] @@ -964,8 +964,8 @@ def _getPolymers(lines): # COMPND double block. # Block 6 has most info. Block 7 has synonyms - data6 = parseSTARSection(lines, "_entity") - data7 = parseSTARSection(lines, "_entity_name_com") + data6 = parseSTARSection(lines, "_entity", report=False) + data7 = parseSTARSection(lines, "_entity_name_com", report=False) dict_ = {} for molecule in data6: @@ -1045,7 +1045,7 @@ def _getChemicals(lines): # 1st block we need is has info about location in structure # this instance only includes single sugars not branched structures - items = parseSTARSection(lines, "_pdbx_nonpoly_scheme") + items = parseSTARSection(lines, "_pdbx_nonpoly_scheme", report=False) for data in items: resname = data["_pdbx_nonpoly_scheme.mon_id"] @@ -1064,7 +1064,7 @@ def _getChemicals(lines): chemicals[chem.resname].append(chem) # next we get the equivalent one for branched sugars part - items = parseSTARSection(lines, "_pdbx_branch_scheme") + items = parseSTARSection(lines, "_pdbx_branch_scheme", report=False) for data in items: resname = data["_pdbx_branch_scheme.mon_id"] @@ -1080,7 +1080,7 @@ def _getChemicals(lines): chemicals[chem.resname].append(chem) # 2nd block to get has general info e.g. name and formula - items = parseSTARSection(lines, "_chem_comp") + items = parseSTARSection(lines, "_chem_comp", report=False) for data in items: resname = data["_chem_comp.id"] @@ -1155,7 +1155,7 @@ def _getTitle(lines): title = '' try: - data = parseSTARSection(lines, "_struct") + data = parseSTARSection(lines, "_struct", report=False) for item in data: title += item['_struct.title'].upper() except: @@ -1172,7 +1172,7 @@ def _getAuthors(lines): authors = [] try: - data = parseSTARSection(lines, "_audit_author") + data = parseSTARSection(lines, "_audit_author", report=False) for item in data: author = ''.join(item['_audit_author.name'].split(', ')[::-1]) authors.append(author.upper()) @@ -1192,7 +1192,7 @@ def _getSplit(lines): key = "_pdbx_database_related" try: - data, _ = parseSTARSection(lines, key) + data, _ = parseSTARSection(lines, key, report=False) for item in data: if item[key + '.content_type'] == 'split': split.append(item[key + '.db_id']) @@ -1227,7 +1227,7 @@ def _getOther(lines, key=None): data = [] try: - data = parseSTARSection(lines, key) + data = parseSTARSection(lines, key, report=False) except: pass @@ -1242,7 +1242,7 @@ def _getUnobservedSeq(lines): key_unobs = '_pdbx_unobs_or_zero_occ_residues' try: - unobs = parseSTARSection(lines, key_unobs) + unobs = parseSTARSection(lines, key_unobs, report=False) polymers = _getPolymers(lines) except: pass diff --git a/prody/proteins/mmtffile.py b/prody/proteins/mmtffile.py index 66896c282..0480e7045 100644 --- a/prody/proteins/mmtffile.py +++ b/prody/proteins/mmtffile.py @@ -323,6 +323,9 @@ def set_info(atomgroup, mmtf_data,get_bonds=False,altloc_sel='A'): if altloc_sel != 'all': #mask out any unwanted alternative locations mask = (altlocs == '') | (altlocs == altloc_sel) + + if np.all(mask == False): + mask = (altlocs == '') | (altlocs == altlocs[0]) atomgroup.setCoords(coords[:,mask]) atomgroup.setNames(atom_names[mask]) diff --git a/prody/proteins/starfile.py b/prody/proteins/starfile.py index a527daee6..4cd24cccb 100644 --- a/prody/proteins/starfile.py +++ b/prody/proteins/starfile.py @@ -1026,7 +1026,7 @@ def parseImagesFromSTAR(particlesSTAR, **kwargs): return np.array(images), parsed_images_data -def parseSTARSection(lines, key): +def parseSTARSection(lines, key, report=True): """Parse a section of data from *lines* from a STAR file corresponding to a *key* (part before the dot). This can be a loop or data block. @@ -1077,7 +1077,8 @@ def parseSTARSection(lines, key): else: data = [loop_dict["data"]] else: - LOGGER.warn("Could not find {0} in lines.".format(key)) + if report: + LOGGER.warn("Could not find {0} in lines.".format(key)) return [] return data