Merge pull request prody#1791 from jamesmkrieger/find_cif_emd

Look for CIF and EMD files in the path
jamesmkrieger · Nov 8, 2023 · 0a0c500
2 parents 2658cbe + 53fbbc8
commit 0a0c500
Show file tree

Hide file tree

Showing 3 changed files with 35 additions and 10 deletions.
diff --git a/prody/proteins/ciffile.py b/prody/proteins/ciffile.py
@@ -88,7 +88,7 @@ def parseMMCIF(pdb, **kwargs):
     auto_bonds = SETTINGS.get('auto_bonds')
     get_bonds = kwargs.get('bonds', auto_bonds)
     if get_bonds:
-        LOGGER.warn('Parsing struct_conn information from mmCIF is current unsupported and no bond information is added to the results')
+        LOGGER.warn('Parsing struct_conn information from mmCIF is currently unsupported and no bond information is added to the results')
     if not os.path.isfile(pdb):
         if len(pdb) == 5 and pdb.isalnum():
             if chain is None:
@@ -107,8 +107,12 @@ def parseMMCIF(pdb, **kwargs):
 
             if os.path.isfile(pdb + '.cif'):
                 filename = pdb + '.cif'
+                LOGGER.debug('CIF file is found in working directory ({0}).'
+                            .format(filename))
             elif os.path.isfile(pdb + '.cif.gz'):
                 filename = pdb + '.cif.gz'
+                LOGGER.debug('CIF file is found in working directory ({0}).'
+                            .format(filename))
             else:
                 filename = fetchPDB(pdb, report=True,
                                     format='cif', compressed=False)

diff --git a/prody/proteins/emdfile.py b/prody/proteins/emdfile.py
@@ -72,8 +72,12 @@ def parseEMD(emd, **kwargs):
 
             if os.path.isfile(emd + '.map'):
                 filename = emd + '.map'
+                LOGGER.debug('EMD file is found in working directory ({0}).'
+                            .format(filename))
             elif os.path.isfile(emd + '.map.gz'):
                 filename = emd + '.map.gz'
+                LOGGER.debug('EMD file is found in working directory ({0}).'
+                            .format(filename))
             else:
                 filename = fetchPDB(emd, report=True,
                                     format='emd', compressed=False)
@@ -91,6 +95,14 @@ def parseEMD(emd, **kwargs):
     result = parseEMDStream(emdStream, **kwargs)
     emdStream.close()
 
+    if hasattr(result, 'numAtoms'):
+        LOGGER.info('Output is an AtomGroup with {0} atoms fitted.'.format(result.numAtoms()))
+    elif hasattr(result, 'apix'):
+        LOGGER.info('Output is an EMDMAP with {:4.2f} A/pix.'.format(result.apix[0]))
+    else:
+        LOGGER.warn('Atomic data could not be parsed, please '
+                    'check the input file.')
+
     return result
 
 
@@ -128,8 +140,6 @@ def parseEMDStream(stream, **kwargs):
     else:
         make_nodes = False
         map = True
-        LOGGER.info('As n_nodes is less than or equal to 0, no nodes will be'
-                    ' made and the raw map will be returned')
 
     emd = EMDMAP(stream, min_cutoff, max_cutoff)
 

diff --git a/prody/proteins/localpdb.py b/prody/proteins/localpdb.py
@@ -212,16 +212,15 @@ def fetchPDB(*pdb, **kwargs):
     if len(pdb) == 1 and isinstance(pdb[0], list):
         pdb = pdb[0]
 
-    if 'format' in kwargs and kwargs.get('format') != 'pdb':
-        return fetchPDBviaFTP(*pdb, **kwargs)
-
     identifiers = checkIdentifiers(*pdb)
 
     folder = kwargs.get('folder', '.')
     compressed = kwargs.get('compressed')
+    format_ = kwargs.get('format')
 
     # check *folder* specified by the user, usually pwd ('.')
-    filedict = findPDBFiles(folder, compressed=compressed)
+    filedict = findPDBFiles(folder, compressed=compressed, 
+                            format=format_)
 
     filenames = []
     not_found = []
@@ -240,8 +239,8 @@ def fetchPDB(*pdb, **kwargs):
         if len(filenames) == 1:
             filenames = filenames[0]
             if exists:
-                LOGGER.debug('PDB file is found in working directory ({0}).'
-                             .format(sympath(filenames)))
+                LOGGER.debug('{0} file is found in working directory ({1}).'
+                             .format(format_.upper(), sympath(filedict[pdb])))
         return filenames
 
     if not isWritable(folder):
@@ -414,6 +413,8 @@ def iterPDBFilenames(path=None, sort=False, unique=True, **kwargs):
 
     from re import compile, IGNORECASE
 
+    format = kwargs.get('format')
+
     if path is None or kwargs.get('mirror') is True:
         if path is None:
             path = pathPDBMirror()
@@ -436,10 +437,20 @@ def iterPDBFilenames(path=None, sort=False, unique=True, **kwargs):
         compressed = kwargs.get('compressed')
         if compressed is None:
             pdbext = compile('\.(pdb|ent)(\.gz)?$', IGNORECASE)
+            cifext = compile('\.(cif)(\.gz)?$', IGNORECASE)
+            emdext = compile('\.(emd|map|mrc)(\.gz)?$', IGNORECASE)
         elif compressed:
             pdbext = compile('\.(pdb|ent)\.gz$', IGNORECASE)
+            cifext = compile('\.(cif)\.gz$', IGNORECASE)
+            emdext = compile('\.(emd|map|mrc)\.gz$', IGNORECASE)
         else:
             pdbext = compile('\.(pdb|ent)$', IGNORECASE)
+            cifext = compile('\.(cif)$', IGNORECASE)
+            emdext = compile('\.(emd|map|mrc)$', IGNORECASE)
+        if format == 'cif':
+            pdbext = cifext
+        if format == 'emd':
+            pdbext = emdext
         pdbs = [pdb for pdb in iglob(join(path, '*')) if pdbext.search(pdb)]
         if sort:
             pdbs.sort(reverse=kwargs.get('reverse'))
@@ -476,7 +487,7 @@ def findPDBFiles(path, case=None, **kwargs):
         pdb = splitext(split(fn)[1])[0]
         ending = splitext(splitext(split(fn)[1])[0])[1]
         if ending == 'gz':
-            pdb = splittext(pdb)[0]
+            pdb = splitext(pdb)[0]
         if len(pdb) == 7 and pdb.startswith('pdb'):
             pdb = pdb[3:]
         if upper: