Merge pull request prody#1860 from jamesmkrieger/pqr_fix

split pqr coords by line positions when needed
jamesmkrieger · Aug 22, 2024 · 70072a1 · 70072a1
2 parents f40d8cd + dde1bea
commit 70072a1
Show file tree

Hide file tree

Showing 8 changed files with 213 additions and 18 deletions.
diff --git a/prody/proteins/pdbfile.py b/prody/proteins/pdbfile.py
@@ -490,11 +490,10 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
         elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
         bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
         occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)
-        anisou = None
         siguij = None
     else:
         radii = np.zeros(asize, dtype=ATOMIC_FIELDS['radius'].dtype)
-
+    anisou = None
     asize = 2000 # increase array length by this much when needed
 
     start = split
@@ -537,19 +536,7 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
     dec = True
     while i < stop:
         line = lines[i]
-        if not isPDB:
-            fields = line.split()
-            if len(fields) == 10:
-                fields.insert(4, '')
-            elif len(fields) != 11:
-                LOGGER.warn('wrong number of fields for PQR format at line %d'%i)
-                i += 1
-                continue
-
-        if isPDB:
-            startswith = line[0:6].strip()
-        else:
-            startswith = fields[0]
+        startswith = line[0:6].strip()
 
         if startswith == 'ATOM' or startswith == 'HETATM':
             if isPDB:
@@ -560,6 +547,17 @@ def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
                 else:
                     resname = line[17:20].strip()
             else:
+                fields = line.split()
+                if fields[5].find('.') != -1:
+                    # coords too early as no chid
+                    fields.insert(4, '')
+                if len(fields) != 11:
+                    try:
+                        fields = fields[:6] + [line[30:38].strip(), line[38:46].strip(), line[46:54].strip()] + line[54:].split()
+                    except:
+                        LOGGER.warn('wrong number of fields for PQR format at line %d'%i)
+                        i += 1
+                        continue
                 atomname= fields[2]
                 resname = fields[3]
 
@@ -1689,7 +1687,10 @@ def writePQRStream(stream, atoms, **kwargs):
     write = stream.write
 
     calphas = atoms.ca
-    ssa = calphas.getSecstrs()
+    if calphas is not None:
+        ssa = calphas.getSecstrs()
+    else:
+        ssa = None
     helix = []
     sheet = []
     if ssa is not None:

diff --git a/prody/tests/datafiles/__init__.py b/prody/tests/datafiles/__init__.py
@@ -381,6 +381,26 @@
         'file': 'pdb7pbl.pdb',
         'atoms': 31396,
         'nucleoside': 252
+    },
+    'pqrUnknown': {
+        'file': 'pqr_snippet1.pqr',
+        'atoms': 5,
+        'models': 1
+    },
+    'pqrTranscomp': {
+        'file': 'pqr_snippet2_transcomp.pqr',
+        'atoms': 5,
+        'models': 1
+    },
+    'pqrFpocket': {
+        'file': 'pqr_snippet3_fpocket.pqr',
+        'atoms': 5,
+        'models': 1
+    },
+    'pqrPymol': {
+        'file': 'pqr_snippet4_pymol.pqr',
+        'atoms': 5,
+        'models': 1
     }
 }
 

diff --git a/prody/tests/datafiles/pqr_snippet1.pqr b/prody/tests/datafiles/pqr_snippet1.pqr
@@ -0,0 +1,5 @@
+ATOM      1    C STP     1     -26.417  62.269-123.283    0.00     3.30
+ATOM      2    O STP     1     -30.495  65.669-122.669    0.00     3.08
+ATOM      3    O STP     1     -25.792  61.516-124.085    0.00     3.32
+ATOM      4    O STP     1     -25.262  61.506-124.230    0.00     3.05
+ATOM      5    O STP     1     -30.521  65.070-122.439    0.00     3.05
diff --git a/prody/tests/datafiles/pqr_snippet2_transcomp.pqr b/prody/tests/datafiles/pqr_snippet2_transcomp.pqr
@@ -0,0 +1,5 @@
+ATOM      1  N   VAL     1      16.783  48.812  26.447  0.0577   1.550
+ATOM      2  H1  VAL     1      15.848  48.422  26.463  0.2272   1.200
+ATOM      3  H2  VAL     1      16.734  49.803  26.251  0.2272   1.200
+ATOM      4  H3  VAL     1      17.195  48.663  27.359  0.2272   1.200
+ATOM      5  CA  VAL     1      17.591  48.101  25.416 -0.0054   1.700
diff --git a/prody/tests/datafiles/pqr_snippet3_fpocket.pqr b/prody/tests/datafiles/pqr_snippet3_fpocket.pqr
@@ -0,0 +1,5 @@
+ATOM      1    C STP     1     -26.417  62.269-123.283    0.00     3.30
+ATOM      2    O STP     1     -30.495  65.669-122.669    0.00     3.08
+ATOM      3    O STP     1     -25.792  61.516-124.085    0.00     3.32
+ATOM      4    O STP     1     -25.262  61.506-124.230    0.00     3.05
+ATOM      5    O STP     1     -30.521  65.070-122.439    0.00     3.05
diff --git a/prody/tests/datafiles/pqr_snippet4_pymol.pqr b/prody/tests/datafiles/pqr_snippet4_pymol.pqr
@@ -0,0 +1,5 @@
+ATOM     29  P     G    11     -17.189  -6.642 -23.827  0.00000000   0.000
+ATOM     30  C5'   G    11     -15.744  -5.986 -25.911  0.00000000   0.000
+ATOM     31  O5'   G    11     -16.783  -5.642 -25.005  0.00000000   0.000
+ATOM     32  C4'   G    11     -15.722  -5.012 -27.074  0.00000000   0.000
+ATOM     33  O4'   G    11     -16.947  -5.133 -27.838  0.00000000   0.000
diff --git a/prody/tests/proteins/test_pdbfile.py b/prody/tests/proteins/test_pdbfile.py
@@ -19,7 +19,7 @@
 class TestParsePDB(unittest.TestCase):
 
     def setUp(self):
-        """Set PDB file data and parse the PDB file."""
+        """Set PDB file data."""
 
         self.pdb = DATA_FILES['multi_model_truncated']
         self.one = DATA_FILES['oneatom']
@@ -108,14 +108,22 @@ def testSubsetArgument(self):
                         'failed to parse correct number of "bb" atoms')
 
     def testAgArgument(self):
-        """Test outcome of valid and invalid *ag* arguments."""
+        """Test outcome of 2 invalid and 2 valid *ag* arguments."""
 
         path = pathDatafile(self.pdb['file'])
         self.assertRaises(TypeError, parsePDB, path, ag='AtomGroup')
+
         ag = prody.AtomGroup('One atom')
         ag.setCoords(np.array([[0., 0., 0.]]))
         self.assertRaises(ValueError, parsePDB, path, ag=ag)
+
+        ag = prody.AtomGroup('Test')
+        self.assertEqual(parsePDB(path, ag=ag).numAtoms(),
+            self.pdb['atoms'],
+            'parsePDB failed to parse correct number of atoms')
+
         ag = prody.AtomGroup('Test')
+        ag.setCoords(np.array([[0., 0., 0.]]*self.pdb['atoms']))
         self.assertEqual(parsePDB(path, ag=ag).numAtoms(),
             self.pdb['atoms'],
             'parsePDB failed to parse correct number of atoms')

diff --git a/prody/tests/proteins/test_pqrfile.py b/prody/tests/proteins/test_pqrfile.py
@@ -0,0 +1,146 @@
+"""This module contains unit tests for :mod:`~prody.proteins`."""
+
+import os
+
+import numpy as np
+from numpy.testing import *
+
+from prody.utilities import importDec
+dec = importDec()
+
+from prody import *
+from prody import LOGGER
+from prody.utilities import which
+from prody.tests import TEMPDIR, unittest
+from prody.tests.datafiles import *
+
+LOGGER.verbosity = 'none'
+
+class TestParsePRQ(unittest.TestCase):
+
+    def setUp(self):
+        """Set PQR file data."""
+
+        self.pqr1 = DATA_FILES['pqrUnknown']
+        self.pqr2 = DATA_FILES['pqrTranscomp']
+        self.pqr3 = DATA_FILES['pqrFpocket']
+        self.pqr4 = DATA_FILES['pqrPymol']
+
+    def testExamplePQR(self):
+        """Test the outcome of a simple parsing scenario with example 1 (unnamed)."""
+
+        path = pathDatafile(self.pqr1['file'])
+        ag = parsePQR(path)
+
+        self.assertIsInstance(ag, prody.AtomGroup,
+            'parsePQR failed to return an AtomGroup instance')
+
+        self.assertEqual(ag.numAtoms(), self.pqr1['atoms'],
+            'parsePQR failed to parse correct number of atoms')
+
+        self.assertEqual(ag.numCoordsets(), self.pqr1['models'],
+            'parsePQR failed to parse correct number of coordinate sets '
+            '(models)')
+
+        self.assertEqual(ag.getTitle(),
+             os.path.splitext(self.pqr1['file'])[0],
+            'failed to set AtomGroup title based on filename')
+
+    def testTranscompPQR(self):
+        """Test the outcome of a simple parsing scenario with transcomp example."""
+
+        path = pathDatafile(self.pqr2['file'])
+        ag = parsePQR(path)
+
+        self.assertIsInstance(ag, prody.AtomGroup,
+            'parsePQR failed to return an AtomGroup instance')
+
+        self.assertEqual(ag.numAtoms(), self.pqr2['atoms'],
+            'parsePQR failed to parse correct number of atoms')
+
+        self.assertEqual(ag.numCoordsets(), self.pqr2['models'],
+            'parsePQR failed to parse correct number of coordinate sets '
+            '(models)')
+
+        self.assertEqual(ag.getTitle(),
+             os.path.splitext(self.pqr2['file'])[0],
+            'failed to set AtomGroup title based on filename')
+
+    def testFpocketPQR(self):
+        """Test the outcome of a simple parsing scenario with example 1 (unnamed)."""
+
+        path = pathDatafile(self.pqr3['file'])
+        ag = parsePQR(path)
+
+        self.assertIsInstance(ag, prody.AtomGroup,
+            'parsePQR failed to return an AtomGroup instance')
+
+        self.assertEqual(ag.numAtoms(), self.pqr3['atoms'],
+            'parsePQR failed to parse correct number of atoms')
+
+        self.assertEqual(ag.numCoordsets(), self.pqr3['models'],
+            'parsePQR failed to parse correct number of coordinate sets '
+            '(models)')
+
+        self.assertEqual(ag.getTitle(),
+             os.path.splitext(self.pqr3['file'])[0],
+            'failed to set AtomGroup title based on filename')
+
+    def testPymolPQR(self):
+        """Test the outcome of a simple parsing scenario with example 1 (unnamed)."""
+
+        path = pathDatafile(self.pqr4['file'])
+        ag = parsePQR(path)
+
+        self.assertIsInstance(ag, prody.AtomGroup,
+            'parsePQR failed to return an AtomGroup instance')
+
+        self.assertEqual(ag.numAtoms(), self.pqr4['atoms'],
+            'parsePQR failed to parse correct number of atoms')
+
+        self.assertEqual(ag.numCoordsets(), self.pqr4['models'],
+            'parsePQR failed to parse correct number of coordinate sets '
+            '(models)')
+
+        self.assertEqual(ag.getTitle(),
+             os.path.splitext(self.pqr4['file'])[0],
+            'failed to set AtomGroup title based on filename')
+
+    def testTitleArgument(self):
+        """Test outcome of *title* argument."""
+
+        path = pathDatafile(self.pqr1['file'])
+        title = 'small protein'
+        self.assertEqual(parsePQR(path, title=title).getTitle(),
+             title, 'parsePQR failed to set user given title')
+
+    def testSubsetArgument(self):
+        """Test outcome of valid and invalid *subset* arguments."""
+
+        path = pathDatafile(self.pqr2['file'])
+        self.assertRaises(TypeError, parsePQR, path, subset=['A'])
+        self.assertEqual(parsePQR(path, subset='ca').numAtoms(), 1,
+                        'failed to parse correct number of "ca" atoms')
+        self.assertEqual(parsePQR(path, subset='bb').numAtoms(), 2,
+                        'failed to parse correct number of "bb" atoms')
+
+    def testAgArgument(self):
+        """Test outcome of 2 invalid and 2 valid *ag* arguments."""
+
+        path = pathDatafile(self.pqr1['file'])
+        self.assertRaises(TypeError, parsePQR, path, ag='AtomGroup')
+
+        ag = prody.AtomGroup('One atom')
+        ag.setCoords(np.array([[0., 0., 0.]]))
+        self.assertRaises(ValueError, parsePQR, path, ag=ag)
+
+        ag = prody.AtomGroup('Test')
+        self.assertEqual(parsePQR(path, ag=ag).numAtoms(),
+            self.pqr1['atoms'],
+            'parsePQR failed to parse correct number of atoms')
+
+        ag = prody.AtomGroup('Test')
+        ag.setCoords(np.array([[0., 0., 0.]]*5))
+        self.assertEqual(parsePQR(path, ag=ag).numAtoms(),
+            self.pqr1['atoms'],
+            'parsePQR failed to parse correct number of atoms')