Skip to content

Commit

Permalink
Merge pull request prody#1865 from jamesmkrieger/bioexcel_cv19
Browse files Browse the repository at this point in the history
Bioexcel cv19
  • Loading branch information
jamesmkrieger authored Apr 17, 2024
2 parents 29610e7 + 9b8a9aa commit b7b2be2
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 109 deletions.
31 changes: 16 additions & 15 deletions prody/database/bioexcel.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,21 +192,17 @@ def parseBioexcelTopology(query, **kwargs):
ag._n_csets = 1
ag._acsi = 0

nodes = ag.select('name N')
indices = np.ix_(*[np.array(data['atom_residue_indices'])])

residue_chids = [data['chain_names'][chain_index] for chain_index in data['residue_chain_indices']]
chids, _ = extendAtomicData(residue_chids, nodes, ag)
ag.setChids(chids)
chids = np.array([data['chain_names'][chain_index]
for chain_index in data['residue_chain_indices']])
ag.setChids(chids[indices])

resnames, _ = extendAtomicData(data['residue_names'], nodes, ag)
ag.setResnames(resnames)

resnums, _ = extendAtomicData(data['residue_numbers'], nodes, ag)
ag.setResnums(resnums)
ag.setResnames(np.array(data['residue_names'])[indices])
ag.setResnums(np.array(data['residue_numbers'])[indices])

if data['residue_icodes'] is not None:
icodes, _ = extendAtomicData(data['residue_icodes'], nodes, ag)
ag.setIcodes(icodes)
ag.setIcodes(np.array(data['residue_icodes'])[indices])

# restore acsi and n_csets to defaults
ag._acsi = None
Expand Down Expand Up @@ -261,8 +257,13 @@ def parseBioexcelPDB(query, **kwargs):
ag = parsePDB(filename)
if ag is None:
filename = fetchBioexcelPDB(query, **kwargs)
ag = parsePDB(filename)

acc = basename(splitext(filename)[0])
ag2 = parseBioexcelTopology(acc, **kwargs)

return parsePDB(filename)
ag.setElements(ag2.getElements())
return ag

def convertXtcToDcd(filepath, **kwargs):
"""Convert xtc trajectories to dcd files using mdtraj.
Expand Down Expand Up @@ -320,7 +321,7 @@ def requestFromUrl(url, timeout, filepath, source=None):
fo.write(response)
fo.close()

top = mdtraj.load_psf(fetchBioexcelTopology(acc))
top = mdtraj.load_psf(fetchBioexcelTopology(acc, timeout=timeout))
mdtraj.load_xtc(filepath, top=top)

elif source == 'pdb':
Expand All @@ -339,7 +340,7 @@ def requestFromUrl(url, timeout, filepath, source=None):
else:
break

sleep = 20 if int(sleep * 1.5) >= 20 else int(sleep * 1.5)
sleep = 100 if int(sleep * 1.5) >= 100 else int(sleep * 1.5)
LOGGER.sleep(int(sleep), '. Trying to reconnect...')

return filepath
Expand Down Expand Up @@ -373,7 +374,7 @@ def checkConvert(**kwargs):
return convert

def checkTimeout(**kwargs):
    """Return the *timeout* keyword argument after validating its type.

    :arg timeout: value to validate, default is 200.
        Any number is accepted, as is **None**.

    :raises TypeError: if *timeout* is neither a number nor **None**
    """
    # Single source of truth for the default; the previous 60 default was
    # dead code that was immediately overwritten.
    timeout = kwargs.get('timeout', 200)
    if not isinstance(timeout, (Number, type(None))):
        raise TypeError('timeout should be number')
    return timeout
Expand Down
222 changes: 128 additions & 94 deletions prody/tests/database/test_bioexcel.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def testReplace(self):
self.assertEqual(checkTimeout(**{'timeout': 50}), 50)

def testDefault(self):
    """checkTimeout returns its default of 200 when no timeout is given."""
    # Only the current default is asserted; the former value (60) can no
    # longer be returned and asserting it would always fail.
    self.assertEqual(checkTimeout(**{}), 200)

class TestCheckFrames(unittest.TestCase):
"""Test that checkFrames gives the right errors and outputs."""
Expand Down Expand Up @@ -415,20 +415,23 @@ def testFetchFrames1(self):
"""Test the outcome of a simple fetch scenario using
default options."""

a = fetchBioexcelTrajectory(self.query, folder=self.workdir,
frames=self.frames1)

self.assertIsInstance(a, str,
'fetchBioexcelTrajectory failed to return a str instance')

self.assertTrue(os.path.isfile(a),
'fetchBioexcelTrajectory failed to return a file')

self.assertTrue(a.endswith('.dcd'),
'fetchBioexcelTrajectory default failed to return a dcd file')

self.assertEqual(a, os.path.join(self.workdir, self.query + '.dcd'),
'fetchBioexcelTrajectory default run did not give the right path')
try:
a = fetchBioexcelTrajectory(self.query, folder=self.workdir,
frames=self.frames1)
except OSError:
pass
else:
self.assertIsInstance(a, str,
'fetchBioexcelTrajectory failed to return a str instance')

self.assertTrue(os.path.isfile(a),
'fetchBioexcelTrajectory failed to return a file')

self.assertTrue(a.endswith('.dcd'),
'fetchBioexcelTrajectory default failed to return a dcd file')

self.assertEqual(a, os.path.join(self.workdir, self.query + '.dcd'),
'fetchBioexcelTrajectory default run did not give the right path')

ens = prody.parseDCD(a)

Expand All @@ -443,108 +446,126 @@ def testFetchSelectionFrames2(self):
"""Test the outcome of a simple fetch scenario
using selection='_C'."""

a = fetchBioexcelTrajectory(self.query, folder=self.workdir,
selection='_C', frames=self.frames2)

ens = prody.parseDCD(a)
self.assertIsInstance(ens, prody.Ensemble,
'parseDCD failed to return an Ensemble from fetchBioexcelTrajectory')
self.assertEqual(ens.numAtoms(), SELE_N_ATOMS,
'fetchBioexcelTrajectory selection _C output does not have correct number of atoms')
self.assertEqual(ens.numCoordsets(), N_FRAMES_2,
'fetchBioexcelTrajectory output with example frames 2 does not have correct number of frames')
try:
a = fetchBioexcelTrajectory(self.query, folder=self.workdir,
selection='_C', frames=self.frames2)
except OSError:
pass
else:
ens = prody.parseDCD(a)
self.assertIsInstance(ens, prody.Ensemble,
'parseDCD failed to return an Ensemble from fetchBioexcelTrajectory')
self.assertEqual(ens.numAtoms(), SELE_N_ATOMS,
'fetchBioexcelTrajectory selection _C output does not have correct number of atoms')
self.assertEqual(ens.numCoordsets(), N_FRAMES_2,
'fetchBioexcelTrajectory output with example frames 2 does not have correct number of frames')

def testFetchConvertFalse(self):
    """Test the outcome of a simple fetch scenario using
    convert=False."""

    try:
        a = fetchBioexcelTrajectory(self.query, folder=self.workdir,
                                    convert=False, frames=self.frames1)
    except OSError as err:
        # Report the failed fetch as a skip rather than a silent pass,
        # so the test report shows that nothing was verified.
        self.skipTest('Bioexcel trajectory fetch failed with OSError: %s' % err)

    self.assertIsInstance(a, str,
        'fetchBioexcelTrajectory failed to return a str instance')

    self.assertTrue(os.path.isfile(a),
        'fetchBioexcelTrajectory failed to return a file')

    self.assertTrue(a.endswith('.xtc'),
        'fetchBioexcelTrajectory with convert=False failed to return an xtc file')

    self.assertEqual(a, os.path.join(self.workdir, self.query + '.xtc'),
        'fetchBioexcelTrajectory with convert=False did not give the right path')

def testParseFrames1(self):
    """Test the outcome of a simple fetch and parse scenario
    with default parameters."""

    try:
        ens = parseBioexcelTrajectory(self.query, folder=self.workdir,
                                      frames=self.frames1)
    except OSError as err:
        # Report the failed fetch as a skip rather than a silent pass,
        # so the test report shows that nothing was verified.
        self.skipTest('Bioexcel trajectory fetch failed with OSError: %s' % err)

    self.assertIsInstance(ens, prody.Ensemble,
        'parseBioexcelTrajectory failed to return an Ensemble instance')
    self.assertEqual(ens.numAtoms(), FULL_N_ATOMS,
        'parseBioexcelTrajectory default output does not have correct number of atoms')
    self.assertEqual(ens.numCoordsets(), N_FRAMES_1,
        'parseBioexcelTrajectory output with example frames 1 does not have correct number of frames')

def testParseSelectionFrames2(self):
    """Test the outcome of a simple fetch and parse scenario
    using selection='_C'."""

    try:
        ens = parseBioexcelTrajectory(self.query, folder=self.workdir,
                                      selection='_C', frames=self.frames2)
    except OSError as err:
        # Report the failed fetch as a skip rather than a silent pass,
        # so the test report shows that nothing was verified.
        self.skipTest('Bioexcel trajectory fetch failed with OSError: %s' % err)

    self.assertIsInstance(ens, prody.Ensemble,
        'parseBioexcelTrajectory with selection failed to return an Ensemble')
    self.assertEqual(ens.numAtoms(), SELE_N_ATOMS,
        'parseBioexcelTrajectory selection _C output does not have correct number of atoms')
    self.assertEqual(ens.numCoordsets(), N_FRAMES_2,
        'parseBioexcelTrajectory output with example frames 2 does not have correct number of frames')

def testFetchAndParse(self):
    """Test the outcome of a simple fetch and parse scenario"""

    try:
        a = fetchBioexcelTrajectory(self.query, folder=self.workdir,
                                    frames=self.frames1)
    except OSError as err:
        # Report the failed fetch as a skip rather than a silent pass,
        # so the test report shows that nothing was verified.
        self.skipTest('Bioexcel trajectory fetch failed with OSError: %s' % err)

    # Parsing stays outside the guarded region, as in the original:
    # only an OSError from the fetch itself leads to a skip.
    ens = parseBioexcelTrajectory(a, folder=self.workdir)

    self.assertIsInstance(ens, prody.Ensemble,
        'parseBioexcelTrajectory failed to return an Ensemble instance')
    self.assertEqual(ens.numAtoms(), FULL_N_ATOMS,
        'parseBioexcelTrajectory default output does not have correct number of atoms')
    self.assertEqual(ens.numCoordsets(), N_FRAMES_1,
        'parseBioexcelTrajectory output with example frames 1 does not have correct number of frames')

def testFetchNoConvParse(self):
    """Test the outcome of a simple fetch, then internally convert and parse scenario."""

    try:
        a = fetchBioexcelTrajectory(self.query, folder=self.workdir,
                                    convert=False, frames=self.frames1)
    except OSError as err:
        # Report the failed fetch as a skip rather than a silent pass,
        # so the test report shows that nothing was verified.
        self.skipTest('Bioexcel trajectory fetch failed with OSError: %s' % err)

    # Parsing stays outside the guarded region, as in the original:
    # only an OSError from the fetch itself leads to a skip.
    ens = parseBioexcelTrajectory(a)

    self.assertIsInstance(ens, prody.Ensemble,
        'parseBioexcelTrajectory failed to return an Ensemble instance')
    self.assertEqual(ens.numAtoms(), FULL_N_ATOMS,
        'parseBioexcelTrajectory default output does not have correct number of atoms')
    self.assertEqual(ens.numCoordsets(), N_FRAMES_1,
        'parseBioexcelTrajectory output with example frames 1 does not have correct number of frames')

def testFetchConvParse(self):
    """Test the outcome of a simple fetch, externally convert and then parse scenario."""

    try:
        a = fetchBioexcelTrajectory(self.query, folder=self.workdir,
                                    convert=False, frames=self.frames1)
    except OSError as err:
        # Report the failed fetch as a skip rather than a silent pass,
        # so the test report shows that nothing was verified.
        self.skipTest('Bioexcel trajectory fetch failed with OSError: %s' % err)

    # Conversion and parsing stay outside the guarded region, as in the
    # original: only an OSError from the fetch itself leads to a skip.
    b = convertXtcToDcd(a)
    ens = parseBioexcelTrajectory(b)

    self.assertIsInstance(ens, prody.Ensemble,
        'parseBioexcelTrajectory failed to return an Ensemble instance')
    self.assertEqual(ens.numAtoms(), FULL_N_ATOMS,
        'parseBioexcelTrajectory default output does not have correct number of atoms')
    self.assertEqual(ens.numCoordsets(), N_FRAMES_1,
        'parseBioexcelTrajectory output with example frames 1 does not have correct number of frames')

def testConvertWrongType(self):
with self.assertRaises(TypeError):
Expand All @@ -564,12 +585,25 @@ def setUpClass(cls):
cls.xtcPath = pathDatafile(cls.query + '.xtc')
cls.dcdPath = pathDatafile(cls.query + '.dcd')

cls.jsonPath = pathDatafile('MCV1900193.json')
cls.PROTEIN_GLYCAN_N_ATOMS = 72759
cls.CA_N_ATOMS = 3768

def testParseBioexcelTop(self):
    """Check that parseBioexcelTopology on self.psfPath gives an
    AtomGroup containing FULL_N_ATOMS atoms."""
    type_msg = 'parseBioexcelTopology failed to return an AtomGroup from data files'
    count_msg = 'parseBioexcelTopology data files output does not have correct number of atoms'

    topology = parseBioexcelTopology(self.psfPath)

    self.assertIsInstance(topology, prody.AtomGroup, type_msg)
    self.assertEqual(topology.numAtoms(), FULL_N_ATOMS, count_msg)

def testParseBioexcelTopJsonGlycan(self):
    """Check that parseBioexcelTopology on self.jsonPath gives an
    AtomGroup with the expected total and CA atom counts."""
    type_msg = 'parseBioexcelTopology failed to return an AtomGroup from data files'
    total_msg = ('parseBioexcelTopology data files output using MCV1900193 '
                 'with glycans does not have correct number of atoms')
    ca_msg = ('parseBioexcelTopology data files output using MCV1900193 '
              'with glycans does not have correct number of CA atoms')

    topology = parseBioexcelTopology(self.jsonPath)

    self.assertIsInstance(topology, prody.AtomGroup, type_msg)
    self.assertEqual(topology.numAtoms(), self.PROTEIN_GLYCAN_N_ATOMS, total_msg)
    self.assertEqual(topology.ca.numAtoms(), self.CA_N_ATOMS, ca_msg)

def testConvertToDCD(self):
a = convertXtcToDcd(self.xtcPath, top=self.psfPath)
Expand Down
1 change: 1 addition & 0 deletions prody/tests/datafiles/MCV1900193.json

Large diffs are not rendered by default.

0 comments on commit b7b2be2

Please sign in to comment.