Skip to content

Commit

Permalink
Merge branch 'master' of github.com:prody/ProDy into scipion
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesmkrieger committed Nov 24, 2023
2 parents 86114ab + 30052ce commit 6e36e19
Show file tree
Hide file tree
Showing 26 changed files with 14,158 additions and 452 deletions.
4 changes: 2 additions & 2 deletions prody/apps/prody_apps/prody_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def prody_select(selstr, *pdbs, **kwargs):
prefix = kwargs.get('prefix', None)
suffix = kwargs.get('suffix', '_selected')
output = kwargs.get('output', None)
altloc = kwargs.get('altloc', None)
altloc = kwargs.get('altloc', 'all')

for pdb in pdbs:
pdb = parsePDB(pdb, altloc=altloc)
Expand Down Expand Up @@ -83,7 +83,7 @@ def addCommand(commands):
type=str, help=('output filename prefix (default: PDB filename)'))

group.add_argument('-L', '--altloc', dest='altloc', metavar='STR',
type=str, help=('altloc (default: None (take all))'))
type=str, default='all', help=('altloc (default: %(default)s)'))

group.add_argument('-x', '--suffix', dest='suffix', metavar='STR',
type=str, default='_selected',
Expand Down
21 changes: 21 additions & 0 deletions prody/atomic/atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,27 @@ def setCoords(self, coords):
self._ag._coords[acsi, self._index] = coords
self._ag._setTimeStamp(acsi)

def getAnisou(self):
    """Return a copy of the anisotropic temperature factors of the atom
    from the active coordinate set, or **None** when the atom group holds
    no anisotropic temperature factors."""

    stored = self._ag._anisous
    if stored is None:
        return None
    return stored[self.getACSIndex(), self._index].copy()

def _getAnisou(self):
"""Returns a view of anisotropic temperature factors of the atom from the active coordinate
set."""

if self._ag._anisous is not None:
return self._ag._anisous[self.getACSIndex(), self._index]

def setAnisou(self, anisou):
    """Assign *anisou* as the anisotropic temperature factors of the atom
    in the active coordinate set and refresh that set's time stamp."""

    active = self.getACSIndex()
    group = self._ag
    group._anisous[active, self._index] = anisou
    group._setTimeStamp(active)

def getCoordsets(self, indices=None):
"""Returns a copy of coordinate set(s) at given *indices*."""

Expand Down
115 changes: 112 additions & 3 deletions prody/atomic/atomgroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

from prody import LOGGER, PY2K
from prody.kdtree import KDTree
from prody.utilities import checkCoords, rangeString, getDistance, copy
from prody.utilities import (checkCoords, checkAnisous,
rangeString, getDistance, copy)

from .atomic import Atomic
from .fields import ATOMIC_FIELDS, READONLY
Expand Down Expand Up @@ -130,7 +131,7 @@ class AtomGroup(Atomic):
'_donors', '_acceptors', '_nbexclusions', '_crossterms',
'_cslabels', '_acsi', '_n_csets', '_data',
'_fragments', '_flags', '_flagsts', '_subsets',
'_msa', '_sequenceMap']
'_msa', '_sequenceMap', '_anisous']

def __init__(self, title='Unnamed'):

Expand Down Expand Up @@ -170,6 +171,7 @@ def __init__(self, title='Unnamed'):
self._subsets = None
self._msa = None
self._sequenceMap = None
self._anisous = None

def __repr__(self):

Expand Down Expand Up @@ -238,12 +240,20 @@ def __add__(self, other):
if self._n_csets:
if self._n_csets == other._n_csets:
new.setCoords(np.concatenate((self._coords, other._coords), 1))
this = self._anisous
that = other._anisous
if this is not None and that is not None:
if (isinstance(this, np.ndarray) and isinstance(that, np.ndarray)
and len(this) > 0 and len(that) > 0):
new.setAnisous(np.concatenate((self._anisous, other._anisous), 1))
if self._n_csets > 1:
LOGGER.info('All {0} coordinate sets are copied to '
'{1}.'.format(self._n_csets, new.getTitle()))
else:
new.setCoords(np.concatenate((self._getCoords(),
other._getCoords())))
new.setAnisous(np.concatenate((self.getAnisous(),
other.getAnisous())))
LOGGER.info('Active coordinate sets are copied to {0}.'
.format(new.getTitle()))
elif other._n_csets:
Expand Down Expand Up @@ -565,7 +575,101 @@ def _setCoords(self, coords, label='', overwrite=False):
self._setTimeStamp(acsi)
self._cslabels[acsi] = str(label)

def addCoordset(self, coords, label=None):
def getAnisous(self):
    """Return a copy of the anisotropic temperature factors stored for the
    active coordinate set, or **None** when none have been set."""

    stored = self._anisous
    return None if stored is None else stored[self._acsi].copy()

def _getAnisous(self):
"""Returns a view of anisotropic temperature factors from active coordinate set."""

if self._anisous is not None:
return self._anisous[self._acsi]

def setAnisous(self, anisous, label=''):
    """Set anisotropic temperature factors of atoms.  *anisous* may be any
    array like object or an object instance with :meth:`getAnisous`
    method.  The array is expected to have shape ``(n_atoms, 6)`` or
    ``(n_csets, n_atoms, 6)``.  After validation with
    :func:`checkAnisous`, *anisous* and *label* are handed to
    :meth:`_setAnisous`, which stores the data.

    :raises ValueError: when *anisous* is an object whose
        :meth:`getAnisous` returns **None**
    :raises TypeError: when :func:`checkAnisous` rejects the type of
        *anisous*"""

    atoms = anisous
    try:
        if self._anisous is None and hasattr(atoms, '_getAnisous'):
            anisous = atoms._getAnisous()
        else:
            anisous = atoms.getAnisous()
    except AttributeError:
        # *anisous* is not an atomic object, treat it as array like data
        if self._anisous is None:
            # np.array copies, so the data stored on first assignment is
            # not shared with the caller
            anisous = np.array(anisous)
        elif not isinstance(anisous, np.ndarray):
            # _setAnisous reads .shape and .ndim, so lists and tuples must
            # be converted here as well; values are copied into the
            # existing array later, hence no extra copy is needed
            anisous = np.asarray(anisous)
    else:
        if anisous is None:
            raise ValueError('anisous of {0} are not set'
                             .format(str(atoms)))

    try:
        checkAnisous(anisous, csets=True, dtype=(float, np.float32))
    except TypeError:
        raise TypeError('anisous must be a numpy array or an '
                        'object with `getAnisous` method')

    self._setAnisous(anisous, label=label)

def _setAnisous(self, anisous, label='', overwrite=False):
"""Set anisotropic temperature factors without data type checking.
*anisous* must be a :class:`~numpy.ndarray`, but may have data type
other than :class:`~numpy.float64`, e.g. :class:`~numpy.float32`.
*label* argument may be used to label coordinate sets. *label* may be
a string or a list of strings length equal to the number of
coordinate sets."""

n_atoms = self._n_atoms
if n_atoms:
if anisous.shape[-2] != n_atoms:
raise ValueError('anisous array has incorrect number of atoms')
else:
self._n_atoms = n_atoms = anisous.shape[-2]

ndim = anisous.ndim
shape = anisous.shape
if self._anisous is None or overwrite or (ndim == 6 and shape[0] > 1):
if ndim == 2:
self._anisous = anisous.reshape((1, n_atoms, 6))
self._cslabels = [str(label)]
self._n_csets = n_csets = 1

else:
self._anisous = anisous
self._n_csets = n_csets = shape[0]

if isinstance(label, list):
if len(label) == n_csets:
self._cslabels = list(label)

else:
self._cslabels = [''] * n_csets
LOGGER.warn('Number of labels does not match number '
'of coordinate sets.')
else:
self._cslabels = [str(label)] * n_csets
self._acsi = 0
self._setTimeStamp()

else:
acsi = self._acsi
if ndim == 2:
self._anisous[acsi] = anisous
else:
self._anisous[acsi] = anisous[0]
self._setTimeStamp(acsi)
self._cslabels[acsi] = str(label)

def addCoordset(self, coords, label=None, anisous=None):
"""Add a coordinate set. *coords* argument may be an object with
:meth:`getCoordsets` method."""

Expand Down Expand Up @@ -594,8 +698,13 @@ def addCoordset(self, coords, label=None):
if coords.ndim == 2:
coords = coords.reshape((1, n_atoms, 3))

if anisous is not None and anisous.ndim == 2:
anisous = anisous.reshape((1, n_atoms, 6))

diff = coords.shape[0]
self._coords = np.concatenate((self._coords, coords), axis=0)
if anisous is not None and self._anisous is not None:
self._anisous = np.concatenate((self._anisous, anisous/10000), axis=0)
self._n_csets = self._coords.shape[0]
timestamps = self._timestamps
self._timestamps = np.zeros(self._n_csets)
Expand Down
2 changes: 0 additions & 2 deletions prody/atomic/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,6 @@ def getDocstr(self, meth, plural=True, selex=True):
'altloc': Field('altloc', DTYPE + '1',
doc='alternate location indicator',
selstr=('altloc A B', 'altloc _'),),
'anisou': Field('anisou', float, doc='anisotropic temperature factor',
ndim=2),
'chain': Field('chain', DTYPE + '6', doc='chain identifier',
meth='Chid', none=HVNONE, synonym='chid',
selstr=('chain A', 'chid A B C', 'chain _')),
Expand Down
7 changes: 7 additions & 0 deletions prody/atomic/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -2424,3 +2424,10 @@ def _getCoords(self):
if self._coords is None:
self._coords = self._atoms._getCoords()
return self._coords

def _getAnisous(self):
"""Returns anisotropic temperature factors of atoms."""

if self._anisous is None:
self._anisous = self._atoms._getAnisous()
return self._anisous
16 changes: 16 additions & 0 deletions prody/atomic/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,29 @@ def getCoords(self):

_getCoords = getCoords

def getAnisous(self):
    """Return a copy of the anisotropic temperature factors of the subset
    from the active coordinate set, or **None** when the atom group holds
    no anisotropic temperature factors."""

    stored = self._ag._anisous
    if stored is None:
        return None
    # Indexing with the subset indices is not slicing, so the result is
    # not a view
    return stored[self.getACSIndex(), self._indices]

_getAnisous = getAnisous

def setCoords(self, coords):
"""Set coordinates in the active coordinate set."""

if self._ag._coords is not None:
self._ag._coords[self.getACSIndex(), self._indices] = coords
self._ag._setTimeStamp(self.getACSIndex())

def setAnisous(self, anisous):
    """Assign *anisous* to the atoms of the subset in the active coordinate
    set.  Nothing is done when the atom group holds no anisotropic
    temperature factors."""

    group = self._ag
    if group._anisous is None:
        return
    group._anisous[self.getACSIndex(), self._indices] = anisous
    group._setTimeStamp(self.getACSIndex())

def getCoordsets(self, indices=None):
"""Returns coordinate set(s) at given *indices*, which may be an integer
or a list/array of integers."""
Expand Down
14 changes: 12 additions & 2 deletions prody/database/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
* :func:`.fetchPfamMSA` - download MSA files
* :func:`.searchPfam` - search for domain families of a protein
.. _Pfam: http://pfam.sanger.ac.uk/
.. _Pfam: https://www.ebi.ac.uk/interpro/entry/pfam/
UniProt
========
Expand Down Expand Up @@ -70,7 +69,14 @@
.. _GOA: https://www.ebi.ac.uk/GOA/
Interpro
========
The following functions can be used to search and retrieve Interpro_ data:
* :func:`.searchInterpro` - search for domain families of a protein
.. _Interpro: https://www.ebi.ac.uk/interpro/
"""

__all__ = []
Expand Down Expand Up @@ -98,3 +104,7 @@
from . import quartataweb
from .quartataweb import *
__all__.extend(quartataweb.__all__)

from . import interpro
from .interpro import *
__all__.extend(interpro.__all__)
102 changes: 102 additions & 0 deletions prody/database/interpro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# -*- coding: utf-8 -*-
"""This module defines functions for interfacing Interpro database."""

__author__ = 'James Krieger'

import json
from os.path import isfile
from prody import LOGGER, PY3K

__all__ = ['searchInterpro']

prefix = 'https://www.ebi.ac.uk/interpro/wwwapi/entry/'

def searchInterpro(query, **kwargs):
    """Returns Interpro search results for *query*, i.e. the ``"results"``
    entry of the JSON reply sent by the Interpro web API.

    :arg query: UniProt ID or PDB identifier with or without a
        chain identifier, e.g. ``'1mkp'`` or ``'1mkpA'``.
        A four character query is searched as a PDB structure.  For a
        five character query, the PDB header is parsed and the UniProt
        accession of the specified chain is used for searching the
        Interpro database.  Any other query is used as a UniProt ID.
    :type query: str

    :arg timeout: timeout for blocking connection attempt in seconds, default
        is 60
    :type timeout: int

    :raises ValueError: when the PDB header cannot be parsed, when no
        UniProt accession is found for the chain, or when the reply is
        not valid JSON
    :raises IOError: when no content is retrieved within *timeout*
    """
    import requests

    LOGGER.timeit('_interpro')
    timeout = int(kwargs.get('timeout', 60))

    if len(query) == 4:
        url = prefix + "all/structure/pdb/" + query

    elif len(query) == 5:
        accession = None

        from prody import parsePDBHeader
        try:
            polymers = parsePDBHeader(query[:4], 'polymers')
        except Exception as err:
            raise ValueError('failed to parse header for {0} ({1})'
                             .format(query[:4], str(err)))

        chid = query[4:].upper()

        # use the first UniProt reference of the first matching chain
        for poly in polymers:
            if chid and poly.chid != chid:
                continue
            for dbref in poly.dbrefs:
                if dbref.database != 'UniProt':
                    continue
                accession = dbref.accession
                LOGGER.info('UniProt accession {0} for {1} chain '
                            '{2} will be used.'
                            .format(accession, query[:4], poly.chid))
                break
            if accession is not None:
                break

        if accession is None:
            raise ValueError('A UniProt accession for PDB {0} could not be '
                             'parsed.'.format(repr(query)))
        else:
            url = prefix + "all/protein/uniprot/" + accession

    else:
        url = prefix + "all/protein/uniprot/" + query

    LOGGER.debug('Retrieving Interpro search results: ' + url)
    result = None
    sleep = 2
    while LOGGER.timing('_interpro') < timeout:
        try:
            # NOTE(review): verify=False disables TLS certificate checks;
            # kept as in the original, confirm that it is still required.
            # The request timeout keeps a stalled connection from blocking
            # indefinitely.
            result = requests.get(url, verify=False,
                                  timeout=timeout).content
        except Exception as err:
            # best effort: record the failure and retry until timeout
            LOGGER.debug('Interpro request failed: ' + str(err))
        else:
            # the content is a byte string, so compare against bytes
            if result not in (b'PEND', b'RUN'):
                break

        # back off by a factor of 1.5, capped at 20 seconds
        sleep = min(20, int(sleep * 1.5))
        LOGGER.sleep(int(sleep), '. Trying to reconnect...')

    if not result:
        raise IOError('Interpro search timed out or failed to parse results, '
                      ' check URL: ' + url)
    else:
        LOGGER.report('Interpro search completed in %.2fs.', '_interpro')

    if PY3K:
        result = result.decode()

    try:
        result = json.loads(result)
    except ValueError:
        raise ValueError('failed to parse results as json, check URL: ' + url)

    return result["results"]
Loading

0 comments on commit 6e36e19

Please sign in to comment.