Skip to content

Commit

Permalink
cosmetic changes
Browse files Browse the repository at this point in the history
  • Loading branch information
martinvoegele committed Oct 1, 2023
1 parent 8897620 commit 61e688b
Show file tree
Hide file tree
Showing 24 changed files with 580 additions and 696 deletions.
2 changes: 1 addition & 1 deletion pensa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
from .features import *
from .statesinfo import *
from .clusters import *
from .comparison import *
from .comparison import *
from .dimensionality import *

4 changes: 2 additions & 2 deletions pensa/features/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
# - * - coding: utf-8 - * -
"""
Methods to read in and process features from coordinates.
Expand All @@ -13,4 +13,4 @@
from .txt_features import *
from .csv_features import *
from .hbond_features import *
#from .pyemma_features import *
# from .pyemma_features import *
75 changes: 37 additions & 38 deletions pensa/features/atom_features.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
# - * - coding: utf-8 - * -
"""
Methods to obtain a timeseries distribution for the atom/ion pockets' occupancies.
Expand All @@ -19,8 +19,8 @@
from gridData import Grid
from tqdm import tqdm
import os
from pensa.preprocessing.density import *

from pensa.preprocessing.density import \
get_grid, data_out, write_atom_to_pdb, convert_to_occ, local_maxima_3D


def get_atom_features(structure_input, xtc_input, atomgroup, element, top_atoms=10,
Expand Down Expand Up @@ -58,7 +58,6 @@ def get_atom_features(structure_input, xtc_input, atomgroup, element, top_atoms=
Data for all features
"""


if write is not None:
if out_name is None:
print('WARNING: You must provide out_name if writing out result.')
Expand Down Expand Up @@ -95,89 +94,89 @@ def get_atom_features(structure_input, xtc_input, atomgroup, element, top_atoms=
else:
g = Grid(grid_input)

##converting the density to a probability
# converting the density to a probability
atom_number = len(u.select_atoms('name ' + atomgroup))
grid_data = np.array(g.grid)*atom_number/np.sum(np.array(g.grid))
grid_data = np.array(g.grid) * atom_number / np.sum(np.array(g.grid))

##mask all probabilities below the average water probability
average_probability_density = atom_number/sum(1 for i in grid_data.flat if i)
##mask all grid centers with density less than threshold density
# mask all probabilities below the average water probability
average_probability_density = atom_number / sum(1 for i in grid_data.flat if i)
# mask all grid centers with density less than threshold density
grid_data[grid_data <= average_probability_density] = 0.0

xyz, val = local_maxima_3D(grid_data)
##negate the array to get descending order from most prob to least prob
val_sort = np.argsort(-1*val.copy())
# negate the array to get descending order from most prob to least prob
val_sort = np.argsort(-1 * val.copy())
# values = [val[i] for i in val_sort]
coords = [xyz[max_val] for max_val in val_sort]
maxdens_coord_str = [str(item)[1:-1] for item in coords]

atom_information=[]
atom_dists=[]

atom_information = []
atom_dists = []

if top_atoms > len(coords):
top_atoms = len(coords)

print('\n')
print('Featurizing ', top_atoms, ' Atoms')

for atom_no in range(top_atoms):

print('\n')
print('Atom no: ', atom_no+1)
print('Atom no: ', atom_no + 1)
print('\n')

counting=[]
shifted_coords=coords[atom_no]+g.origin
counting = []
shifted_coords = coords[atom_no] + g.origin
point_str = str(shifted_coords)[1:-1]
## Find all water atoms within 2.5 Angstroms of density maxima

# Find all water atoms within 2.5 Angstroms of density maxima
for i in tqdm(range(len(u.trajectory))):
# for i in tqdm(range(100)):
u.trajectory[i]
radius= ' 2.5'
atomgroup_IDS=list(u.select_atoms('name ' + atomgroup + ' and point ' + point_str +radius).indices)
if len(atomgroup_IDS)==0:
atomgroup_IDS=[-1]
radius = ' 2.5'
atomgroup_IDS = list(u.select_atoms('name ' + atomgroup + ' and point ' + point_str + radius).indices)
if len(atomgroup_IDS) == 0:
atomgroup_IDS[-1]
counting.append(atomgroup_IDS)

## Atom indices that appear in the atom site
# Atom indices that appear in the atom site
flat_list = [item for sublist in counting for item in sublist]

atom_ID = 'a' + str(atom_no+1)
atom_ID = 'a' + str(atom_no + 1)
atom_location = shifted_coords
pocket_occupation_frequency = 1 - flat_list.count(-1)/len(flat_list)
pocket_occupation_frequency = 1 - flat_list.count(-1) / len(flat_list)
pocket_occupation_frequency = round(pocket_occupation_frequency, 4)
atom_information.append([atom_ID, list(atom_location), pocket_occupation_frequency])
atom_dists.append(counting)

## Write data out and visualize atom sites in pdb
# Write data out and visualize atom sites in pdb
if write is True:
data_out('atom_features/' + out_name + atom_ID + '.txt', [counting])
data_out('atom_features/'+out_name+element+'AtomsSummary.txt', atom_information)
data_out('atom_features/' + out_name + element + 'AtomsSummary.txt', atom_information)
write_atom_to_pdb(pdb_outname, atom_location, atom_ID, atomgroup)

u_pdb = mda.Universe(pdb_outname)
u_pdb.add_TopologyAttr('tempfactors')
# Write values as beta-factors ("tempfactors") to a PDB file
for res in range(len(atom_information)):
atom_resid = len(u_pdb.residues) - atom_no-1 + res
atom_resid = len(u_pdb.residues) - atom_no - 1 + res
u_pdb.residues[atom_resid].atoms.tempfactors = atom_information[res][-1]
u_pdb.atoms.write(pdb_outname)

# Add atom pocket atomIDs
feature_names[element+'Pocket_Idx']= [atinfo[0] for atinfo in atom_information]
features_data[element+'Pocket_Idx']= np.array(atom_dists, dtype=object)
feature_names[element + 'Pocket_Idx'] = [atinfo[0] for atinfo in atom_information]
features_data[element + 'Pocket_Idx'] = np.array(atom_dists, dtype=object)

# Add atom pocket frequencies
feature_names[element+'Pocket_Occup']= [atinfo[0] for atinfo in atom_information]
features_data[element+'Pocket_Occup']= np.array([atinfo[2] for atinfo in atom_information], dtype=object)
feature_names[element + 'Pocket_Occup'] = [atinfo[0] for atinfo in atom_information]
features_data[element + 'Pocket_Occup'] = np.array([atinfo[2] for atinfo in atom_information], dtype=object)

# Add atom pocket occupancy timeseries
feature_names[element+'Pocket_OccupDistr']= [atinfo[0] for atinfo in atom_information]
features_data[element+'Pocket_OccupDistr']= np.array([convert_to_occ(distr, -1, water=False) for distr in atom_dists], dtype=object)
feature_names[element + 'Pocket_OccupDistr'] = [atinfo[0] for atinfo in atom_information]
features_data[element + 'Pocket_OccupDistr'] = np.array([convert_to_occ(distr, -1, water=False) for distr in atom_dists], dtype=object)

# Add atom pocket locations
feature_names[element+'Pocket_xyz']= [atinfo[0] for atinfo in atom_information]
features_data[element+'Pocket_xyz']= np.array([atinfo[1] for atinfo in atom_information], dtype=object)
feature_names[element + 'Pocket_xyz'] = [atinfo[0] for atinfo in atom_information]
features_data[element + 'Pocket_xyz'] = np.array([atinfo[1] for atinfo in atom_information], dtype=object)

# Return the dictionaries.
return feature_names, features_data

1 change: 0 additions & 1 deletion pensa/features/csv_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,3 @@ def get_drormd_features(csv_file):
for i, f in enumerate(feature_names):
feature_data[:, i] = df[f]
return feature_names, feature_data

Loading

0 comments on commit 61e688b

Please sign in to comment.