cosmetic changes

drorlab · Oct 1, 2023 · 61e688b · 61e688b
1 parent 8897620
commit 61e688b
Show file tree

Hide file tree

Showing 24 changed files with 580 additions and 696 deletions.
diff --git a/pensa/__init__.py b/pensa/__init__.py
@@ -2,6 +2,6 @@
 from .features import *
 from .statesinfo import *
 from .clusters import *
-from .comparison import *  
+from .comparison import *
 from .dimensionality import *
 
diff --git a/pensa/features/__init__.py b/pensa/features/__init__.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# - * - coding: utf-8 - * -
 """
 Methods to read in and process features from coordinates.
 
@@ -13,4 +13,4 @@
 from .txt_features import *
 from .csv_features import *
 from .hbond_features import *
-#from .pyemma_features import *
+# from .pyemma_features import *
diff --git a/pensa/features/atom_features.py b/pensa/features/atom_features.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# - * - coding: utf-8 - * -
 """
 Methods to obtain a timeseries distribution for the atom/ion pockets' occupancies.
 
@@ -19,8 +19,8 @@
 from gridData import Grid
 from tqdm import tqdm
 import os
-from pensa.preprocessing.density import *
-
+from pensa.preprocessing.density import \
+    get_grid, data_out, write_atom_to_pdb, convert_to_occ, local_maxima_3D
 
 
 def get_atom_features(structure_input, xtc_input, atomgroup, element, top_atoms=10,
@@ -58,7 +58,6 @@ def get_atom_features(structure_input, xtc_input, atomgroup, element, top_atoms=
             Data for all features
     """
 
-
     if write is not None:
         if out_name is None:
             print('WARNING: You must provide out_name if writing out result.')
@@ -95,89 +94,89 @@ def get_atom_features(structure_input, xtc_input, atomgroup, element, top_atoms=
     else:
         g = Grid(grid_input)
 
-    ##converting the density to a probability
+    # converting the density to a probability
     atom_number = len(u.select_atoms('name ' + atomgroup))
-    grid_data = np.array(g.grid)*atom_number/np.sum(np.array(g.grid))
+    grid_data = np.array(g.grid) * atom_number / np.sum(np.array(g.grid))
 
-    ##mask all probabilities below the average water probability
-    average_probability_density = atom_number/sum(1 for i in grid_data.flat if i)
-    ##mask all grid centers with density less than threshold density
+    # mask all probabilities below the average water probability
+    average_probability_density = atom_number / sum(1 for i in grid_data.flat if i)
+    # mask all grid centers with density less than threshold density
     grid_data[grid_data <= average_probability_density] = 0.0
 
     xyz, val = local_maxima_3D(grid_data)
-    ##negate the array to get descending order from most prob to least prob
-    val_sort = np.argsort(-1*val.copy())
+    # negate the array to get descending order from most prob to least prob
+    val_sort = np.argsort(-1 * val.copy())
     # values = [val[i] for i in val_sort]
     coords = [xyz[max_val] for max_val in val_sort]
     maxdens_coord_str = [str(item)[1:-1] for item in coords]
 
-    atom_information=[]
-    atom_dists=[]
-
+    atom_information = []
+    atom_dists = []
 
     if top_atoms > len(coords):
         top_atoms = len(coords)
 
     print('\n')
     print('Featurizing ', top_atoms, ' Atoms')
+
     for atom_no in range(top_atoms):
+
         print('\n')
-        print('Atom no: ', atom_no+1)
+        print('Atom no: ', atom_no + 1)
         print('\n')
 
-        counting=[]
-        shifted_coords=coords[atom_no]+g.origin
+        counting = []
+        shifted_coords = coords[atom_no] + g.origin
         point_str = str(shifted_coords)[1:-1]
-        ## Find all water atoms within 2.5 Angstroms of density maxima
+
+        # Find all water atoms within 2.5 Angstroms of density maxima
         for i in tqdm(range(len(u.trajectory))):
-        # for i in tqdm(range(100)):
             u.trajectory[i]
-            radius= ' 2.5'
-            atomgroup_IDS=list(u.select_atoms('name ' + atomgroup + ' and point ' + point_str +radius).indices)
-            if len(atomgroup_IDS)==0:
-                atomgroup_IDS=[-1]
+            radius = ' 2.5'
+            atomgroup_IDS = list(u.select_atoms('name ' + atomgroup + ' and point ' + point_str + radius).indices)
+            if len(atomgroup_IDS) == 0:
+                atomgroup_IDS[-1]
             counting.append(atomgroup_IDS)
 
-        ## Atom indices that appear in the atom site
+        # Atom indices that appear in the atom site
         flat_list = [item for sublist in counting for item in sublist]
 
-        atom_ID = 'a' + str(atom_no+1)
+        atom_ID = 'a' + str(atom_no + 1)
         atom_location = shifted_coords
-        pocket_occupation_frequency = 1 - flat_list.count(-1)/len(flat_list)
+        pocket_occupation_frequency = 1 - flat_list.count(-1) / len(flat_list)
         pocket_occupation_frequency = round(pocket_occupation_frequency, 4)
         atom_information.append([atom_ID, list(atom_location), pocket_occupation_frequency])
         atom_dists.append(counting)
 
-        ## Write data out and visualize atom sites in pdb
+        # Write data out and visualize atom sites in pdb
         if write is True:
             data_out('atom_features/' + out_name + atom_ID + '.txt', [counting])
-            data_out('atom_features/'+out_name+element+'AtomsSummary.txt', atom_information)
+            data_out('atom_features/' + out_name + element + 'AtomsSummary.txt', atom_information)
             write_atom_to_pdb(pdb_outname, atom_location, atom_ID, atomgroup)
 
             u_pdb = mda.Universe(pdb_outname)
             u_pdb.add_TopologyAttr('tempfactors')
             # Write values as beta-factors ("tempfactors") to a PDB file
             for res in range(len(atom_information)):
-                atom_resid = len(u_pdb.residues) - atom_no-1 + res
+                atom_resid = len(u_pdb.residues) - atom_no - 1 + res
                 u_pdb.residues[atom_resid].atoms.tempfactors = atom_information[res][-1]
             u_pdb.atoms.write(pdb_outname)
 
     # Add atom pocket atomIDs
-    feature_names[element+'Pocket_Idx']= [atinfo[0] for atinfo in atom_information]
-    features_data[element+'Pocket_Idx']= np.array(atom_dists, dtype=object)
+    feature_names[element + 'Pocket_Idx'] = [atinfo[0] for atinfo in atom_information]
+    features_data[element + 'Pocket_Idx'] = np.array(atom_dists, dtype=object)
 
     # Add atom pocket frequencies
-    feature_names[element+'Pocket_Occup']= [atinfo[0] for atinfo in atom_information]
-    features_data[element+'Pocket_Occup']= np.array([atinfo[2] for atinfo in atom_information], dtype=object)
+    feature_names[element + 'Pocket_Occup'] = [atinfo[0] for atinfo in atom_information]
+    features_data[element + 'Pocket_Occup'] = np.array([atinfo[2] for atinfo in atom_information], dtype=object)
 
     # Add atom pocket occupancy timeseries
-    feature_names[element+'Pocket_OccupDistr']= [atinfo[0] for atinfo in atom_information]
-    features_data[element+'Pocket_OccupDistr']= np.array([convert_to_occ(distr, -1, water=False) for distr in atom_dists], dtype=object)
+    feature_names[element + 'Pocket_OccupDistr'] = [atinfo[0] for atinfo in atom_information]
+    features_data[element + 'Pocket_OccupDistr'] = np.array([convert_to_occ(distr, -1, water=False) for distr in atom_dists], dtype=object)
 
     # Add atom pocket locations
-    feature_names[element+'Pocket_xyz']= [atinfo[0] for atinfo in atom_information]
-    features_data[element+'Pocket_xyz']= np.array([atinfo[1] for atinfo in atom_information], dtype=object)
+    feature_names[element + 'Pocket_xyz'] = [atinfo[0] for atinfo in atom_information]
+    features_data[element + 'Pocket_xyz'] = np.array([atinfo[1] for atinfo in atom_information], dtype=object)
 
     # Return the dictionaries.
     return feature_names, features_data
-
diff --git a/pensa/features/csv_features.py b/pensa/features/csv_features.py
@@ -68,4 +68,3 @@ def get_drormd_features(csv_file):
     for i, f in enumerate(feature_names):
         feature_data[:, i] = df[f]
     return feature_names, feature_data
-
Original file line number	Diff line number	Diff line change
Expand Up		@@ -68,4 +68,3 @@ def get_drormd_features(csv_file):
		for i, f in enumerate(feature_names):
		feature_data[:, i] = df[f]
		return feature_names, feature_data