adapt function name extract_aligned_coordinates, remove hard-coded fi…

…le names, improve style
drorlab · Nov 27, 2023 · c7f3691 · c7f3691
1 parent 123b800
commit c7f3691
Show file tree

Hide file tree

Showing 2 changed files with 41 additions and 43 deletions.
diff --git a/pensa/preprocessing/__init__.py b/pensa/preprocessing/__init__.py
@@ -15,7 +15,7 @@
     get_transmem_from_uniprot
 
 from .density import \
-    extract_aligned_coords, \
+    extract_aligned_coordinates, \
     extract_combined_grid, \
     generate_grid, \
     dens_grid_pdb, \

diff --git a/pensa/preprocessing/density.py b/pensa/preprocessing/density.py
@@ -136,75 +136,72 @@ def extract_combined_grid(struc_a, xtc_a, struc_b, xtc_b, atomgroup, write_grid_
         python arrays. The default is False.
 
     """
-    if not os.path.exists('dens/'):
-        os.makedirs('dens/')
-
     condition_a = mda.Universe(struc_a, xtc_a)
     condition_b = mda.Universe(struc_b, xtc_b)
 
     if use_memmap is True:
-        # # # Combine both ensembles' atoms into one universe
-        Combined_conditions = mda.Merge(condition_a.atoms, condition_b.atoms)
-        # # # The density needs to be formed from an even contribution of both conditions
-        # # # otherwise it will be unevely biased towards one condition.
-        # # # So we iterate over the smallest simulation length
+        # Combine both ensembles' atoms into one universe
+        combined_conditions = mda.Merge(condition_a.atoms, condition_b.atoms)
+        # The density needs to be formed from an even contribution of both conditions
+        # otherwise it will be unevely biased towards one condition.
+        # So iterate over the smallest simulation length
         smallest_traj_len = min(len(condition_a.trajectory), len(condition_b.trajectory))
-        # # # The shape for memmap pseudo-trajetcory
+        # The shape for memmap pseudo-trajetcory
         array_shape = [smallest_traj_len, len(condition_a.atoms) + len(condition_b.atoms), 3]
-        # # # Writing out pseudo-trajetcory
+        # Write out pseudo-trajetcory
         merged_coords = np.memmap('combined_traj.mymemmap', dtype='float32', mode='w+',
                                   shape=(array_shape[0], array_shape[1], array_shape[2]))
-        # # # Creating universe with blank timesteps from pseudo-trajectory
-        Combined_conditions.load_new(merged_coords, format=MemoryReader)
+        # Creating universe with blank timesteps from pseudo-trajectory
+        combined_conditions.load_new(merged_coords, format=MemoryReader)
 
-        # # # Creating universe with correct timesteps
+        # Create universe with correct timesteps
         for frameno in tqdm(range(smallest_traj_len)):
             condition_a.trajectory[frameno]
             condition_b.trajectory[frameno]
-            # # # Extract trajectory coordinates at frame [frameno]
+            # Extract trajectory coordinates at frame [frameno]
             coords_a = condition_a.atoms.positions
             coords_b = condition_b.atoms.positions
-            # # # Then we merge the coordinates into one system
+            # Then merge the coordinates into one system
             stacked = np.concatenate((coords_a, coords_b), axis=0)
-            # # # Write over blank trajectory with new coordinates
-            Combined_conditions.trajectory[frameno].positions = stacked
+            # Write over blank trajectory with new coordinates
+            combined_conditions.trajectory[frameno].positions = stacked
 
     else:
-        # # # Combine both ensembles' atoms into one universe
-        Combined_conditions = mda.Merge(condition_a.atoms, condition_b.atoms)
-        # # # Extract trajectory coordinates
+        # Combine both ensembles' atoms into one universe
+        combined_conditions = mda.Merge(condition_a.atoms, condition_b.atoms)
+        # Extract trajectory coordinates
         aligned_coords_a = AnalysisFromFunction(_copy_coords,
                                                 condition_a.atoms).run().results
         aligned_coords_b = AnalysisFromFunction(_copy_coords,
                                                 condition_b.atoms).run().results
-        # # # The density needs to be formed from an even contribution of both conditions
-        # # # otherwise it will be unevely biased towards one condition.
-        # # # So we match the simulation lengths first
+        # The density needs to be formed from an even contribution of both conditions
+        # otherwise it will be unevely biased towards one condition.
+        # So match the simulation lengths first
         sim1_coords, sim2_coords = _match_sim_lengths(aligned_coords_a, aligned_coords_b)
 
-        # # # Then we merge the coordinates into one system
+        # Then merge the coordinates into one system
         merged_coords = np.hstack([sim1_coords, sim2_coords])
-        # # # We load in the merged coordinated into our new universe that contains
-        # # # the receptor in both conditions
-        Combined_conditions.load_new(merged_coords, format=MemoryReader)
+        # Load in the merged coordinates into our new universe that contains
+        # the receptor in both conditions
+        combined_conditions.load_new(merged_coords, format=MemoryReader)
 
-    # # # Grab the density for atomgroup proximal to protein only
+    # Grab the density for atomgroup proximal to protein only
     if prot_prox is True:
         _selection = "name " + atomgroup + " and around 3.5 protein"
-        density_atomgroup = Combined_conditions.select_atoms(_selection, updating=True)
-    # # # Grab the density for atomgroup anywhere in simulation box
+        density_atomgroup = combined_conditions.select_atoms(_selection, updating=True)
+    # Grab the density for atomgroup anywhere in simulation box
     else:
-        density_atomgroup = Combined_conditions.select_atoms("name " + atomgroup)
+        density_atomgroup = combined_conditions.select_atoms("name " + atomgroup)
     # a resolution of delta=1.0 ensures the coordinates of the maxima match the coordinates of the simulation box
     D = DensityAnalysis(density_atomgroup, delta=1.0)
     D.run(verbose=True)
     D.density.convert_density(write_grid_as)
-    D.density.export('dens/' + out_name + atomgroup + "_density.dx", type="double")
+    D.density.export(out_name + atomgroup + "_density.dx", type="double")
 
 
-def extract_aligned_coords(struc_a, xtc_a, struc_b, xtc_b):
+def extract_aligned_coordinates(struc_a, xtc_a, struc_b, xtc_b, xtc_aligned=None, pdb_outname='alignment_ref.pdb'):
     """
-    Writes out combined atomgroup density for both input simulations.
+    Aligns a trajectory (a) on the average structure of another one (b).
 
     Parameters
     ----------
@@ -213,13 +210,17 @@ def extract_aligned_coords(struc_a, xtc_a, struc_b, xtc_b):
     xtc_a : str
         File name for the trajectory (xtc format).
     struc_b : str
-        File name for the reference file (PDB or GRO format).
+        File name for the reference file of the trajectory to be aligned to (PDB or GRO format).
     xtc_b : str
-        File name for the trajectory (xtc format).
+        File name for the trajectory to be aligned to (xtc format).
+    xtc_aligned: str, default=None
+        File name for the aligned trajectory. If none, it will be constructed from the
+    pdb_outname: str, default='alignment_ref.pdb'
+        File name for the average structure of the trajectory to be aligned to (PDB format)
 
     """
-    if not os.path.exists('dens/'):
-        os.makedirs('dens/')
+    if xtc_aligned is None:
+        xtc_aligned = xtc_a[:-4] + '_aligned.xtc'
 
     # # Before we extract the water densities, we need to first align the trajectories
     # # so that we can featurize water sites in both ensembles using the same coordinates
@@ -228,7 +229,6 @@ def extract_aligned_coords(struc_a, xtc_a, struc_b, xtc_b):
 
     # Align a onto the average structure of b
     p = condition_b.select_atoms("protein")
-    pdb_outname = 'dens/alignment_ref.pdb'
     p_avg = np.zeros_like(p.positions)
     # do a quick average of the protein (in reality you probably want to remove PBC and RMSD-superpose)
     for ts in condition_b.trajectory:
@@ -242,13 +242,11 @@ def extract_aligned_coords(struc_a, xtc_a, struc_b, xtc_b):
     # just make sure that we have clean original coordinates again (start at the beginning)
     condition_b = mda.Universe(pdb_outname)
 
-    align_xtc_name = 'dens/' + struc_a.split('/')[-1][:-4] + 'aligned.xtc'
-
     # Align condition a to condition b
     align.AlignTraj(condition_a,  # trajectory to align
                     condition_b,  # reference
                     select='name CA',  # selection of atoms to align
-                    filename=align_xtc_name,  # file to write the trajectory to
+                    filename=xtc_aligned,  # file to write the trajectory to
                     match_atoms=True,  # whether to match atoms based on mass
                     ).run(verbose=True)