diff --git a/pensa/preprocessing/__init__.py b/pensa/preprocessing/__init__.py index 17db169..305c108 100644 --- a/pensa/preprocessing/__init__.py +++ b/pensa/preprocessing/__init__.py @@ -15,7 +15,7 @@ get_transmem_from_uniprot from .density import \ - extract_aligned_coords, \ + extract_aligned_coordinates, \ extract_combined_grid, \ generate_grid, \ dens_grid_pdb, \ diff --git a/pensa/preprocessing/density.py b/pensa/preprocessing/density.py index 29a08f3..ea4d204 100644 --- a/pensa/preprocessing/density.py +++ b/pensa/preprocessing/density.py @@ -136,75 +136,72 @@ def extract_combined_grid(struc_a, xtc_a, struc_b, xtc_b, atomgroup, write_grid_ python arrays. The default is False. """ - if not os.path.exists('dens/'): - os.makedirs('dens/') - condition_a = mda.Universe(struc_a, xtc_a) condition_b = mda.Universe(struc_b, xtc_b) if use_memmap is True: - # # # Combine both ensembles' atoms into one universe - Combined_conditions = mda.Merge(condition_a.atoms, condition_b.atoms) - # # # The density needs to be formed from an even contribution of both conditions - # # # otherwise it will be unevely biased towards one condition. - # # # So we iterate over the smallest simulation length + # Combine both ensembles' atoms into one universe + combined_conditions = mda.Merge(condition_a.atoms, condition_b.atoms) + # The density needs to be formed from an even contribution of both conditions + # otherwise it will be unevely biased towards one condition. + # So iterate over the smallest simulation length smallest_traj_len = min(len(condition_a.trajectory), len(condition_b.trajectory)) - # # # The shape for memmap pseudo-trajetcory + # The shape for memmap pseudo-trajetcory array_shape = [smallest_traj_len, len(condition_a.atoms) + len(condition_b.atoms), 3] - # # # Writing out pseudo-trajetcory + # Write out pseudo-trajetcory merged_coords = np.memmap('combined_traj.mymemmap', dtype='float32', mode='w+', shape=(array_shape[0], array_shape[1], array_shape[2])) - # # # Creating universe with blank timesteps from pseudo-trajectory - Combined_conditions.load_new(merged_coords, format=MemoryReader) + # Creating universe with blank timesteps from pseudo-trajectory + combined_conditions.load_new(merged_coords, format=MemoryReader) - # # # Creating universe with correct timesteps + # Create universe with correct timesteps for frameno in tqdm(range(smallest_traj_len)): condition_a.trajectory[frameno] condition_b.trajectory[frameno] - # # # Extract trajectory coordinates at frame [frameno] + # Extract trajectory coordinates at frame [frameno] coords_a = condition_a.atoms.positions coords_b = condition_b.atoms.positions - # # # Then we merge the coordinates into one system + # Then merge the coordinates into one system stacked = np.concatenate((coords_a, coords_b), axis=0) - # # # Write over blank trajectory with new coordinates - Combined_conditions.trajectory[frameno].positions = stacked + # Write over blank trajectory with new coordinates + combined_conditions.trajectory[frameno].positions = stacked else: - # # # Combine both ensembles' atoms into one universe - Combined_conditions = mda.Merge(condition_a.atoms, condition_b.atoms) - # # # Extract trajectory coordinates + # Combine both ensembles' atoms into one universe + combined_conditions = mda.Merge(condition_a.atoms, condition_b.atoms) + # Extract trajectory coordinates aligned_coords_a = AnalysisFromFunction(_copy_coords, condition_a.atoms).run().results aligned_coords_b = AnalysisFromFunction(_copy_coords, condition_b.atoms).run().results - # # # The density needs to be formed from an even contribution of both conditions - # # # otherwise it will be unevely biased towards one condition. - # # # So we match the simulation lengths first + # The density needs to be formed from an even contribution of both conditions + # otherwise it will be unevely biased towards one condition. + # So match the simulation lengths first sim1_coords, sim2_coords = _match_sim_lengths(aligned_coords_a, aligned_coords_b) - # # # Then we merge the coordinates into one system + # Then merge the coordinates into one system merged_coords = np.hstack([sim1_coords, sim2_coords]) - # # # We load in the merged coordinated into our new universe that contains - # # # the receptor in both conditions - Combined_conditions.load_new(merged_coords, format=MemoryReader) + # Load in the merged coordinates into our new universe that contains + # the receptor in both conditions + combined_conditions.load_new(merged_coords, format=MemoryReader) - # # # Grab the density for atomgroup proximal to protein only + # Grab the density for atomgroup proximal to protein only if prot_prox is True: _selection = "name " + atomgroup + " and around 3.5 protein" - density_atomgroup = Combined_conditions.select_atoms(_selection, updating=True) - # # # Grab the density for atomgroup anywhere in simulation box + density_atomgroup = combined_conditions.select_atoms(_selection, updating=True) + # Grab the density for atomgroup anywhere in simulation box else: - density_atomgroup = Combined_conditions.select_atoms("name " + atomgroup) + density_atomgroup = combined_conditions.select_atoms("name " + atomgroup) # a resolution of delta=1.0 ensures the coordinates of the maxima match the coordinates of the simulation box D = DensityAnalysis(density_atomgroup, delta=1.0) D.run(verbose=True) D.density.convert_density(write_grid_as) - D.density.export('dens/' + out_name + atomgroup + "_density.dx", type="double") + D.density.export(out_name + atomgroup + "_density.dx", type="double") -def extract_aligned_coords(struc_a, xtc_a, struc_b, xtc_b): +def extract_aligned_coordinates(struc_a, xtc_a, struc_b, xtc_b, xtc_aligned=None, pdb_outname='alignment_ref.pdb'): """ - Writes out combined atomgroup density for both input simulations. + Aligns a trajectory (a) on the average structure of another one (b). Parameters ---------- @@ -213,13 +210,17 @@ def extract_aligned_coords(struc_a, xtc_a, struc_b, xtc_b): xtc_a : str File name for the trajectory (xtc format). struc_b : str - File name for the reference file (PDB or GRO format). + File name for the reference file of the trajectory to be aligned to (PDB or GRO format). xtc_b : str - File name for the trajectory (xtc format). + File name for the trajectory to be aligned to (xtc format). + xtc_aligned: str, default=None + File name for the aligned trajectory. If none, it will be constructed from the + pdb_outname: str, default='alignment_ref.pdb' + File name for the average structure of the trajectory to be aligned to (PDB format) """ - if not os.path.exists('dens/'): - os.makedirs('dens/') + if xtc_aligned is None: + xtc_aligned = xtc_a[:-4] + '_aligned.xtc' # # Before we extract the water densities, we need to first align the trajectories # # so that we can featurize water sites in both ensembles using the same coordinates @@ -228,7 +229,6 @@ def extract_aligned_coords(struc_a, xtc_a, struc_b, xtc_b): # Align a onto the average structure of b p = condition_b.select_atoms("protein") - pdb_outname = 'dens/alignment_ref.pdb' p_avg = np.zeros_like(p.positions) # do a quick average of the protein (in reality you probably want to remove PBC and RMSD-superpose) for ts in condition_b.trajectory: @@ -242,13 +242,11 @@ def extract_aligned_coords(struc_a, xtc_a, struc_b, xtc_b): # just make sure that we have clean original coordinates again (start at the beginning) condition_b = mda.Universe(pdb_outname) - align_xtc_name = 'dens/' + struc_a.split('/')[-1][:-4] + 'aligned.xtc' - # Align condition a to condition b align.AlignTraj(condition_a, # trajectory to align condition_b, # reference select='name CA', # selection of atoms to align - filename=align_xtc_name, # file to write the trajectory to + filename=xtc_aligned, # file to write the trajectory to match_atoms=True, # whether to match atoms based on mass ).run(verbose=True)