Skip to content

Commit

Permalink
adapt function name extract_aligned_coordinates, remove hard-coded fi…
Browse files Browse the repository at this point in the history
…le names, improve style
  • Loading branch information
martinvoegele committed Nov 27, 2023
1 parent 123b800 commit c7f3691
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 43 deletions.
2 changes: 1 addition & 1 deletion pensa/preprocessing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
get_transmem_from_uniprot

from .density import \
extract_aligned_coords, \
extract_aligned_coordinates, \
extract_combined_grid, \
generate_grid, \
dens_grid_pdb, \
Expand Down
82 changes: 40 additions & 42 deletions pensa/preprocessing/density.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,75 +136,72 @@ def extract_combined_grid(struc_a, xtc_a, struc_b, xtc_b, atomgroup, write_grid_
python arrays. The default is False.
"""
if not os.path.exists('dens/'):
os.makedirs('dens/')

condition_a = mda.Universe(struc_a, xtc_a)
condition_b = mda.Universe(struc_b, xtc_b)

if use_memmap is True:
# # # Combine both ensembles' atoms into one universe
Combined_conditions = mda.Merge(condition_a.atoms, condition_b.atoms)
# # # The density needs to be formed from an even contribution of both conditions
# # # otherwise it will be unevely biased towards one condition.
# # # So we iterate over the smallest simulation length
# Combine both ensembles' atoms into one universe
combined_conditions = mda.Merge(condition_a.atoms, condition_b.atoms)
# The density needs to be formed from an even contribution of both conditions
# otherwise it will be unevely biased towards one condition.
# So iterate over the smallest simulation length
smallest_traj_len = min(len(condition_a.trajectory), len(condition_b.trajectory))
# # # The shape for memmap pseudo-trajetcory
# The shape for memmap pseudo-trajetcory
array_shape = [smallest_traj_len, len(condition_a.atoms) + len(condition_b.atoms), 3]
# # # Writing out pseudo-trajetcory
# Write out pseudo-trajetcory
merged_coords = np.memmap('combined_traj.mymemmap', dtype='float32', mode='w+',
shape=(array_shape[0], array_shape[1], array_shape[2]))
# # # Creating universe with blank timesteps from pseudo-trajectory
Combined_conditions.load_new(merged_coords, format=MemoryReader)
# Creating universe with blank timesteps from pseudo-trajectory
combined_conditions.load_new(merged_coords, format=MemoryReader)

# # # Creating universe with correct timesteps
# Create universe with correct timesteps
for frameno in tqdm(range(smallest_traj_len)):
condition_a.trajectory[frameno]
condition_b.trajectory[frameno]
# # # Extract trajectory coordinates at frame [frameno]
# Extract trajectory coordinates at frame [frameno]
coords_a = condition_a.atoms.positions
coords_b = condition_b.atoms.positions
# # # Then we merge the coordinates into one system
# Then merge the coordinates into one system
stacked = np.concatenate((coords_a, coords_b), axis=0)
# # # Write over blank trajectory with new coordinates
Combined_conditions.trajectory[frameno].positions = stacked
# Write over blank trajectory with new coordinates
combined_conditions.trajectory[frameno].positions = stacked

else:
# # # Combine both ensembles' atoms into one universe
Combined_conditions = mda.Merge(condition_a.atoms, condition_b.atoms)
# # # Extract trajectory coordinates
# Combine both ensembles' atoms into one universe
combined_conditions = mda.Merge(condition_a.atoms, condition_b.atoms)
# Extract trajectory coordinates
aligned_coords_a = AnalysisFromFunction(_copy_coords,
condition_a.atoms).run().results
aligned_coords_b = AnalysisFromFunction(_copy_coords,
condition_b.atoms).run().results
# # # The density needs to be formed from an even contribution of both conditions
# # # otherwise it will be unevely biased towards one condition.
# # # So we match the simulation lengths first
# The density needs to be formed from an even contribution of both conditions
# otherwise it will be unevely biased towards one condition.
# So match the simulation lengths first
sim1_coords, sim2_coords = _match_sim_lengths(aligned_coords_a, aligned_coords_b)

# # # Then we merge the coordinates into one system
# Then merge the coordinates into one system
merged_coords = np.hstack([sim1_coords, sim2_coords])
# # # We load in the merged coordinated into our new universe that contains
# # # the receptor in both conditions
Combined_conditions.load_new(merged_coords, format=MemoryReader)
# Load in the merged coordinates into our new universe that contains
# the receptor in both conditions
combined_conditions.load_new(merged_coords, format=MemoryReader)

# # # Grab the density for atomgroup proximal to protein only
# Grab the density for atomgroup proximal to protein only
if prot_prox is True:
_selection = "name " + atomgroup + " and around 3.5 protein"
density_atomgroup = Combined_conditions.select_atoms(_selection, updating=True)
# # # Grab the density for atomgroup anywhere in simulation box
density_atomgroup = combined_conditions.select_atoms(_selection, updating=True)
# Grab the density for atomgroup anywhere in simulation box
else:
density_atomgroup = Combined_conditions.select_atoms("name " + atomgroup)
density_atomgroup = combined_conditions.select_atoms("name " + atomgroup)
# a resolution of delta=1.0 ensures the coordinates of the maxima match the coordinates of the simulation box
D = DensityAnalysis(density_atomgroup, delta=1.0)
D.run(verbose=True)
D.density.convert_density(write_grid_as)
D.density.export('dens/' + out_name + atomgroup + "_density.dx", type="double")
D.density.export(out_name + atomgroup + "_density.dx", type="double")


def extract_aligned_coords(struc_a, xtc_a, struc_b, xtc_b):
def extract_aligned_coordinates(struc_a, xtc_a, struc_b, xtc_b, xtc_aligned=None, pdb_outname='alignment_ref.pdb'):
"""
Writes out combined atomgroup density for both input simulations.
Aligns a trajectory (a) on the average structure of another one (b).
Parameters
----------
Expand All @@ -213,13 +210,17 @@ def extract_aligned_coords(struc_a, xtc_a, struc_b, xtc_b):
xtc_a : str
File name for the trajectory (xtc format).
struc_b : str
File name for the reference file (PDB or GRO format).
File name for the reference file of the trajectory to be aligned to (PDB or GRO format).
xtc_b : str
File name for the trajectory (xtc format).
File name for the trajectory to be aligned to (xtc format).
xtc_aligned: str, default=None
File name for the aligned trajectory. If none, it will be constructed from the
pdb_outname: str, default='alignment_ref.pdb'
File name for the average structure of the trajectory to be aligned to (PDB format)
"""
if not os.path.exists('dens/'):
os.makedirs('dens/')
if xtc_aligned is None:
xtc_aligned = xtc_a[:-4] + '_aligned.xtc'

# # Before we extract the water densities, we need to first align the trajectories
# # so that we can featurize water sites in both ensembles using the same coordinates
Expand All @@ -228,7 +229,6 @@ def extract_aligned_coords(struc_a, xtc_a, struc_b, xtc_b):

# Align a onto the average structure of b
p = condition_b.select_atoms("protein")
pdb_outname = 'dens/alignment_ref.pdb'
p_avg = np.zeros_like(p.positions)
# do a quick average of the protein (in reality you probably want to remove PBC and RMSD-superpose)
for ts in condition_b.trajectory:
Expand All @@ -242,13 +242,11 @@ def extract_aligned_coords(struc_a, xtc_a, struc_b, xtc_b):
# just make sure that we have clean original coordinates again (start at the beginning)
condition_b = mda.Universe(pdb_outname)

align_xtc_name = 'dens/' + struc_a.split('/')[-1][:-4] + 'aligned.xtc'

# Align condition a to condition b
align.AlignTraj(condition_a, # trajectory to align
condition_b, # reference
select='name CA', # selection of atoms to align
filename=align_xtc_name, # file to write the trajectory to
filename=xtc_aligned, # file to write the trajectory to
match_atoms=True, # whether to match atoms based on mass
).run(verbose=True)

Expand Down

0 comments on commit c7f3691

Please sign in to comment.