diff --git a/mdpow/analysis/automation/dihedral/automated_dihedral_analysis.py b/mdpow/analysis/automation/dihedral/automated_dihedral_analysis.py index 840c5c04..9e2eba5e 100644 --- a/mdpow/analysis/automation/dihedral/automated_dihedral_analysis.py +++ b/mdpow/analysis/automation/dihedral/automated_dihedral_analysis.py @@ -1,3 +1,6 @@ +# MDPOW: automated_dihedral_analysis.py +# 2022 Cade Duckworth + import numpy as np import pandas as pd import os @@ -14,9 +17,34 @@ import MDAnalysis as mda from MDAnalysis.topology.guessers import guess_atom_element - - -def dihedral_indices(datadir, resname): +'''Contains an assortment of functions useful for automation + of DihedralAnalysis workflow. See each function for + requirements, usage, output, or examples. + + Most can be used as standalone or in combination for desired purposes. + Complete automation encompassed in automated_dihedral_analysis(). +''' + +def dihedral_indices(datadir, resname): + '''Requires an MDPOW project directory and resname + as input. Uses the topology and trajectory from + a location that should be present in all MDPOW + projects and creates an MDAnalysis Universe object. + Uses resname to select the solute for conversion + to RDKit, and will add Hydrogen if not listed + in the topology. Uses a SMARTS string to obtain all + relevant dihedral atom groups and returns their indices + for use by dihedral_groups and dihedral_groups_ensemble. + + :arguments: + + *datadir : string* + path to the location of MDPOW project data + + *resname : string* + resname for the molecule as defined in + the topology and trajectory + ''' path = pathlib.Path(datadir) topology = path / 'FEP/water/Coulomb/0000' / 'md.tpr' @@ -32,12 +60,14 @@ def dihedral_indices(datadir, resname): mol = solute.convert_to('RDKIT') pattern = Chem.MolFromSmarts('[!#1]~[!$(*#*)&!D1]-!@[!$(*#*)&!D1]~[!#1]') - ''' + + '''SMARTS string that identifies relevant dihedral atom groups + [!#1] : any atom, not Hydrogen ~ : any bond - !$(*#*)&!D1] : any atom that is not part of linear triple bond + [!$(*#*)&!D1] : any atom that is not part of linear triple bond and not atom with 1 explicit bond - -!@ : single bond that is not ring bond (or is it not aromatic?) + -!@ : single bond that is not ring bond [!$(*#*)&!D1]-!@[!$(*#*)&!D1] : the central portion selects two atoms that are not involved in a triple bond and are not terminal, @@ -46,15 +76,29 @@ def dihedral_indices(datadir, resname): [!#1] : the first and last portion specify any bond, to any atom that is not hydrogen ''' - #^verify and update this docstring bonds = mol.GetSubstructMatches(pattern) return bonds def dihedral_groups(datadir, resname): + '''Requires an MDPOW project directory and resname + as input. Expands upon usage of dihedral_indices() + to return an array of the names of each atom within + its respective dihedral atom group as identified by + the SMARTS selection string. Not necessary for automation, + useful for verifying all atom groups of interest are + properly identified before analysis. + + :arguments: + + *datadir : string* + path to the location of MDPOW project data + + *resname : string* + resname for the molecule as defined in + the topology and trajectory + ''' - #uses a standard, expected file location to - #determine structure and obtain dihedral atom groups path = pathlib.Path(datadir) topology = path / 'FEP/water/Coulomb/0000' / 'md.tpr' trajectory = path / 'FEP/water/Coulomb/0000' / 'md.xtc' @@ -68,6 +112,9 @@ def dihedral_groups(datadir, resname): return dihedral_groups def add_hydrogens(u): + '''Used by dihedral_indices() for proper conversion + to RDKit. Adds Hydrogen if not listed in the topology. + ''' elements = [guess_atom_element(name) for name in u.atoms.names] u.add_TopologyAttr("elements", elements) @@ -78,7 +125,21 @@ def dihedral_groups_ensemble(bonds, datadir, solvents=('water','octanol'), interactions=('Coulomb','VDW'), start=None, stop=None, step=None): - #need to bounce out ensemble kwargs + '''Creates one ensemble for the MDPOW project and runs + DihedralAnalysis for each dihedral atom group identified + by the SMARTS selection string. For keyword arguments + see automated_dihedral_analysis() or DihedralAnalysis. + + :arguments: + + *bonds* + bond indices for dihedral atom groups + see dihedral_indices() + + *datadir : string* + path to the location of MDPOW project data + ''' + dih_ens = mdpow.analysis.ensemble.Ensemble(dirname=datadir, solvents=solvents, interactions=interactions) @@ -94,7 +155,34 @@ def dihedral_groups_ensemble(bonds, datadir, return df -def periodic_angle(df, padding=45): +def periodic_angle(df, padding=45): + '''Takes a Pandas DataFrame of results from DihedralAnalysis + as input and pads the angles to maintain periodicity + for properly plotting dihedral angle frequencies as KDE violins + with dihedral_violins(). Creates two new DataFrames based on the + cutoff value specified, adds to the angle values, concatenates + all three DataFrames, maintaining original data and adding padding, + and returns new augmented DataFrame. + + :arguments: + + *df : Pandas DataFrame* + results DataFrame from DihedralAnalysis + + :keywords: + + *padding : int* + value must be in degrees + default set to 45 + + Example: + + da = DihedralAnalysis(all_dihedrals) + da.run(start=0, stop=100, step=10) + df = da.results + df_aug = periodic_angle(df, padding=45) + plot = dihedral_violins(df_aug, width=0.9) + ''' df1 = df[df.dihedral > 180 - padding] df1.dihedral -= 360 @@ -105,19 +193,23 @@ def periodic_angle(df, padding=45): return df_aug def dihedral_violins(df, width=0.9): - #need to fix plotting code to match solvents, interactions - """Plot distributions of all dihedrals as violin plots. - - Parameters - ---------- - df : DataFrame - width : float - width of the violin element (>1 overlaps) - - Returns - ------- - seaborn.FacetGrid - """ + '''Plots distributions of dihedral angles for + one dihedral atom group as violin plots, + using as input the augmented DataFrame from periodic_angle(). + Returns a figure in the form of seaborn.FacetGrid. + See periodic_angle() for example. + + :arguments: + + *df : Pandas DataFrame* + augmented DataFrame from periodic_angle() + + :keywords: + + *width : float* + width of the violin element (>1 overlaps) + ''' + df['lambda'] = df['lambda'].astype('float') / 1000 df = df.sort_values(by=["selection", "solvent", @@ -154,7 +246,13 @@ def dihedral_violins(df, width=0.9): return g -def plot_violins(df, resname, figdir=None, molname=None, width=0.9): +def plot_violins(df, resname, figdir=None, molname=None, width=0.9): + '''Coordinates plotting and optionally saving figures + for all dihedral atom groups. Makes a subdirectory within + the specified figure directory using rename or molname provided. + See automated_dihedral_analysis(), dihedral_violins() + Returns figures in the form of seaborn.FacetGrid. + ''' if molname is None: molname = resname @@ -181,7 +279,69 @@ def automated_dihedral_analysis(datadir=None, figdir=None, dataframe=None, padding=45, width=0.9, solvents=('water','octanol'), interactions=('Coulomb','VDW'), - start=None, stop=None, step=None): + start=None, stop=None, step=None): + '''For one MDPOW project, automatically determines all relevant + dihedral atom groups, conducts DihedralAnalysis for each group, + pads the dihedral angles from analysis results for all groups, + creates KDE violin plots of dihedral angle frequencies for each group, + and optionally saves figure for each group as pdf in provided figure directory. + Returns KDE violin plot figure for each atom group as seaborn.FacetGrid. + + :keywords: + + *datadir : string* + path to the location of MDPOW project data + + *figdir : string* + optional, path to an existing directory where plots + can be saved for each dihedral atom group + + *resname : string* + resname for the molecule as defined in + the topology and trajectory + + *molname : string* + molecule name to be used for labelling + plots, if different from resname + + *dataframe : Pandas DataFrame* + optional, if DihedralAnalysis was done prior, + then results DataFrame can be input to utilize + angle padding and violin plotting functionality + + *padding : int* + must be in degrees, values for angle padding function + used for KDE violin plots of dihedral angle frequencies + + *width : float* + used for violin plots + width of violins, (>1 overlaps) + see automated_dihedral_analysis.dihedral_violins() + + *solvents : tuple* + solvent specifications for use by DihedralAnalysis + see DihedralAnalysis + + *interactions : tuple* + interaction specifications for use by DihedralAnalysis + see DihedralAnalysis + + *start,stop,step : int* + run _single_frame() analysis parameters for use by DihedralAnalysis + see DihedralAnalysis + + Example: + + import automated_dihedral_analysis as ada + + ada.automated_dihedral_analysis(datadir='/foo/bar/MDPOW_project_data', + figdir='/foo/bar/MDPOW_figure_directory', + resname='UNK', molname='benzene', + padding=45, width=0.9, + solvents=('water','octanol'), + interactions=('Coulomb','VDW'), + start=0, stop=100, step=10) + ''' bonds = dihedral_indices(datadir=datadir, resname=resname)