From 3d633b33b5b537323c625cb955f0bfbfe0cb929d Mon Sep 17 00:00:00 2001 From: David Dotson Date: Tue, 16 Feb 2021 17:46:45 -0700 Subject: [PATCH 1/5] README typo fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 94400194..e3dd1094 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ Tools and infrastructure for automated compound discovery using Folding@home. Run transformation and compound free energy analysis, producing `results/analysis.json`: ``` sh -fah-xchem run-analysis +fah-xchem run-analysis \ --compound-series-file compound-series.json \ --config-file config.json \ --fah-projects-dir /path/to/projects/ \ From 9cbbfb4dd220067b6d8471b2f6d9736c8392fc89 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Tue, 16 Feb 2021 19:16:57 -0700 Subject: [PATCH 2/5] Fixes for CI --- .github/workflows/CI.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index 5df60f65..ce1f403c 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -43,14 +43,11 @@ jobs: df -h ulimit -a - # More info on options: https://github.com/goanpeca/setup-miniconda - - uses: goanpeca/setup-miniconda@v1 + - name: Configure conda + uses: conda-incubator/setup-miniconda@v2 with: python-version: ${{ matrix.python-version }} environment-file: devtools/conda-envs/test_env.yaml - - channels: conda-forge,defaults,omnia - activate-environment: test auto-update-conda: true auto-activate-base: false From f032048da2dd659815bf4e26ee64bdb80ab81451 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Tue, 23 Feb 2021 15:35:16 -0700 Subject: [PATCH 3/5] Added `overwrite` flag; by default run-level snapshots and plots not regenerated --- fah_xchem/analysis/__init__.py | 3 ++ fah_xchem/analysis/plots.py | 47 ++++++++++++++++++++++++++------ fah_xchem/analysis/structures.py | 34 +++++++++++++++++++++-- fah_xchem/app.py | 7 +++++ 4 files changed, 81 
insertions(+), 10 deletions(-) diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py index 73dc57dd..6b7ea962 100644 --- a/fah_xchem/analysis/__init__.py +++ b/fah_xchem/analysis/__init__.py @@ -179,6 +179,7 @@ def generate_artifacts( plots: bool = True, report: bool = True, website: bool = True, + overwrite: bool = False, ) -> None: complex_project_dir = os.path.join( @@ -199,6 +200,7 @@ def generate_artifacts( max_binding_free_energy=config.max_binding_free_energy, cache_dir=cache_dir, num_procs=num_procs, + overwrite=overwrite, ) if plots: @@ -208,6 +210,7 @@ def generate_artifacts( timestamp=timestamp, output_dir=output_dir, num_procs=num_procs, + overwrite=overwrite, ) if snapshots and report: diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py index c434494b..2ccf31f0 100644 --- a/fah_xchem/analysis/plots.py +++ b/fah_xchem/analysis/plots.py @@ -522,7 +522,9 @@ def save_plot( """ try: - yield + outfiles = [os.path.join(path, os.extsep.join([name, file_format])) + for file_format in file_formats] + yield outfiles if timestamp is not None: plt.tight_layout(rect=(0, 0.05, 1, 1)) # leave space for timestamp @@ -533,17 +535,20 @@ def save_plot( # Make sure the directory exists os.makedirs(path, exist_ok=True) - for file_format in file_formats: + for outfile in outfiles: plt.savefig( - os.path.join(path, os.extsep.join([name, file_format])), + outfile, transparent=True, ) + finally: plt.close() def generate_transformation_plots( - transformation: TransformationAnalysis, output_dir: str + transformation: TransformationAnalysis, + output_dir: str, + overwrite: bool = False, ): run_id = transformation.transformation.run_id @@ -551,7 +556,13 @@ def generate_transformation_plots( save_plot, path=os.path.join(output_dir, "transformations", f"RUN{run_id}") ) - with save_transformation_plot(name="works"): + with save_transformation_plot(name="works") as outfiles: + + # check if output files all exist; if so, skip unless we are told 
not to + if not overwrite: + if all(map(os.path.exists, outfiles)): + continue + fig = plot_work_distributions( complex_forward_works=[ work.forward @@ -578,7 +589,13 @@ def generate_transformation_plots( ) fig.suptitle(f"RUN{run_id}") - with save_transformation_plot(name="convergence"): + with save_transformation_plot(name="convergence") as outfiles: + + # check if output files all exist; if so, skip unless we are told not to + if not overwrite: + if all(map(os.path.exists, outfiles)): + continue + # Filter to GENs for which free energy calculation is available complex_gens = [ (gen.gen, gen.free_energy) @@ -603,7 +620,12 @@ def generate_transformation_plots( ) fig.suptitle(f"RUN{run_id}") - with save_transformation_plot(name="bootstrapped-CLONEs"): + with save_transformation_plot(name="bootstrapped-CLONEs") as outfiles: + + # check if output files all exist; if so, skip unless we are told not to + if not overwrite: + if all(map(os.path.exists, outfiles)): + continue # Gather CLONES per GEN for run clones_per_gen = min( @@ -633,6 +655,7 @@ def generate_plots( timestamp: dt.datetime, output_dir: str, num_procs: Optional[int] = None, + overwrite: bool = False, ) -> None: """ Generate analysis plots in `output_dir`. @@ -667,6 +690,11 @@ def generate_plots( "As of" timestamp to render on plots output_dir : str Where to write plot files + overwrite : bool + If `True`, write over existing output files if present. + Otherwise, skip writing output files for a given transformation when already present. + Assumes that for a given `run_id` the output files do not ever change; + does *no* checking that files wouldn't be different if inputs for a given `run_id` have changed. 
""" from rich.progress import track @@ -683,6 +711,7 @@ def generate_plots( # Summary plots + # we always regenerate these, since they concern all data with save_summary_plot( name="relative_fe_dist", ): @@ -701,7 +730,9 @@ def generate_plots( # Transformation-level plots generate_transformation_plots_partial = partial( - generate_transformation_plots, output_dir=output_dir + generate_transformation_plots, + output_dir=output_dir, + overwrite=overwrite, ) with multiprocessing.Pool(num_procs) as pool: diff --git a/fah_xchem/analysis/structures.py b/fah_xchem/analysis/structures.py index 1ca6aabb..18ac8b26 100644 --- a/fah_xchem/analysis/structures.py +++ b/fah_xchem/analysis/structures.py @@ -23,6 +23,16 @@ from ..schema import TransformationAnalysis +def _transformation_to_file_mapping(output_dir, run_id, ligand): + fnames = [f"{ligand}_protein.pdb", + f"{ligand}_complex.pdb", + f"{ligand}_ligand.sdf"] + + outfiles = [os.path.join(output_dir, f"RUN{run_id}", f"{fname}") for fname in fnames] + + return outfiles + + def load_trajectory( project_dir: str, project_data_dir: str, run: int, clone: int, gen: int ) -> md.Trajectory: @@ -298,6 +308,7 @@ def generate_representative_snapshot( output_dir: str, max_binding_free_energy: Optional[float], cache_dir: Optional[str] = None, + overwrite: bool = False, ) -> None: r""" @@ -311,6 +322,8 @@ def generate_representative_snapshot( Parameters ---------- + transformation: TransformationAnalysis + The transformation record to operate on. project_dir : str Path to project directory (e.g. '/home/server/server2/projects/13422') project_data_dir : str @@ -325,11 +338,20 @@ def generate_representative_snapshot( Path where snapshots will be written cache_dir : str or None, optional If specified, cache relevant parts of "htf.npz" file in a local directory of this name + overwrite : bool + If `True`, write over existing output files if present. + Otherwise, skip writing output files for a given transformation when already present. 
+ Assumes that for a given `run_id` the output files do not ever change; + does *no* checking that files wouldn't be different if inputs for a given `run_id` have changed. + Returns ------- None """ + # create output directory if not present + os.makedirs(os.path.join(output_dir, f"RUN{run_id}"), exist_ok=True) + run_id = transformation.transformation.run_id if ( max_binding_free_energy is not None @@ -348,6 +370,13 @@ def generate_representative_snapshot( ] for ligand in ["old", "new"]: + + # check if output files all exist; if so, skip unless we are told not to + if not overwrite: + outfiles = _transformation_to_file_mapping(output_dir, run_id, ligand) + if all(map(os.path.exists, outfiles)): + continue + if ligand == "old": gen_work = min(gen_works, key=lambda gen_work: gen_work[1].reverse) frame = 3 # TODO: Magic numbers @@ -355,7 +384,6 @@ def generate_representative_snapshot( gen_work = min(gen_works, key=lambda gen_work: gen_work[1].forward) frame = 1 # TODO: Magic numbers - run_id = transformation.transformation.run_id # Extract representative snapshot try: @@ -372,7 +400,6 @@ def generate_representative_snapshot( # Write protein PDB name = f"{ligand}_protein" - os.makedirs(os.path.join(output_dir, f"RUN{run_id}"), exist_ok=True) sliced_snapshots["protein"].save( os.path.join(output_dir, f"RUN{run_id}", f"{name}.pdb") @@ -395,6 +422,7 @@ def generate_representative_snapshot( except Exception as e: print(e) + def generate_representative_snapshots( transformations: List[TransformationAnalysis], project_dir: str, @@ -403,6 +431,7 @@ def generate_representative_snapshots( max_binding_free_energy: Optional[float], cache_dir: Optional[str], num_procs: Optional[int], + overwrite: bool = False, ) -> None: from rich.progress import track @@ -415,6 +444,7 @@ def generate_representative_snapshots( output_dir=output_dir, cache_dir=cache_dir, max_binding_free_energy=max_binding_free_energy, + overwrite=overwrite ), transformations, ) diff --git a/fah_xchem/app.py 
b/fah_xchem/app.py index f59226a3..fbf1b952 100644 --- a/fah_xchem/app.py +++ b/fah_xchem/app.py @@ -143,6 +143,7 @@ def generate_artifacts( website: bool = True, log: str = "WARN", fragalysis_config: Optional[str] = None, + overwrite: bool = False, ) -> None: """ Given results of free energy analysis as JSON, generate analysis @@ -186,6 +187,11 @@ def generate_artifacts( Logging level fragalysis_config : str, optional File containing information for Fragalysis upload as JSON-encoded :class: ~`fah_xchem.schema.FragalysisConfig` + overwrite : bool + If `True`, write over existing output files if present. + Otherwise, skip writing output files for a given transformation when already present. + Assumes that for a given `run_id` the output files do not ever change; + does *no* checking that files wouldn't be different if inputs for a given `run_id` have changed. """ logging.basicConfig(level=getattr(logging, log.upper())) @@ -212,6 +218,7 @@ def generate_artifacts( report=report, website=website, fragalysis_config=fragalysis_config, + overwrite=overwrite, ) From 86a2474ace86537a23984af3a7adfb858cf082e7 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Tue, 23 Feb 2021 19:55:25 -0700 Subject: [PATCH 4/5] `save_plot` as a context manager was awkward for skipping Refactored as just a function that we call at the end; not sure if this could have a negative consequence for error handling, but should now allow us to do skipping if all plot files that would be produced for a run are already present. 
--- fah_xchem/analysis/plots.py | 102 +++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 42 deletions(-) diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py index 2ccf31f0..24bebb14 100644 --- a/fah_xchem/analysis/plots.py +++ b/fah_xchem/analysis/plots.py @@ -450,8 +450,9 @@ def plot_bootstrapped_clones( return fig -def _plot_updated_timestamp(timestamp: dt.datetime) -> None: - fig = plt.gcf() +def _plot_updated_timestamp(timestamp: dt.datetime, fig: plt.Figure = None) -> None: + if fig is None: + fig = plt.gcf() fig.text( 0.5, 0.03, @@ -490,10 +491,10 @@ def _save_table_pdf(path: str, name: str): logging.warning("Failed to save pdf table") -@contextmanager def save_plot( path: str, name: str, + fig: plt.Figure, file_formats: Iterable[str] = ("png", "pdf"), timestamp: Optional[dt.datetime] = None, ) -> Generator: @@ -516,33 +517,38 @@ def save_plot( Examples -------- - >>> with save_plot('example/plots', 'test_plot', 'png'): - >>> plt.plot(np.cos(np.linspace(-np.pi, np.pi))) - >>> plt.title("My cool plot") + >>> fig = plt.plot(np.cos(np.linspace(-np.pi, np.pi))) + >>> fig.title("My cool plot") + >>> save_plot('example/plots', 'test_plot', fig, 'png'): """ + outfiles = [os.path.join(path, os.extsep.join([name, file_format])) + for file_format in file_formats] + + if timestamp is not None: + fig.tight_layout(rect=(0, 0.05, 1, 1)) # leave space for timestamp + _plot_updated_timestamp(timestamp, fig) + else: + fig.tight_layout() - try: - outfiles = [os.path.join(path, os.extsep.join([name, file_format])) - for file_format in file_formats] - yield outfiles + # Make sure the directory exists + os.makedirs(path, exist_ok=True) - if timestamp is not None: - plt.tight_layout(rect=(0, 0.05, 1, 1)) # leave space for timestamp - _plot_updated_timestamp(timestamp) - else: - plt.tight_layout() + for outfile in outfiles: + fig.savefig( + outfile, + transparent=True, + ) - # Make sure the directory exists - os.makedirs(path, exist_ok=True) + 
plt.close(fig=fig) - for outfile in outfiles: - plt.savefig( - outfile, - transparent=True, - ) - finally: - plt.close() +def _plot_to_file_mapping( + path: str, + name: str, + file_formats: Iterable[str] = ("png", "pdf"), +) -> List: + return [os.path.join(path, os.extsep.join([name, file_format])) + for file_format in file_formats] def generate_transformation_plots( @@ -552,17 +558,22 @@ def generate_transformation_plots( ): run_id = transformation.transformation.run_id + + plot_output_dir = os.path.join(output_dir, "transformations", f"RUN{run_id}") save_transformation_plot = partial( - save_plot, path=os.path.join(output_dir, "transformations", f"RUN{run_id}") + save_plot, path=plot_output_dir ) - with save_transformation_plot(name="works") as outfiles: + name = "works" + # check if output files all exist; if so, skip unless we are told not to + skip = False + if not overwrite: + outfiles = _plot_to_file_mapping(path=plot_output_dir, name=name) + if all(map(os.path.exists, outfiles)): + skip = True - # check if output files all exist; if so, skip unless we are told not to - if not overwrite: - if all(map(os.path.exists, outfiles)): - continue + if not skip: fig = plot_work_distributions( complex_forward_works=[ work.forward @@ -588,14 +599,17 @@ def generate_transformation_plots( solvent_delta_f=transformation.solvent_phase.free_energy.delta_f.point, ) fig.suptitle(f"RUN{run_id}") + save_transformation_plot(name=name, fig=fig) - with save_transformation_plot(name="convergence") as outfiles: - - # check if output files all exist; if so, skip unless we are told not to - if not overwrite: - if all(map(os.path.exists, outfiles)): - continue + name = "convergence" + # check if output files all exist; if so, skip unless we are told not to + skip = False + if not overwrite: + outfiles = _plot_to_file_mapping(path=plot_output_dir, name=name) + if all(map(os.path.exists, outfiles)): + skip = True + if not skip: # Filter to GENs for which free energy calculation is 
available complex_gens = [ (gen.gen, gen.free_energy) @@ -619,14 +633,17 @@ def generate_transformation_plots( binding_delta_f_err=transformation.binding_free_energy.stderr, ) fig.suptitle(f"RUN{run_id}") + save_transformation_plot(name=name, fig=fig) - with save_transformation_plot(name="bootstrapped-CLONEs") as outfiles: - - # check if output files all exist; if so, skip unless we are told not to - if not overwrite: - if all(map(os.path.exists, outfiles)): - continue + name = "bootstrapped-CLONEs" + # check if output files all exist; if so, skip unless we are told not to + skip = False + if not overwrite: + outfiles = _plot_to_file_mapping(path=plot_output_dir, name=name) + if all(map(os.path.exists, outfiles)): + skip = True + if not skip: # Gather CLONES per GEN for run clones_per_gen = min( [ @@ -648,6 +665,7 @@ def generate_transformation_plots( n_gens=n_gens, ) fig.suptitle(f"RUN{run_id}") + save_transformation_plot(name=name, fig=fig) def generate_plots( From 7933e2606add5a69de1b307040b28701dd455503 Mon Sep 17 00:00:00 2001 From: David Dotson Date: Tue, 18 May 2021 19:41:37 -0700 Subject: [PATCH 5/5] Hotfixes to plotting, etc. 
--- fah_xchem/analysis/plots.py | 47 ++++++++++++++++---------------- fah_xchem/analysis/structures.py | 2 +- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py index 24bebb14..1001b8e4 100644 --- a/fah_xchem/analysis/plots.py +++ b/fah_xchem/analysis/plots.py @@ -106,7 +106,7 @@ def _filter_inclusive( def plot_relative_distribution( relative_delta_fs: List[float], min_delta_f: float = -30, max_delta_f: float = 30 -) -> None: +) -> plt.Figure: """ Plot the distribution of relative free energies @@ -124,18 +124,20 @@ def plot_relative_distribution( ) valid_relative_delta_fs_kcal = valid_relative_delta_fs * KT_KCALMOL - sns.displot( - valid_relative_delta_fs_kcal, - kind="kde", - rug=True, - color="hotpink", - fill=True, - rug_kws=dict(alpha=0.5), - label=f"$N={len(relative_delta_fs)}$", - ) + fgrid = sns.displot( + valid_relative_delta_fs_kcal, + kind="kde", + rug=True, + color="hotpink", + fill=True, + rug_kws=dict(alpha=0.5), + label=f"$N={len(relative_delta_fs)}$", + ) plt.xlabel(r"Relative free energy to reference fragment / kcal mol$^{-1}$") plt.legend() + return fgrid.fig + def plot_convergence( complex_gens: List[int], @@ -322,7 +324,7 @@ def plot_cumulative_distribution( cmap: str = "PiYG", n_bins: int = 100, markers_kcal: List[float] = [-6, -5, -4, -3, -2, -1, 0, 1, 2], -) -> None: +) -> plt.Figure: """ Plot cumulative distribution of ligand affinities @@ -357,7 +359,8 @@ def plot_cumulative_distribution( x_span = X.max() - X.min() C = [cm(((X.max() - x) / x_span)) for x in X] - plt.bar(X[:-1], Y, color=C, width=X[1] - X[0], edgecolor="k") + fig, ax = plt.subplots() + ax.bar(X[:-1], Y, color=C, width=X[1] - X[0], edgecolor="k") for marker_kcal in markers_kcal: n_below = (relative_delta_fs_kcal < marker_kcal).astype(int).sum() @@ -373,6 +376,8 @@ def plot_cumulative_distribution( plt.xlabel(r"Relative free energy to reference fragment / kcal mol$^{-1}$") plt.ylabel("Cumulative $N$ 
ligands") + return fig + def _bootstrap( gens: List[GenAnalysis], @@ -730,17 +735,13 @@ def generate_plots( # Summary plots # we always regenerate these, since they concern all data - with save_summary_plot( - name="relative_fe_dist", - ): - plot_relative_distribution(binding_delta_fs) - plt.title("Relative free energy") - - with save_summary_plot( - name="cumulative_fe_dist", - ): - plot_cumulative_distribution(binding_delta_fs) - plt.title("Cumulative distribution") + fig = plot_relative_distribution(binding_delta_fs) + plt.title("Relative free energy") + save_summary_plot(name="relative_fe_dist", fig=fig) + + fig = plot_cumulative_distribution(binding_delta_fs) + plt.title("Cumulative distribution") + save_summary_plot(name="cumulative_fe_dist", fig=fig) with _save_table_pdf(path=output_dir, name="poor_complex_convergence_fe_table"): plot_poor_convergence_fe_table(series.transformations) diff --git a/fah_xchem/analysis/structures.py b/fah_xchem/analysis/structures.py index 18ac8b26..232ac2b4 100644 --- a/fah_xchem/analysis/structures.py +++ b/fah_xchem/analysis/structures.py @@ -350,8 +350,8 @@ def generate_representative_snapshot( None """ # create output directory if not present - os.makedirs(os.path.join(output_dir, f"RUN{run_id}"), exist_ok=True) run_id = transformation.transformation.run_id + os.makedirs(os.path.join(output_dir, f"RUN{run_id}"), exist_ok=True) if ( max_binding_free_energy is not None