From 3d633b33b5b537323c625cb955f0bfbfe0cb929d Mon Sep 17 00:00:00 2001
From: David Dotson <dotsdl@gmail.com>
Date: Tue, 16 Feb 2021 17:46:45 -0700
Subject: [PATCH 1/5] README typo fix

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 94400194..e3dd1094 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ Tools and infrastructure for automated compound discovery using Folding@home.
 Run transformation and compound free energy analysis, producing `results/analysis.json`:
 
 ``` sh
-fah-xchem run-analysis
+fah-xchem run-analysis \
         --compound-series-file compound-series.json \
         --config-file config.json \
         --fah-projects-dir /path/to/projects/ \

From 9cbbfb4dd220067b6d8471b2f6d9736c8392fc89 Mon Sep 17 00:00:00 2001
From: David Dotson <dotsdl@gmail.com>
Date: Tue, 16 Feb 2021 19:16:57 -0700
Subject: [PATCH 2/5] Fixes for CI

---
 .github/workflows/CI.yaml | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 5df60f65..ce1f403c 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -43,14 +43,11 @@ jobs:
         df -h
         ulimit -a
 
-    # More info on options: https://github.com/goanpeca/setup-miniconda
-    - uses: goanpeca/setup-miniconda@v1
+    - name: Configure conda
+      uses: conda-incubator/setup-miniconda@v2
       with:
         python-version: ${{ matrix.python-version }}
         environment-file: devtools/conda-envs/test_env.yaml
-
-        channels: conda-forge,defaults,omnia
-
         activate-environment: test
         auto-update-conda: true
         auto-activate-base: false

From f032048da2dd659815bf4e26ee64bdb80ab81451 Mon Sep 17 00:00:00 2001
From: David Dotson <dotsdl@gmail.com>
Date: Tue, 23 Feb 2021 15:35:16 -0700
Subject: [PATCH 3/5] Added `overwrite` flag; by default run-level snapshots
 and plots not regenerated

---
 fah_xchem/analysis/__init__.py   |  3 ++
 fah_xchem/analysis/plots.py      | 47 ++++++++++++++++++++++++++------
 fah_xchem/analysis/structures.py | 34 +++++++++++++++++++++--
 fah_xchem/app.py                 |  7 +++++
 4 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/fah_xchem/analysis/__init__.py b/fah_xchem/analysis/__init__.py
index 73dc57dd..6b7ea962 100644
--- a/fah_xchem/analysis/__init__.py
+++ b/fah_xchem/analysis/__init__.py
@@ -179,6 +179,7 @@ def generate_artifacts(
     plots: bool = True,
     report: bool = True,
     website: bool = True,
+    overwrite: bool = False,
 ) -> None:
 
     complex_project_dir = os.path.join(
@@ -199,6 +200,7 @@ def generate_artifacts(
             max_binding_free_energy=config.max_binding_free_energy,
             cache_dir=cache_dir,
             num_procs=num_procs,
+            overwrite=overwrite,
         )
 
     if plots:
@@ -208,6 +210,7 @@ def generate_artifacts(
             timestamp=timestamp,
             output_dir=output_dir,
             num_procs=num_procs,
+            overwrite=overwrite,
         )
 
     if snapshots and report:
diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py
index c434494b..2ccf31f0 100644
--- a/fah_xchem/analysis/plots.py
+++ b/fah_xchem/analysis/plots.py
@@ -522,7 +522,9 @@ def save_plot(
     """
 
     try:
-        yield
+        outfiles = [os.path.join(path, os.extsep.join([name, file_format]))
+                    for file_format in file_formats]
+        yield outfiles
 
         if timestamp is not None:
             plt.tight_layout(rect=(0, 0.05, 1, 1))  # leave space for timestamp
@@ -533,17 +535,20 @@ def save_plot(
         # Make sure the directory exists
         os.makedirs(path, exist_ok=True)
 
-        for file_format in file_formats:
+        for outfile in outfiles:
             plt.savefig(
-                os.path.join(path, os.extsep.join([name, file_format])),
+                outfile,
                 transparent=True,
             )
+
     finally:
         plt.close()
 
 
 def generate_transformation_plots(
-    transformation: TransformationAnalysis, output_dir: str
+    transformation: TransformationAnalysis,
+    output_dir: str,
+    overwrite: bool = False,
 ):
 
     run_id = transformation.transformation.run_id
@@ -551,7 +556,13 @@ def generate_transformation_plots(
         save_plot, path=os.path.join(output_dir, "transformations", f"RUN{run_id}")
     )
 
-    with save_transformation_plot(name="works"):
+    with save_transformation_plot(name="works") as outfiles:
+
+        # check if output files all exist; if so, skip unless we are told not to
+        if not overwrite:
+            if all(map(os.path.exists, outfiles)):
+                continue
+
         fig = plot_work_distributions(
             complex_forward_works=[
                 work.forward
@@ -578,7 +589,13 @@ def generate_transformation_plots(
         )
         fig.suptitle(f"RUN{run_id}")
 
-    with save_transformation_plot(name="convergence"):
+    with save_transformation_plot(name="convergence") as outfiles:
+
+        # check if output files all exist; if so, skip unless we are told not to
+        if not overwrite:
+            if all(map(os.path.exists, outfiles)):
+                continue
+
         # Filter to GENs for which free energy calculation is available
         complex_gens = [
             (gen.gen, gen.free_energy)
@@ -603,7 +620,12 @@ def generate_transformation_plots(
         )
         fig.suptitle(f"RUN{run_id}")
 
-    with save_transformation_plot(name="bootstrapped-CLONEs"):
+    with save_transformation_plot(name="bootstrapped-CLONEs") as outfiles:
+
+        # check if output files all exist; if so, skip unless we are told not to
+        if not overwrite:
+            if all(map(os.path.exists, outfiles)):
+                continue
 
         # Gather CLONES per GEN for run
         clones_per_gen = min(
@@ -633,6 +655,7 @@ def generate_plots(
     timestamp: dt.datetime,
     output_dir: str,
     num_procs: Optional[int] = None,
+    overwrite: bool = False,
 ) -> None:
     """
     Generate analysis plots in `output_dir`.
@@ -667,6 +690,11 @@ def generate_plots(
         "As of" timestamp to render on plots
     output_dir : str
         Where to write plot files
+    overwrite : bool
+        If `True`, write over existing output files if present.
+        Otherwise, skip writing output files for a given transformation when already present.
+        Assumes that for a given `run_id` the output files do not ever change;
+        does *no* checking that files wouldn't be different if inputs for a given `run_id` have changed.
     """
     from rich.progress import track
 
@@ -683,6 +711,7 @@ def generate_plots(
 
     # Summary plots
 
+    # we always regenerate these, since they concern all data
     with save_summary_plot(
         name="relative_fe_dist",
     ):
@@ -701,7 +730,9 @@ def generate_plots(
     # Transformation-level plots
 
     generate_transformation_plots_partial = partial(
-        generate_transformation_plots, output_dir=output_dir
+        generate_transformation_plots,
+        output_dir=output_dir,
+        overwrite=overwrite,
     )
 
     with multiprocessing.Pool(num_procs) as pool:
diff --git a/fah_xchem/analysis/structures.py b/fah_xchem/analysis/structures.py
index 1ca6aabb..18ac8b26 100644
--- a/fah_xchem/analysis/structures.py
+++ b/fah_xchem/analysis/structures.py
@@ -23,6 +23,16 @@
 from ..schema import TransformationAnalysis
 
 
+def _transformation_to_file_mapping(output_dir, run_id, ligand):
+    fnames = [f"{ligand}_protein.pdb",
+              f"{ligand}_complex.pdb",
+              f"{ligand}_ligand.sdf"]
+
+    outfiles = [os.path.join(output_dir, f"RUN{run_id}", f"{fname}") for fname in fnames]
+
+    return outfiles
+
+
 def load_trajectory(
     project_dir: str, project_data_dir: str, run: int, clone: int, gen: int
 ) -> md.Trajectory:
@@ -298,6 +308,7 @@ def generate_representative_snapshot(
     output_dir: str,
     max_binding_free_energy: Optional[float],
     cache_dir: Optional[str] = None,
+    overwrite: bool = False,
 ) -> None:
 
     r"""
@@ -311,6 +322,8 @@ def generate_representative_snapshot(
 
     Parameters
     ----------
+    transformation: TransformationAnalysis
+        The transformation record to operate on.
     project_dir : str
         Path to project directory (e.g. '/home/server/server2/projects/13422')
     project_data_dir : str
@@ -325,11 +338,20 @@ def generate_representative_snapshot(
         Path where snapshots will be written
     cache_dir : str or None, optional
         If specified, cache relevant parts of "htf.npz" file in a local directory of this name
+    overwrite : bool
+        If `True`, write over existing output files if present.
+        Otherwise, skip writing output files for a given transformation when already present.
+        Assumes that for a given `run_id` the output files do not ever change;
+        does *no* checking that files wouldn't be different if inputs for a given `run_id` have changed.
+
 
     Returns
     -------
     None
     """
+    # create output directory if not present
+    os.makedirs(os.path.join(output_dir, f"RUN{run_id}"), exist_ok=True)
+    run_id = transformation.transformation.run_id
 
     if (
         max_binding_free_energy is not None
@@ -348,6 +370,13 @@ def generate_representative_snapshot(
     ]
 
     for ligand in ["old", "new"]:
+
+        # check if output files all exist; if so, skip unless we are told not to
+        if not overwrite:
+            outfiles = _transformation_to_file_mapping(output_dir, run_id, ligand)
+            if all(map(os.path.exists, outfiles)):
+                continue
+
         if ligand == "old":
             gen_work = min(gen_works, key=lambda gen_work: gen_work[1].reverse)
             frame = 3  # TODO: Magic numbers
@@ -355,7 +384,6 @@ def generate_representative_snapshot(
             gen_work = min(gen_works, key=lambda gen_work: gen_work[1].forward)
             frame = 1  # TODO: Magic numbers
 
-        run_id = transformation.transformation.run_id
 
         # Extract representative snapshot
         try:
@@ -372,7 +400,6 @@ def generate_representative_snapshot(
             
             # Write protein PDB
             name = f"{ligand}_protein"
-            os.makedirs(os.path.join(output_dir, f"RUN{run_id}"), exist_ok=True)
             
             sliced_snapshots["protein"].save(
                 os.path.join(output_dir, f"RUN{run_id}", f"{name}.pdb")
@@ -395,6 +422,7 @@ def generate_representative_snapshot(
         except Exception as e:
             print(e)
 
+
 def generate_representative_snapshots(
     transformations: List[TransformationAnalysis],
     project_dir: str,
@@ -403,6 +431,7 @@ def generate_representative_snapshots(
     max_binding_free_energy: Optional[float],
     cache_dir: Optional[str],
     num_procs: Optional[int],
+    overwrite: bool = False,
 ) -> None:
     from rich.progress import track
 
@@ -415,6 +444,7 @@ def generate_representative_snapshots(
                 output_dir=output_dir,
                 cache_dir=cache_dir,
                 max_binding_free_energy=max_binding_free_energy,
+                overwrite=overwrite
             ),
             transformations,
         )
diff --git a/fah_xchem/app.py b/fah_xchem/app.py
index f59226a3..fbf1b952 100644
--- a/fah_xchem/app.py
+++ b/fah_xchem/app.py
@@ -143,6 +143,7 @@ def generate_artifacts(
     website: bool = True,
     log: str = "WARN",
     fragalysis_config: Optional[str] = None,
+    overwrite: bool = False,
 ) -> None:
     """
     Given results of free energy analysis as JSON, generate analysis
@@ -186,6 +187,11 @@ def generate_artifacts(
         Logging level
     fragalysis_config : str, optional
         File containing information for Fragalysis upload as JSON-encoded :class: ~`fah_xchem.schema.FragalysisConfig`
+    overwrite : bool
+        If `True`, write over existing output files if present.
+        Otherwise, skip writing output files for a given transformation when already present.
+        Assumes that for a given `run_id` the output files do not ever change;
+        does *no* checking that files wouldn't be different if inputs for a given `run_id` have changed.
     """
 
     logging.basicConfig(level=getattr(logging, log.upper()))
@@ -212,6 +218,7 @@ def generate_artifacts(
         report=report,
         website=website,
         fragalysis_config=fragalysis_config,
+        overwrite=overwrite,
     )
 
 

From 86a2474ace86537a23984af3a7adfb858cf082e7 Mon Sep 17 00:00:00 2001
From: David Dotson <dotsdl@gmail.com>
Date: Tue, 23 Feb 2021 19:55:25 -0700
Subject: [PATCH 4/5] `save_plot` as a context manager was awkward for skipping

Refactored as just a function that we call at the end; not sure if this
could have a negative consequence for error handling, but should now
allow us to do skipping if all plot files that would be produced for a run
are already present.
---
 fah_xchem/analysis/plots.py | 102 +++++++++++++++++++++---------------
 1 file changed, 60 insertions(+), 42 deletions(-)

diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py
index 2ccf31f0..24bebb14 100644
--- a/fah_xchem/analysis/plots.py
+++ b/fah_xchem/analysis/plots.py
@@ -450,8 +450,9 @@ def plot_bootstrapped_clones(
     return fig
 
 
-def _plot_updated_timestamp(timestamp: dt.datetime) -> None:
-    fig = plt.gcf()
+def _plot_updated_timestamp(timestamp: dt.datetime, fig: plt.Figure = None) -> None:
+    if fig is None:
+        fig = plt.gcf()
     fig.text(
         0.5,
         0.03,
@@ -490,10 +491,10 @@ def _save_table_pdf(path: str, name: str):
             logging.warning("Failed to save pdf table")
 
 
-@contextmanager
 def save_plot(
     path: str,
     name: str,
+    fig: plt.Figure,
     file_formats: Iterable[str] = ("png", "pdf"),
     timestamp: Optional[dt.datetime] = None,
 ) -> Generator:
@@ -516,33 +517,38 @@ def save_plot(
 
     Examples
     --------
-    >>> with save_plot('example/plots', 'test_plot', 'png'):
-    >>>     plt.plot(np.cos(np.linspace(-np.pi, np.pi)))
-    >>>     plt.title("My cool plot")
+    >>> fig, ax = plt.subplots()
+    >>> ax.plot(np.cos(np.linspace(-np.pi, np.pi))); ax.set_title("My cool plot")
+    >>> save_plot('example/plots', 'test_plot', fig, ('png',))
     """
+    outfiles = [os.path.join(path, os.extsep.join([name, file_format]))
+                for file_format in file_formats]
+
+    if timestamp is not None:
+        fig.tight_layout(rect=(0, 0.05, 1, 1))  # leave space for timestamp
+        _plot_updated_timestamp(timestamp, fig)
+    else:
+        fig.tight_layout()
 
-    try:
-        outfiles = [os.path.join(path, os.extsep.join([name, file_format]))
-                    for file_format in file_formats]
-        yield outfiles
+    # Make sure the directory exists
+    os.makedirs(path, exist_ok=True)
 
-        if timestamp is not None:
-            plt.tight_layout(rect=(0, 0.05, 1, 1))  # leave space for timestamp
-            _plot_updated_timestamp(timestamp)
-        else:
-            plt.tight_layout()
+    for outfile in outfiles:
+        fig.savefig(
+            outfile,
+            transparent=True,
+        )
 
-        # Make sure the directory exists
-        os.makedirs(path, exist_ok=True)
+    plt.close(fig=fig)
 
-        for outfile in outfiles:
-            plt.savefig(
-                outfile,
-                transparent=True,
-            )
 
-    finally:
-        plt.close()
+def _plot_to_file_mapping(
+    path: str,
+    name: str,
+    file_formats: Iterable[str] = ("png", "pdf"),
+) -> List:
+    return [os.path.join(path, os.extsep.join([name, file_format]))
+            for file_format in file_formats]
 
 
 def generate_transformation_plots(
@@ -552,17 +558,22 @@ def generate_transformation_plots(
 ):
 
     run_id = transformation.transformation.run_id
+
+    plot_output_dir = os.path.join(output_dir, "transformations", f"RUN{run_id}")
     save_transformation_plot = partial(
-        save_plot, path=os.path.join(output_dir, "transformations", f"RUN{run_id}")
+        save_plot, path=plot_output_dir
     )
 
-    with save_transformation_plot(name="works") as outfiles:
+    name = "works"
+    # check if output files all exist; if so, skip unless we are told not to
+    skip = False
+    if not overwrite:
+        outfiles = _plot_to_file_mapping(path=plot_output_dir, name=name)
+        if all(map(os.path.exists, outfiles)):
+            skip = True
 
-        # check if output files all exist; if so, skip unless we are told not to
-        if not overwrite:
-            if all(map(os.path.exists, outfiles)):
-                continue
 
+    if not skip:
         fig = plot_work_distributions(
             complex_forward_works=[
                 work.forward
@@ -588,14 +599,17 @@ def generate_transformation_plots(
             solvent_delta_f=transformation.solvent_phase.free_energy.delta_f.point,
         )
         fig.suptitle(f"RUN{run_id}")
+        save_transformation_plot(name=name, fig=fig)
 
-    with save_transformation_plot(name="convergence") as outfiles:
-
-        # check if output files all exist; if so, skip unless we are told not to
-        if not overwrite:
-            if all(map(os.path.exists, outfiles)):
-                continue
+    name = "convergence"
+    # check if output files all exist; if so, skip unless we are told not to
+    skip = False
+    if not overwrite:
+        outfiles = _plot_to_file_mapping(path=plot_output_dir, name=name)
+        if all(map(os.path.exists, outfiles)):
+            skip = True
 
+    if not skip:
         # Filter to GENs for which free energy calculation is available
         complex_gens = [
             (gen.gen, gen.free_energy)
@@ -619,14 +633,17 @@ def generate_transformation_plots(
             binding_delta_f_err=transformation.binding_free_energy.stderr,
         )
         fig.suptitle(f"RUN{run_id}")
+        save_transformation_plot(name=name, fig=fig)
 
-    with save_transformation_plot(name="bootstrapped-CLONEs") as outfiles:
-
-        # check if output files all exist; if so, skip unless we are told not to
-        if not overwrite:
-            if all(map(os.path.exists, outfiles)):
-                continue
+    name = "bootstrapped-CLONEs"
+    # check if output files all exist; if so, skip unless we are told not to
+    skip = False
+    if not overwrite:
+        outfiles = _plot_to_file_mapping(path=plot_output_dir, name=name)
+        if all(map(os.path.exists, outfiles)):
+            skip = True
 
+    if not skip:
         # Gather CLONES per GEN for run
         clones_per_gen = min(
             [
@@ -648,6 +665,7 @@ def generate_transformation_plots(
             n_gens=n_gens,
         )
         fig.suptitle(f"RUN{run_id}")
+        save_transformation_plot(name=name, fig=fig)
 
 
 def generate_plots(

From 7933e2606add5a69de1b307040b28701dd455503 Mon Sep 17 00:00:00 2001
From: David Dotson <dotsdl@gmail.com>
Date: Tue, 18 May 2021 19:41:37 -0700
Subject: [PATCH 5/5] Hotfixes to plotting, etc.

---
 fah_xchem/analysis/plots.py      | 47 ++++++++++++++++----------------
 fah_xchem/analysis/structures.py |  2 +-
 2 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/fah_xchem/analysis/plots.py b/fah_xchem/analysis/plots.py
index 24bebb14..1001b8e4 100644
--- a/fah_xchem/analysis/plots.py
+++ b/fah_xchem/analysis/plots.py
@@ -106,7 +106,7 @@ def _filter_inclusive(
 
 def plot_relative_distribution(
     relative_delta_fs: List[float], min_delta_f: float = -30, max_delta_f: float = 30
-) -> None:
+) -> plt.Figure:
     """
     Plot the distribution of relative free energies
 
@@ -124,18 +124,20 @@ def plot_relative_distribution(
     )
     valid_relative_delta_fs_kcal = valid_relative_delta_fs * KT_KCALMOL
 
-    sns.displot(
-        valid_relative_delta_fs_kcal,
-        kind="kde",
-        rug=True,
-        color="hotpink",
-        fill=True,
-        rug_kws=dict(alpha=0.5),
-        label=f"$N={len(relative_delta_fs)}$",
-    )
+    fgrid = sns.displot(
+            valid_relative_delta_fs_kcal,
+            kind="kde",
+            rug=True,
+            color="hotpink",
+            fill=True,
+            rug_kws=dict(alpha=0.5),
+            label=f"$N={len(relative_delta_fs)}$",
+        )
     plt.xlabel(r"Relative free energy to reference fragment / kcal mol$^{-1}$")
     plt.legend()
 
+    return fgrid.fig
+
 
 def plot_convergence(
     complex_gens: List[int],
@@ -322,7 +324,7 @@ def plot_cumulative_distribution(
     cmap: str = "PiYG",
     n_bins: int = 100,
     markers_kcal: List[float] = [-6, -5, -4, -3, -2, -1, 0, 1, 2],
-) -> None:
+) -> plt.Figure:
     """
     Plot cumulative distribution of ligand affinities
 
@@ -357,7 +359,8 @@ def plot_cumulative_distribution(
     x_span = X.max() - X.min()
     C = [cm(((X.max() - x) / x_span)) for x in X]
 
-    plt.bar(X[:-1], Y, color=C, width=X[1] - X[0], edgecolor="k")
+    fig, ax  = plt.subplots()
+    ax.bar(X[:-1], Y, color=C, width=X[1] - X[0], edgecolor="k")
 
     for marker_kcal in markers_kcal:
         n_below = (relative_delta_fs_kcal < marker_kcal).astype(int).sum()
@@ -373,6 +376,8 @@ def plot_cumulative_distribution(
     plt.xlabel(r"Relative free energy to reference fragment / kcal mol$^{-1}$")
     plt.ylabel("Cumulative $N$ ligands")
 
+    return fig
+
 
 def _bootstrap(
     gens: List[GenAnalysis],
@@ -730,17 +735,13 @@ def generate_plots(
     # Summary plots
 
     # we always regenerate these, since they concern all data
-    with save_summary_plot(
-        name="relative_fe_dist",
-    ):
-        plot_relative_distribution(binding_delta_fs)
-        plt.title("Relative free energy")
-
-    with save_summary_plot(
-        name="cumulative_fe_dist",
-    ):
-        plot_cumulative_distribution(binding_delta_fs)
-        plt.title("Cumulative distribution")
+    fig = plot_relative_distribution(binding_delta_fs)
+    plt.title("Relative free energy")
+    save_summary_plot(name="relative_fe_dist", fig=fig)
+
+    fig = plot_cumulative_distribution(binding_delta_fs)
+    plt.title("Cumulative distribution")
+    save_summary_plot(name="cumulative_fe_dist", fig=fig)
         
     with _save_table_pdf(path=output_dir, name="poor_complex_convergence_fe_table"):
         plot_poor_convergence_fe_table(series.transformations)
diff --git a/fah_xchem/analysis/structures.py b/fah_xchem/analysis/structures.py
index 18ac8b26..232ac2b4 100644
--- a/fah_xchem/analysis/structures.py
+++ b/fah_xchem/analysis/structures.py
@@ -350,8 +350,8 @@ def generate_representative_snapshot(
     None
     """
     # create output directory if not present
-    os.makedirs(os.path.join(output_dir, f"RUN{run_id}"), exist_ok=True)
     run_id = transformation.transformation.run_id
+    os.makedirs(os.path.join(output_dir, f"RUN{run_id}"), exist_ok=True)
 
     if (
         max_binding_free_energy is not None