Skip to content

Commit

Permalink
Resolve inconsistency between bar_with_bootstrapped_uncertainty and d…
Browse files Browse the repository at this point in the history
…f_and_err_from_u_kln (#1324)

* Adds failing test
* Return max of MBAR error (if finite) or bootstrapped error
* Renamed `bar_with_bootstrapped_uncertainty` to `bar_with_pessimistic_uncertainty`
---------

Co-authored-by: Matt Wittmann <[email protected]>
Co-authored-by: Josh Fass <[email protected]>
  • Loading branch information
3 people authored Jun 10, 2024
1 parent c9191b5 commit ff9cb5f
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 18 deletions.
4 changes: 2 additions & 2 deletions attic/rabfe/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ def deltaG_from_results(
fwd_delta_u = model.beta * delta_Us[lambda_idx][0]
rev_delta_u = model.beta * delta_Us[lambda_idx][1]

dG_exact, exact_bar_err = bar_with_bootstrapped_uncertainty(fwd_delta_u, rev_delta_u)
dG_exact, exact_bar_err = bar_with_pessimistic_uncertainty(fwd_delta_u, rev_delta_u)

bar_dG += dG_exact / model.beta
exact_bar_overlap = endpoint_correction.overlap_from_cdf(fwd_delta_u, rev_delta_u)
Expand Down Expand Up @@ -407,7 +407,7 @@ def deltaG_from_results(
rhs_xs=results[-1].xs,
seed=2021,
)
dG_endpoint, endpoint_err = bar_with_bootstrapped_uncertainty(
dG_endpoint, endpoint_err = bar_with_pessimistic_uncertainty(
model.beta * lhs_du, model.beta * np.array(rhs_du)
)
dG_endpoint = dG_endpoint / model.beta
Expand Down
Binary file added tests/data/zero_overlap_ukln.npz
Binary file not shown.
42 changes: 39 additions & 3 deletions tests/test_bar.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from functools import partial
from pathlib import Path
from typing import Tuple
from unittest.mock import patch

import numpy as np
import pymbar
Expand All @@ -8,7 +10,7 @@
from pymbar.testsystems import ExponentialTestCase

from timemachine.fe.bar import (
bar_with_bootstrapped_uncertainty,
bar_with_pessimistic_uncertainty,
bootstrap_bar,
compute_fwd_and_reverse_df_over_time,
df_and_err_from_u_kln,
Expand Down Expand Up @@ -83,8 +85,14 @@ def test_bootstrap_bar(sigma):

# estimate 3 times
df_ref, df_err_ref = df_and_err_from_u_kln(u_kln)
df_0, bootstrap_samples = bootstrap_bar(u_kln, n_bootstrap=n_bootstrap)
df_1, bootstrap_sigma = bar_with_bootstrapped_uncertainty(u_kln)
df_0, ddf_0, bootstrap_samples = bootstrap_bar(u_kln, n_bootstrap=n_bootstrap)
df_1, bootstrap_sigma = bar_with_pessimistic_uncertainty(u_kln)

# Full errors should match exactly
assert df_err_ref == ddf_0

# The bootstrapped error should be as large or larger than the full error
assert bootstrap_sigma >= df_err_ref

# assert estimates identical, uncertainties comparable
print(f"bootstrap uncertainty = {bootstrap_sigma}, pymbar.MBAR uncertainty = {df_err_ref}")
Expand Down Expand Up @@ -216,3 +224,31 @@ def test_compute_fwd_and_reverse_df_over_time(frames_per_step):
# The values at the end should be nearly identical since they contain all the samples
assert np.allclose(fwd[-1], rev[-1])
assert np.allclose(fwd_err[-1], rev_err[-1])


def test_bootstrap_bar_and_regular_bar_match():
"""In cases where the u_kln has effectively no overlap, bootstrapping returns 0.0
since the MBAR estimate is always zero. Checks that `bar_with_pessimistic_uncertainty` returns the (non-zero) error
from the MBAR estimate computed on all samples in these cases.
"""
test_ukln = Path(__file__).parent / "data" / "zero_overlap_ukln.npz"
u_kln = np.load(open(test_ukln, "rb"))["u_kln"]

# The overlap should be closer to zero
overlap = pair_overlap_from_ukln(u_kln)
np.testing.assert_allclose(overlap, 0.0, atol=1e-12)

boot_df, boot_df_err = bar_with_pessimistic_uncertainty(u_kln)
df, df_err = df_and_err_from_u_kln(u_kln)
assert boot_df == df
assert boot_df_err == df_err


@patch("timemachine.fe.bar.pymbar.MBAR.getFreeEnergyDifferences")
def test_nan_bar_error(mock_energy_diff):
df = np.zeros(shape=(1, 2))
df_err = np.ones(shape=(1, 2)) * np.nan
mock_energy_diff.return_value = (df, df_err)
dummy_ukln = np.ones(shape=(2, 2, 100))
_, boot_df_err = bar_with_pessimistic_uncertainty(dummy_ukln)
assert boot_df_err == 0.0
2 changes: 1 addition & 1 deletion tests/test_fe_absolute_hydration.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_run_solvent():

assert res.plots.overlap_summary_png is not None
assert res.plots.overlap_detail_png is not None
assert np.linalg.norm(res.final_result.dG_errs) < 10
assert np.linalg.norm(res.final_result.dG_errs) < 20.0
assert len(res.frames) == n_windows
assert len(res.boxes) == n_windows
assert len(res.frames[0]) == n_frames
Expand Down
49 changes: 39 additions & 10 deletions timemachine/fe/bar.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def df_from_u_kln(

def bootstrap_bar(
u_kln: NDArray, n_bootstrap: int = 100, maximum_iterations: int = DEFAULT_MAXIMUM_ITERATIONS
) -> Tuple[float, NDArray]:
) -> Tuple[float, float, NDArray]:
"""Given a 2-state u_kln matrix, subsample u_kln with replacement and re-run df_from_u_kln many times
Parameters
Expand All @@ -166,8 +166,12 @@ def bootstrap_bar(
Returns
-------
best_estimate : float
BAR(w_F, w_R, computeUncertainty=False)
bootstrap_samples: array
BAR(w_F, w_R)
best_estimate_err : float
MBAR(w_F, w_R) error estimate, using all samples
bootstrap_samples : array
shape (n_bootstrap,)
Notes
Expand All @@ -178,7 +182,9 @@ def bootstrap_bar(
u_kn, N_k = ukln_to_ukn(u_kln)
mbar = pymbar.MBAR(u_kn, N_k, maximum_iterations=maximum_iterations)

full_bar_result = mbar.getFreeEnergyDifferences(compute_uncertainty=False)[0][0, 1]
df, ddf = mbar.getFreeEnergyDifferences()
full_bar_result = df[0, 1]
full_bar_err = ddf[0, 1]

_, _, n = u_kln.shape

Expand All @@ -196,24 +202,47 @@ def bootstrap_bar(
)
bootstrap_samples.append(bar_result)

return full_bar_result, np.array(bootstrap_samples)
return full_bar_result, full_bar_err, np.array(bootstrap_samples)


def bar_with_bootstrapped_uncertainty(
def bar_with_pessimistic_uncertainty(
u_kln: NDArray, n_bootstrap=100, maximum_iterations: int = DEFAULT_MAXIMUM_ITERATIONS
) -> Tuple[float, float]:
"""Given 2-state u_kln, returns free energy difference and uncertainty computed by bootstrapping."""
"""Given 2-state u_kln, returns free energy difference and the uncertainty. The uncertainty can be produced either by
BAR using all samples or the bootstrapped error, whichever is greater.
Parameters
----------
u_kln : array
2-state u_kln matrix
n_bootstrap : int
number of bootstrap samples
maximum_iterations : int
maximum number of solver iterations to use for each sample
Returns
-------
best_estimate : float
BAR(w_F, w_R)
uncertainty : float
`max(error_estimates)` where `error_estimates = [bootstrapped_bar_stddev, two_state_mbar_uncertainty]`
"""

df, bootstrap_dfs = bootstrap_bar(u_kln, n_bootstrap=n_bootstrap, maximum_iterations=maximum_iterations)
df, ddf, bootstrap_dfs = bootstrap_bar(u_kln, n_bootstrap=n_bootstrap, maximum_iterations=maximum_iterations)

# warn if bootstrap distribution deviates significantly from normality
normaltest_result = normaltest(bootstrap_dfs)
pvalue_threshold = 1e-3 # arbitrary, small
if normaltest_result.pvalue < pvalue_threshold:
logger.warning(f"bootstrapped errors non-normal: {normaltest_result}")

# regardless, summarize as if normal
ddf = np.std(bootstrap_dfs)
# Take the max of the BAR error estimate using all samples and the bootstrapped error. Summarize as if normal regardless
# Use np.maximum to always return the NaN
if not np.isfinite(ddf):
logger.warning(f"BAR error estimate is not finite, setting to zero: {ddf}")
ddf = 0.0
ddf = np.maximum(ddf, np.std(bootstrap_dfs))
return df, ddf


Expand Down
4 changes: 2 additions & 2 deletions timemachine/fe/free_energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from timemachine.constants import BOLTZ
from timemachine.fe import model_utils, topology
from timemachine.fe.bar import (
bar_with_bootstrapped_uncertainty,
bar_with_pessimistic_uncertainty,
df_and_err_from_u_kln,
pair_overlap_from_ukln,
works_from_ukln,
Expand Down Expand Up @@ -696,7 +696,7 @@ def estimate_free_energy_bar(u_kln_by_component: NDArray, temperature: float) ->

u_kln = u_kln_by_component.sum(0)

df, df_err = bar_with_bootstrapped_uncertainty(u_kln) # reduced units
df, df_err = bar_with_pessimistic_uncertainty(u_kln) # reduced units

kBT = BOLTZ * temperature
dG, dG_err = df * kBT, df_err * kBT # kJ/mol
Expand Down

0 comments on commit ff9cb5f

Please sign in to comment.