diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 217ef3203..d68e16131 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -13,29 +13,43 @@ on:
     #   (from https://help.github.com/en/actions/reference/events-that-trigger-workflows#scheduled-events-schedule)
     - cron: "0 0 * * *"
 
+concurrency:
+  group: "${{ github.workflow }}-${{ github.ref }}"
+  cancel-in-progress: true
+
 jobs:
   test:
-    name: Test on ${{ matrix.cfg.os }}, Python ${{ matrix.cfg.python-version }}, OpenMM ${{ matrix.cfg.openmm }}
-    runs-on: ${{ matrix.cfg.os }}
+    name: ${{ matrix.os }}, py-${{ matrix.python-version }}, OpenMM-${{ matrix.openmm }}, pymbar-${{ matrix.pymbar-version }}
+    runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
         cfg:
-          - { os: macOS-latest,   python-version: '3.8', openmm: latest  }
-          - { os: windows-latest, python-version: '3.8.10', openmm: latest  }
-          - { os: ubuntu-latest,  python-version: '3.8', openmm: latest  }
-          - { os: ubuntu-latest,  python-version: '3.9', openmm: latest  }
-          - { os: ubuntu-latest,  python-version: '3.8', openmm: nightly }
-          - { os: ubuntu-latest,  python-version: '3.10', openmm: latest  }
-          - { os: ubuntu-latest,  python-version: '3.8', openmm: beta  }
-          - { os: ubuntu-latest,  python-version: '3.9', openmm: beta  }
+          - { os: macOS-latest,   python-version: '3.8', openmm: dev  }
+          - { os: windows-latest, python-version: '3.8.10', openmm: dev  }
+          - { os: ubuntu-latest,  python-version: '3.9', openmm: dev  }
+          - { os: ubuntu-latest,  python-version: '3.10', openmm: dev  }
 
     env:
       OPENMM: ${{ matrix.cfg.openmm }}
+        python-version: ["3.9", "3.10"]
+        openmm: ["7.7", "8.0"]
+        os: [macOS-latest, ubuntu-latest]
+        pymbar-version: ["4"]
+        include:
+          # Test openmm dev build on newest python + linux
+          - openmm: "dev"
+            python-version: "3.10"
+            os: ubuntu-latest
+            pymbar-version: "4"
+          # Have one job test pymbar 3 support
+          - openmm: "dev"
+            python-version: "3.10"
+            os: ubuntu-latest
+            pymbar-version: "3"
 
     steps:
-      - uses: actions/checkout@v1
-
+      - uses: actions/checkout@v3
       - name: Additional info about the build
         shell: bash
         run: |
@@ -43,44 +57,35 @@ jobs:
           df -h
           ulimit -a
 
-      # More info on options: https://github.com/conda-incubator/setup-miniconda
-      - uses: conda-incubator/setup-miniconda@v2
+      - name: Setup micromamba for openmm dev
+        uses: mamba-org/provision-with-micromamba@main
+        if: ${{ matrix.openmm == 'dev' }}
         with:
-          python-version: ${{ matrix.cfg.python-version }}
+          channels: jaimergp/label/unsupported-cudatoolkit-shim,conda-forge/label/openmm_rc,conda-forge
           environment-file: devtools/conda-envs/test_env.yaml
-          channels: conda-forge,defaults
-          activate-environment: test
-          auto-update-conda: true
-          auto-activate-base: false
-          show-channel-urls: true
-
-      - name: Install OpenMM nightly
-        shell: bash -l {0}
-        if: matrix.cfg.openmm == 'nightly'
-        run: |
-          conda install --yes -c omnia-dev openmm
-
-      - name: Install OpenMM beta
-        shell: bash -l {0}
-        if: matrix.cfg.openmm == 'beta'
-        run: |
-          conda install -c conda-forge/label/openmm_rc -c conda-forge openmm
-
+          channel-priority: flexible
+          environment-name: openmmtools-test
+          extra-specs: |
+            python==${{ matrix.python-version }}
+            openmm==8.1
+            pymbar==${{ matrix.pymbar-version }}.*
+            
       - name: Install package
         shell: bash -l {0}
         run: |
           python -m pip install . --no-deps
-          conda list
+          micromamba list
+          micromamba info
 
       - name: Run tests
         shell: bash -l {0}
         run: |
           # pytest -v --cov=openmmtools --cov-report=xml --color=yes openmmtools/tests/
-          nosetests openmmtools/tests --nocapture --verbosity=2 --with-timer --with-doctest -a '!slow'
+          nosetests openmmtools/tests --nocapture --cover-tests --with-coverage --cover-package=openmmtools --cover-xml --cover-xml-file=coverage.xml --verbosity=2 --with-timer --with-doctest -a '!slow'
 
       - name: CodeCov
-        uses: codecov/codecov-action@v1
+        uses: codecov/codecov-action@v3
         with:
           file: ./coverage.xml
           flags: unittests
-          name: codecov-${{ matrix.cfg.os }}-py${{ matrix.cfg.python-version }}
+          name: codecov-${{ matrix.os }}-py${{ matrix.python-version }}-openmm-${{ matrix.openmm }}
diff --git a/.github/workflows/self-hosted-gpu-test.yml b/.github/workflows/self-hosted-gpu-test.yml
index bd66f3cab..2994e91f9 100644
--- a/.github/workflows/self-hosted-gpu-test.yml
+++ b/.github/workflows/self-hosted-gpu-test.yml
@@ -11,7 +11,7 @@ on:
 jobs:
   start-runner:
     name: Start self-hosted EC2 runner
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
     outputs:
       label: ${{ steps.start-ec2-runner.outputs.label }}
       ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
@@ -28,10 +28,10 @@ jobs:
         with:
           mode: start
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
-          ec2-image-id: ami-096d499d418f88d88
-          ec2-instance-type: p2.xlarge
-          subnet-id: subnet-0e82552b8c708a999
-          security-group-id: sg-0589e74ec03965add
+          ec2-image-id: ami-04d16a12bbc76ff0b
+          ec2-instance-type: g4dn.xlarge
+          subnet-id: subnet-0dee8543e12afe0cd # us-east-1a
+          security-group-id: sg-0f9809618550edb98
           # iam-role-name: self-hosted-runner # optional, requires additional permissions
           aws-resource-tags: > # optional, requires additional permissions
             [
@@ -46,6 +46,8 @@ jobs:
       TEST_MODE: GPU
       OPENMM: ${{ matrix.cfg.openmm }}
       OE_LICENSE: ${{ github.workspace }}/oe_license.txt
+      HOME: /home/ec2-user
+      CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
 
     defaults:
       run:
@@ -55,7 +57,7 @@ jobs:
       - uses: conda-incubator/setup-miniconda@v2
         with:
           installer-url: https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh
-          python-version: 3.8
+          python-version: "3.10"
           activate-environment: test
           channels: conda-forge,defaults
           environment-file: devtools/conda-envs/test_env.yaml
@@ -66,7 +68,7 @@ jobs:
       - name: Refine test env
         shell: bash -l {0}
         run: |
-          mamba install -y cudatoolkit==11.0.3 openmm==7.7
+          mamba install -y cudatoolkit==11.7 openmm>=8.0
       
       - name: Additional info about the build
         shell: bash -l {0}
@@ -93,7 +95,7 @@ jobs:
       - name: Test the package
         shell: bash -l {0}
         run: |
-          nosetests openmmtools/tests --nocapture --verbosity=2 --with-timer --with-doctest
+          pytest -v --cov-report xml --durations=0 --cov=openmmtools openmmtools/tests 
 
       - name: Codecov
         if: ${{ github.repository == 'choderalab/openmmtools'
@@ -103,7 +105,7 @@ jobs:
           file: ./coverage.xml
           name: codecov-${{ matrix.cfg.os }}-py${{ matrix.cfg.python-version }}
           flags: unittests
-          fail_ci_if_error: true
+          fail_ci_if_error: false
 
   stop-runner:
     name: Stop self-hosted EC2 runner
diff --git a/.gitignore b/.gitignore
index e08538a58..64e7ce099 100644
--- a/.gitignore
+++ b/.gitignore
@@ -100,3 +100,6 @@ ENV/
 
 # mypy
 .mypy_cache/
+
+# macOS
+.DS_Store
diff --git a/README.md b/README.md
index d676f2a8d..7cf81e991 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,7 @@
 [![Downloads Badge](https://anaconda.org/omnia/openmmtools/badges/downloads.svg)](https://anaconda.org/omnia/openmmtools/files)
 [![Documentation Status](https://readthedocs.org/projects/openmmtools/badge/?version=latest)](https://openmmtools.readthedocs.io/en/latest/?badge=latest)
 [![Zenodo DOI Badge](https://zenodo.org/badge/25416166.svg)](https://zenodo.org/badge/latestdoi/25416166)
+[![codecov](https://codecov.io/gh/choderalab/openmmtools/branch/main/graph/badge.svg?token=aGwEahm2CY)](https://codecov.io/gh/choderalab/openmmtools)
 
 ## OpenMMTools
 
@@ -45,4 +46,4 @@ Major contributors include:
 * [Iván Pulido](https://github.com/ijpulidos)
 * [Ivy Zhang](https://github.com/zhang-ivy)
 * [Dominic Rufa](https://github.com/dominicrufa)
-* [Mike Henry](https://github.com/mikemhenry)
\ No newline at end of file
+* [Mike Henry](https://github.com/mikemhenry)
diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml
index 4be21c418..ec8a0684b 100644
--- a/devtools/conda-envs/test_env.yaml
+++ b/devtools/conda-envs/test_env.yaml
@@ -1,21 +1,20 @@
-name: test
+name: openmmtools-test
 channels:
   - conda-forge
-  - defaults
-  - omnia
 dependencies:
     # Base depends
   - cython
+  - hdf5 <=1.14.0  # Macos has problem with newer releases (1.14.x) to date
   - libnetcdf >=4.6.2 # workaround for libssl issues
   - mdtraj
   - mpiplus
   - netcdf4 >=1.4.2 # after bugfix: "always return masked array by default, even if there are no masked values"
   - numba
   - numpy
-  - openmm >=7.5.0
+  - openmm
   #- parmed  # Test to see if this fixes the docs
   - pip
-  - pymbar <4
+  - pymbar
   - python
   - python
   - pyyaml
@@ -28,8 +27,9 @@ dependencies:
   - nose-timer
   - pytest
   - pytest-cov
+  - coverage
   
   # docs
   - numpydoc
   - sphinxcontrib-bibtex
-  
+  - sphinx-rtd-theme
diff --git a/devtools/conda-recipe/README.md b/devtools/conda-recipe/README.md
deleted file mode 100644
index 01c1972a4..000000000
--- a/devtools/conda-recipe/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-This is a recipe for building the current development package into a conda
-binary.
-
-The installation on travis-ci is done by building the conda package, installing
-it, running the tests, and then if successful pushing the package to binstar
-(and the docs to AWS S3). The binstar auth token is an encrypted environment
-variable generated using:
-```
-binstar auth -n openmmtools -o omnia --max-age 22896000 -c --scopes api:write
-```
-and then saved in the environment variable `BINSTAR_TOKEN`.
-
-You can set up travis to store an encrypted token via
-```
-gem install travis
-travis encrypt BINSTAR_TOKEN=xx
-```
-
-where xx is the token output by binstar. The final command should print a line (containing 'secure') for inclusion in your `.travis.yml` file.
diff --git a/devtools/conda-recipe/bld.bat b/devtools/conda-recipe/bld.bat
deleted file mode 100644
index c40a9bbef..000000000
--- a/devtools/conda-recipe/bld.bat
+++ /dev/null
@@ -1,2 +0,0 @@
-"%PYTHON%" setup.py install
-if errorlevel 1 exit 1
diff --git a/devtools/conda-recipe/build.sh b/devtools/conda-recipe/build.sh
deleted file mode 100755
index b6f546baa..000000000
--- a/devtools/conda-recipe/build.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-$PYTHON setup.py install
-
-# Copy examples
-# TODO: Have setup.py install examples instead?
-mkdir $PREFIX/share/openmmtools
-cp -r examples $PREFIX/share/openmmtools/
diff --git a/devtools/conda-recipe/meta.yaml b/devtools/conda-recipe/meta.yaml
deleted file mode 100644
index ab852c7b3..000000000
--- a/devtools/conda-recipe/meta.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-package:
-  name: openmmtools-dev
-  version: 0.0.0
-
-source:
-  path: ../../
-
-build:
-  preserve_egg_dir: True
-  number: 0
-
-requirements:
-  build:
-    - python
-    - setuptools
-    - openmm >=7.3.1
-    - cython
-
-  run:
-    - python
-    - numpy
-    - scipy
-    - openmm >=7.3.1
-    - mdtraj
-    - netcdf4 >=1.4.2 # after bugfix: "always return masked array by default, even if there are no masked values"
-    - libnetcdf >=4.6.2 # workaround for libssl issues
-    - pyyaml
-    - cython
-    - sphinxcontrib-bibtex
-    - mpiplus
-    - pymbar
-    - pyyaml
-    - numba
-    - nose
-
-test:
-  requires:
-    - nose
-    - pymbar
-# Until the NetCDF issues are resolved, actual test runs are subject to run_test.[bat,sh] (9/16/2018)
-#  imports:
-#    - openmmtools
-
-about:
-  home: https://github.com/choderalab/openmmtools
-  license: MIT License
diff --git a/devtools/conda-recipe/run_test.bat b/devtools/conda-recipe/run_test.bat
deleted file mode 100644
index 6882663e5..000000000
--- a/devtools/conda-recipe/run_test.bat
+++ /dev/null
@@ -1,3 +0,0 @@
-conda uninstall --force --yes netcdf4
-pip install netCDF4
-python -c "import openmmtools"
\ No newline at end of file
diff --git a/devtools/conda-recipe/run_test.sh b/devtools/conda-recipe/run_test.sh
deleted file mode 100644
index 0e1365b25..000000000
--- a/devtools/conda-recipe/run_test.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/usr/bin/env bash
-python -c "import openmmtools"
\ No newline at end of file
diff --git a/docs/releasehistory.rst b/docs/releasehistory.rst
index 7813c4b87..1f47286e3 100644
--- a/docs/releasehistory.rst
+++ b/docs/releasehistory.rst
@@ -1,8 +1,57 @@
 Release History
 ***************
 
+
+0.23.1 - Bugfix release
+=======================
+
+Bugfixes
+--------
+
+- Fix issue where if ``None`` was used for ``online_analysis_interval`` an error would be thrown (issue `#708 <https://github.com/choderalab/openmmtools/issues/708>`_ PR `#710 <https://github.com/choderalab/openmmtools/pull/710`_)
+
+0.23.0 - latest numba support and real time stats enhancements
+==============================================================
+
+Please note that there is an API breaking change. To ensure consistency of the data when appending real time stats make sure that you make the ``online_analysis_interval`` of your ``MultiStateSampler`` object match the ``checkpoint_interval`` of your ``MultiStateReporter``. It will error if this is not the case.
+
+Enhancements
+------------
+- Running with NVIDIA GPUs in Exclusive Process mode now raises a warning (issue `#697 <https://github.com/choderalab/openmmtools/issues/697>`_, PR `#699 <https://github.com/choderalab/openmmtools/pull/699>`_)
+
+Bugfixes
+--------
+- Fix metadata for netcdf files, specifying openmmtools for the ``program`` metadata (issue `#694 <https://github.com/choderalab/openmmtools/issues/694>`_, PR `#704 <https://github.com/choderalab/openmmtools/pull/704>`_).
+- Real time statistics YAML file gets appended instead of overwritten when extending or resumimng simulations (issue `#691 <https://github.com/choderalab/openmmtools/issues/691>`_, PR `#692 <https://github.com/choderalab/openmmtools/pull/692>`_).
+- Error when resuming simulations with numba 0.57 fixed by avoiding using ``numpy.MaskedArray`` when deserializing ``.nc`` files (issue `#700 <https://github.com/choderalab/openmmtools/issues/700>`_, PR `#701 <https://github.com/choderalab/openmmtools/pull/701>`_)
+
+
+0.22.1 - Bugfix release
+=======================
+
+Bugfixes
+--------
+
+- Fixed issue where the error message thrown from openMM changed, so we need a case insensitive check. This was already fixed in most of the code base but one spot was missed. (PR `#684 <https://github.com/choderalab/openmmtools/pull/684>`_)
+
+0.22.0 - pymbar 4 support and gentle equilibration
+==================================================
+
+Enhancements
+------------
+- Openmmtools now supports both Pymbar 3 and 4 versions. (PR `#659 <https://github.com/choderalab/openmmtools/pull/659>`_)
+- Gentle equilibration protocol utility function available in ``openmmtools.utils.gentle_equilibration`` (PR `#669 <https://github.com/choderalab/openmmtools/pull/669>`_).
+- Timing information for multiple state sampler is now reported by default (PRs `#679 <https://github.com/choderalab/openmmtools/pull/679>`_ and `#671 <https://github.com/choderalab/openmmtools/issues/671>`_).
+
+Bugfixes
+--------
+- Users were not able to distinguish the exceptions caught during dynamics. Warnings are now raised when an exception is being caught (Issue `#643 <https://github.com/choderalab/openmmtools/issues/643>`_ PR `#658 <https://github.com/choderalab/openmmtools/pull/658>`_).
+- Deserializing MCMC moves objects from versions <=0.21.4 resulted in error finding the key. Fixed by catching the exception and raising a warning when key is not found (Issue `#618 <https://github.com/choderalab/openmmtools/issues/618>`_ PR `#675 <https://github.com/choderalab/openmmtools/pull/675>`_).
+- Different improvements in documentation strings and readthedocs documentation generation (Issues `#620 <https://github.com/choderalab/openmmtools/issues/620>`_ `#641 <https://github.com/choderalab/openmmtools/issues/641>`_ `#548 <https://github.com/choderalab/openmmtools/issues/548>`_. PR `#676 <https://github.com/choderalab/openmmtools/pull/676>`_)
+- Support for newer NetCDF versions (1.6 branch) by not using zlib compression for varying length variables. (PR `#654 <https://github.com/choderalab/openmmtools/pull/654>`_).
+
 0.21.5 - Bugfix release
-======================
+=======================
 
 Changed behaviors
 -----------------
@@ -115,7 +164,7 @@ Cleanup
 - Remove leftover `six` imports and `xrange` (`#504 <https://github.com/choderalab/openmmtools/pull/504>`_)
 
 0.20.1 - Bugfix release
-========================================
+=======================
 
 Enhancements
 ------------
diff --git a/openmmtools/alchemy/__init__.py b/openmmtools/alchemy/__init__.py
new file mode 100644
index 000000000..ec5bb5842
--- /dev/null
+++ b/openmmtools/alchemy/__init__.py
@@ -0,0 +1,22 @@
+"""
+Alchemical factory for free energy calculations package that operates directly on OpenMM
+System objects.
+
+DESCRIPTION
+
+This package contains enumerative factories for generating alchemically-modified System objects
+usable for the calculation of free energy differences of hydration or ligand binding.
+
+Provided classes include:
+
+- :class:`openmmtools.alchemy.alchemy.AlchemicalFunction`
+- :class:`openmmtools.alchemy.alchemy.AlchemicalState`
+- :class:`openmmtools.alchemy.alchemy.AlchemicalRegion`
+- :class:`openmmtools.alchemy.alchemy.AbsoluteAlchemicalFactory`
+- :class:`openmmtools.alchemy.alchemy.AlchemicalStateError`
+
+"""
+
+# Automatically importing everything from the lower level alchemy module to avoid API breakage
+from .alchemy import AlchemicalState, AlchemicalFunction, AlchemicalStateError, AlchemicalRegion, \
+    AbsoluteAlchemicalFactory
diff --git a/openmmtools/alchemy.py b/openmmtools/alchemy/alchemy.py
similarity index 100%
rename from openmmtools/alchemy.py
rename to openmmtools/alchemy/alchemy.py
diff --git a/openmmtools/mcmc.py b/openmmtools/mcmc.py
index 5c76ece9f..2ed1901fe 100644
--- a/openmmtools/mcmc.py
+++ b/openmmtools/mcmc.py
@@ -109,12 +109,14 @@
 # GLOBAL IMPORTS
 # =============================================================================
 
-import os
 import abc
 import copy
 import logging
+import os
+import warnings
 
 import numpy as np
+
 try:
     import openmm
     from openmm import unit
@@ -715,8 +717,10 @@ def apply(self, thermodynamic_state, sampler_state, context_cache=None):
                 # Run dynamics.
                 timer.start("{}: step({})".format(move_name, self.n_steps))
                 integrator.step(self.n_steps)
-            except Exception:
+            except Exception as e:
                 # Catches particle positions becoming nan during integration.
+                # Return the exception message as a warning
+                warnings.warn(str(e))
                 restart = True
             else:
                 timer.stop("{}: step({})".format(move_name, self.n_steps))
@@ -731,6 +735,7 @@ def apply(self, thermodynamic_state, sampler_state, context_cache=None):
                 potential_energy = context_state.getPotentialEnergy()
                 restart = np.isnan(potential_energy.value_in_unit(potential_energy.unit))
 
+            # TODO: Probably refactor this whole thing to do simple restart
             # Restart the move if we found NaNs.
             if restart:
                 err_msg = ('Potential energy is NaN after {} attempts of integration '
diff --git a/openmmtools/multistate/multistateanalyzer.py b/openmmtools/multistate/multistateanalyzer.py
index a41c0ddc6..ae7fd793d 100644
--- a/openmmtools/multistate/multistateanalyzer.py
+++ b/openmmtools/multistate/multistateanalyzer.py
@@ -36,9 +36,13 @@
     from simtk import openmm
     import simtk.unit as units
 from scipy.special import logsumexp
-from pymbar import MBAR, timeseries
 
 from openmmtools import multistate, utils, forces
+from openmmtools.multistate.pymbar import (
+    statistical_inefficiency_multiple,
+    subsample_correlated_data,
+    MBAR,
+)
 
 
 ABC = abc.ABC
@@ -1164,15 +1168,15 @@ class MultiStateSamplerAnalyzer(PhaseAnalyzer):
 
     statistical_inefficiency : float, optional
         Sub-sample rate, e.g. if the statistical_inefficiency is 10, we draw a sample every 10 iterations to get the decorrelated samples.
-        If specified, overrides the statistical_inefficiency computed using _get_equilibration_data() and `n_equilibration_iterations` 
+        If specified, overrides the statistical_inefficiency computed using _get_equilibration_data() and `n_equilibration_iterations`
         must be specified as well.
         Default is None, in which case the the statistical_inefficiency will be computed using _get_equilibration_data().
 
     max_subset : int >= 1 or None, optional, default: 100
-        Argument in ``multistate.utils.get_equilibration_data_per_sample()`` that specifies the maximum number of points from 
-        the ``timeseries_to_analyze`` (another argument to ``multistate.utils.get_equilibration_data_per_sample()``) on which 
+        Argument in ``multistate.utils.get_equilibration_data_per_sample()`` that specifies the maximum number of points from
+        the ``timeseries_to_analyze`` (another argument to ``multistate.utils.get_equilibration_data_per_sample()``) on which
         to compute equilibration data.
-    
+
     Attributes
     ----------
     unbias_restraint
@@ -1286,7 +1290,7 @@ def generate_mixing_statistics(self, number_equilibrated: Union[int, None] = Non
         # states[n][k] is the state index of replica k at iteration n, but
         # the functions wants a list of timeseries states[k][n].
         states_kn = np.transpose(states[number_equilibrated:self.max_n_iterations,])
-        g = timeseries.statisticalInefficiencyMultiple(states_kn)
+        g = statistical_inefficiency_multiple(states_kn)
 
         return self._MixingStatistics(transition_matrix=t_ij, eigenvalues=mu,
                                       statistical_inefficiency=g)
@@ -1915,11 +1919,13 @@ def _compute_free_energy(self):
         logger.debug("Computing covariance matrix...")
 
         try:
-            # pymbar 2
-            (Deltaf_ij, dDeltaf_ij) = self.mbar.getFreeEnergyDifferences()
-        except ValueError:
             # pymbar 3
-            (Deltaf_ij, dDeltaf_ij, _) = self.mbar.getFreeEnergyDifferences()
+            Deltaf_ij, dDeltaf_ij = self.mbar.getFreeEnergyDifferences()
+        except AttributeError:
+            # pymbar 4
+            results = self.mbar.compute_free_energy_differences()
+            Deltaf_ij = results['Delta_f']
+            dDeltaf_ij = results['dDelta_f']
 
         # Matrix of free energy differences
         logger.debug("Deltaf_ij:")
@@ -2063,7 +2069,7 @@ def _get_equilibration_data(self, energies=None, neighborhoods=None, replica_sta
             i_t, g_i, n_effective_i = multistate.utils.get_equilibration_data_per_sample(u_n[t0:], max_subset=self._max_subset)
             n_effective_max = n_effective_i.max()
             i_max = n_effective_i.argmax()
-            n_equilibration = i_t[i_max] + t0 
+            n_equilibration = i_t[i_max] + t0
             g_t = self._statistical_inefficiency if self._statistical_inefficiency is not None else g_i[i_max]
 
         # Store equilibration data
@@ -2186,13 +2192,18 @@ def statistical_inefficiency(self):
         """float: The statistical inefficiency of the sampler."""
         return self._equilibration_data[1]
 
+    @property
+    def effective_length(self):
+        """float: The length of the production data as a number of uncorrelated samples"""
+        return self._equilibration_data[2]
+
     @property
     def _decorrelated_iterations(self):
         """list of int: the indices of the decorrelated iterations truncated to max_n_iterations."""
         if self.use_full_trajectory:
             return np.arange(self.max_n_iterations + 1, dtype=int)
         equilibrium_iterations = np.array(range(self.n_equilibration_iterations, self.max_n_iterations + 1))
-        decorrelated_iterations_indices = timeseries.subsampleCorrelatedData(equilibrium_iterations,
+        decorrelated_iterations_indices = subsample_correlated_data(equilibrium_iterations,
                                                                              self.statistical_inefficiency)
         return equilibrium_iterations[decorrelated_iterations_indices]
 
diff --git a/openmmtools/multistate/multistatereporter.py b/openmmtools/multistate/multistatereporter.py
index 9f63456c9..4ac1f5cb3 100644
--- a/openmmtools/multistate/multistatereporter.py
+++ b/openmmtools/multistate/multistatereporter.py
@@ -48,6 +48,7 @@
 except ImportError:  # OpenMM < 7.6
     from simtk import unit
 
+import openmmtools
 from openmmtools.utils import deserialize, with_timer, serialize, quantity_from_string
 from openmmtools import states
 
@@ -97,8 +98,8 @@ class MultiStateReporter(object):
         The reporter internally tracks what data goes into which file, so its transparent to all other classes
         In the future, this will be able to take Storage classes as well
     analysis_particle_indices : tuple of ints, Optional. Default: () (empty tuple)
-        Indices of particles which should be treated as special when manipulating read and write functions.
-        If this is an empty tuple, then no particles are treated as special
+        If specified, it will serialize positions and velocities for the specified particles, at every iteration, in the
+        reporter storage (.nc) file. If empty, no positions or velocities will be stored in this file for any atoms.
 
     Attributes
     ----------
@@ -137,8 +138,9 @@ def __init__(self, storage, open_mode=None,
         self._analysis_particle_indices = tuple(analysis_particle_indices)
         if open_mode is not None:
             self.open(open_mode)
-        # Flag to check whether to overwrite real time statistics file
-        self._overwrite_statistics = True
+        # TODO: Maybe we want to expose this flag to control ovrwriting/appending
+        # Flag to check whether to overwrite real time statistics file -- Defaults to append
+        self._overwrite_statistics = False
 
     @property
     def filepath(self):
@@ -266,6 +268,7 @@ def open(self, mode='r', convention='ReplicaExchange', netcdf_format='NETCDF4'):
         self.close()
 
         # Create directory if we want to write.
+        # TODO: We probably want to check here specifically for w when we want to write
         if mode != 'r':
             for storage_path in self._storage_paths:
                 # normpath() transform '' to '.' for makedirs().
@@ -368,7 +371,8 @@ def _open_dataset_robustly(self, *args, n_attempts=5, sleep_time=2,
         for attempt in range(n_attempts-1):
             try:
                 return netcdf.Dataset(*args, **kwargs)
-            except:
+            except Exception as err: # This should be safe since we will raise the error below on return
+                logger.debug(f"exception thrown {err}")
                 logger.debug('Attempt {}/{} to open {} failed. Retrying '
                              'in {} seconds'.format(attempt+1, n_attempts, args[0], sleep_time))
                 time.sleep(sleep_time)
@@ -405,8 +409,7 @@ def _initialize_storage_file(self, ncfile, nc_name, convention):
             ncfile.createDimension('spatial', 3)  # Number of spatial dimensions.
 
             # Set global attributes.
-            ncfile.application = 'YANK'
-            ncfile.program = 'yank.py'
+            ncfile.program = f"openmmtools {openmmtools.__version__}"
             ncfile.programVersion = __version__
             ncfile.Conventions = convention
             ncfile.ConventionVersion = '0.2'
@@ -1810,7 +1813,7 @@ def _write_dict(self, path, data, storage_name='analysis',
                 dimension_name = 'scalar'
             # Create variable.
             nc_variable = storage_nc.createVariable(path, variable_type,
-                                                    dimension_name, zlib=True)
+                                                    dimension_name, zlib=False)
 
         # Assign the value to the variable.
         if fixed_dimension:
diff --git a/openmmtools/multistate/multistatesampler.py b/openmmtools/multistate/multistatesampler.py
index 339221149..32e987c94 100644
--- a/openmmtools/multistate/multistatesampler.py
+++ b/openmmtools/multistate/multistatesampler.py
@@ -35,6 +35,7 @@
 import inspect
 import logging
 import datetime
+import subprocess
 
 import numpy as np
 
@@ -47,8 +48,7 @@
 from openmmtools import multistate, utils, states, mcmc, cache
 import mpiplus
 from openmmtools.multistate.utils import SimulationNaNError
-
-from pymbar.utils import ParameterError
+from openmmtools.multistate.pymbar import ParameterError
 
 from openmmtools.integrators import FIREMinimizationIntegrator
 
@@ -590,6 +590,14 @@ def create(self, thermodynamic_states: list, sampler_states, storage,
             raise RuntimeError('Storage file {} already exists; cowardly '
                                'refusing to overwrite.'.format(self._reporter.filepath))
 
+        # Make sure online analysis interval is a multiples of the reporter's checkpoint interval
+        # this avoids having redundant iteration information in the real time yaml files
+        # only check if self.online_analysis_interval is set
+        if self.online_analysis_interval:
+            if self.online_analysis_interval % self._reporter.checkpoint_interval != 0:
+                raise ValueError(f"Online analysis interval: {self.online_analysis_interval}, must be a "
+                                 f"multiple of the checkpoint interval: {self._reporter.checkpoint_interval}")
+
         # Make sure sampler_states is an iterable of SamplerStates.
         if isinstance(sampler_states, states.SamplerState):
             sampler_states = [sampler_states]
@@ -680,7 +688,7 @@ def equilibrate(self, n_iterations, mcmc_moves=None):
         production_mcmc_moves = self._mcmc_moves
         self._mcmc_moves = mcmc_moves
         for iteration in range(1, 1 + n_iterations):
-            logger.debug("Equilibration iteration {}/{}".format(iteration, n_iterations))
+            logger.info("Equilibration iteration {}/{}".format(iteration, n_iterations))
             timer.start('Equilibration Iteration')
 
             # NOTE: Unlike run(), do NOT increment iteration counter.
@@ -704,14 +712,12 @@ def equilibrate(self, n_iterations, mcmc_moves=None):
             estimated_finish_time = time.time() + estimated_time_remaining
             # TODO: Transmit timing information
 
-            # Show timing statistics if debug level is activated.
-            if logger.isEnabledFor(logging.DEBUG):
-                logger.debug("Iteration took {:.3f}s.".format(iteration_time))
-                if estimated_time_remaining != float('inf'):
-                    logger.debug("Estimated completion (of equilibration only) in {}, at {} (consuming total wall clock time {}).".format(
-                        str(datetime.timedelta(seconds=estimated_time_remaining)),
-                        time.ctime(estimated_finish_time),
-                        str(datetime.timedelta(seconds=estimated_total_time))))
+            logger.info("Iteration took {:.3f}s.".format(iteration_time))
+            if estimated_time_remaining != float('inf'):
+                logger.info("Estimated completion (of equilibration only) in {}, at {} (consuming total wall clock time {}).".format(
+                    str(datetime.timedelta(seconds=estimated_time_remaining)),
+                    time.ctime(estimated_finish_time),
+                    str(datetime.timedelta(seconds=estimated_total_time))))
         timer.report_timing()
 
         # Restore production MCMCMoves.
@@ -766,9 +772,9 @@ def run(self, n_iterations=None):
             # Increment iteration counter.
             self._iteration += 1
 
-            logger.debug('*' * 80)
-            logger.debug('Iteration {}/{}'.format(self._iteration, iteration_limit))
-            logger.debug('*' * 80)
+            logger.info('*' * 80)
+            logger.info('Iteration {}/{}'.format(self._iteration, iteration_limit))
+            logger.info('*' * 80)
             timer.start('Iteration')
 
             # Update thermodynamic states
@@ -791,14 +797,13 @@ def run(self, n_iterations=None):
             partial_total_time = timer.partial('Run ReplicaExchange')
             self._update_timing(iteration_time, partial_total_time, run_initial_iteration, iteration_limit)
 
-            # Show timing statistics if debug level is activated.
-            if logger.isEnabledFor(logging.DEBUG):
-                logger.debug("Iteration took {:.3f}s.".format(self._timing_data["iteration_seconds"]))
-                if self._timing_data["estimated_time_remaining"] != float('inf'):
-                    logger.debug("Estimated completion in {}, at {} (consuming total wall clock time {}).".format(
-                        self._timing_data["estimated_time_remaining"],
-                        self._timing_data["estimated_localtime_finish_date"],
-                        self._timing_data["estimated_total_time"]))
+            # Log timing data as info level -- useful for users by default
+            logger.info("Iteration took {:.3f}s.".format(self._timing_data["iteration_seconds"]))
+            if self._timing_data["estimated_time_remaining"] != float('inf'):
+                logger.info("Estimated completion in {}, at {} (consuming total wall clock time {}).".format(
+                    self._timing_data["estimated_time_remaining"],
+                    self._timing_data["estimated_localtime_finish_date"],
+                    self._timing_data["estimated_total_time"]))
 
             # Perform sanity checks to see if we should terminate here.
             self._check_nan_energy()
@@ -1014,7 +1019,8 @@ def _read_options(check_iteration):
                                                     "Either your data is fully corrupted or something has gone very "
                                                     "wrong to see this message. "
                                                     "Please open an issue on the GitHub issue tracker if you see this!")
-            except:
+            except Exception as err:
+                raise err
                 # Trap all other errors caught by the load process
                 continue
 
@@ -1028,12 +1034,12 @@ def _read_options(check_iteration):
         self._thermodynamic_states = thermodynamic_states
         self._unsampled_states = unsampled_states
         self._sampler_states = sampler_states
-        self._replica_thermodynamic_states = state_indices
+        self._replica_thermodynamic_states = np.array(state_indices)
         self._energy_thermodynamic_states = energy_thermodynamic_states
         self._neighborhoods = neighborhoods
         self._energy_unsampled_states = energy_unsampled_states
-        self._n_accepted_matrix = n_accepted_matrix
-        self._n_proposed_matrix = n_proposed_matrix
+        self._n_accepted_matrix = np.array(n_accepted_matrix)
+        self._n_proposed_matrix = np.array(n_proposed_matrix)
         self._metadata = metadata
 
         self._last_mbar_f_k = last_mbar_f_k
@@ -1382,7 +1388,7 @@ def _minimize_replica(self, replica_id, tolerance, max_iterations):
                 logger.debug('Using FIRE: tolerance {} max_iterations {}'.format(tolerance, max_iterations))
                 integrator.step(max_iterations)
         except Exception as e:
-            if str(e) == 'Particle coordinate is nan':
+            if 'particle coordinate is nan' in str(e).lower():
                 logger.debug('NaN encountered in FIRE minimizer; falling back to L-BFGS after resetting positions')
                 sampler_state.apply_to_context(context)
                 openmm.LocalEnergyMinimizer.minimize(context, tolerance, max_iterations)
@@ -1776,11 +1782,19 @@ def _update_timing(self, iteration_time, partial_total_time, run_initial_iterati
     @staticmethod
     def _display_cuda_devices():
         """Query system nvidia-smi to get available GPUs indices and names in debug log."""
-        # Read nvidia-smi query, should return empty strip if no GPU is found.
-        cuda_query_output = os.popen("nvidia-smi --query-gpu=index,gpu_name --format=csv,noheader").read().strip()
-        # Split by line jump and comma
-        cuda_devices_list = [entry.split(',') for entry in cuda_query_output.split('\n')]
-        logger.debug(f"CUDA devices available: {*cuda_devices_list,}")
+
+        cuda_query_output = subprocess.run("nvidia-smi --query-gpu=gpu_uuid,gpu_name,compute_mode  --format=csv", shell=True, capture_output=True, text=True)
+        # Check if command worked
+        if cuda_query_output.returncode == 0:
+            # Split by line jump and comma
+            cuda_devices_list = [entry for entry in cuda_query_output.stdout.splitlines()]
+            logger.debug(f"CUDA devices available: {*cuda_devices_list,}")
+            # We only support "Default" and not "Exclusive_Process" for the compute mode
+            if "Default" not in cuda_query_output.stdout:
+                logger.warning(f"GPU in 'Exclusive_Process' mode (or Prohibited), one context is allowed per device. This may prevent some openmmtools features from working. GPU must be in 'Default' compute mode")
+        # Handel the case where the command had some error
+        else:
+            logger.debug(f"nvidia-smi command failed: {cuda_query_output.stderr}, this is expected if there is no GPU available")
 
     def _flatten_moves_iterator(self):
         """Recursively flatten MCMC moves. Handles the cases where each move can be a set of moves, for example with
diff --git a/openmmtools/multistate/pymbar.py b/openmmtools/multistate/pymbar.py
new file mode 100644
index 000000000..7df80578e
--- /dev/null
+++ b/openmmtools/multistate/pymbar.py
@@ -0,0 +1,55 @@
+from typing import Dict, Tuple
+
+import numpy as np
+
+try:
+    # pymbar >= 4
+    from pymbar.timeseries import (
+        detect_equilibration,
+        statistical_inefficiency_multiple,
+        subsample_correlated_data,
+        statistical_inefficiency
+    )
+    from pymbar import MBAR
+    from pymbar.utils import ParameterError
+except ImportError:
+    # pymbar < 4
+    from pymbar.timeseries import (
+        detectEquilibration as detect_equilibration,
+        statisticalInefficiencyMultiple as statistical_inefficiency_multiple,
+        subsampleCorrelatedData as subsample_correlated_data,
+        statisticalInefficiency as statistical_inefficiency
+    )
+    from pymbar import MBAR
+    from pymbar.utils import ParameterError
+
+
+def _pymbar_bar(
+    work_forward: np.ndarray,
+    work_backward: np.ndarray,
+) -> Dict[str, float]:
+    """
+    https://github.com/shirtsgroup/physical_validation/blob/v1.0.5/physical_validation/util/ensemble.py#L37
+    """
+    import pymbar
+
+    try:
+        # pymbar >= 4
+        return pymbar.other_estimators.bar(work_forward, work_backward)
+    except AttributeError:
+        # pymbar < 4
+        return pymbar.BAR(work_forward, work_backward, return_dict=True)
+
+def _pymbar_exp(
+        w_F: np.ndarray,
+) -> Tuple[float, float]:
+    try:
+        # pymbar < 4
+        from pymbar import EXP
+        fe_estimate = EXP(w_F)
+        return fe_estimate[0], fe_estimate[1]
+    except ImportError:
+        # pymbar >= 4
+        from pymbar.other_estimators import exp
+        fe_estimate = exp(w_F)
+        return fe_estimate["Delta_f"], fe_estimate["dDelta_f"]
diff --git a/openmmtools/multistate/sams.py b/openmmtools/multistate/sams.py
index 75f517baf..cd2fb2d3b 100644
--- a/openmmtools/multistate/sams.py
+++ b/openmmtools/multistate/sams.py
@@ -25,10 +25,10 @@
 
 import logging
 import numpy as np
-import openmmtools as mmtools
 from scipy.special import logsumexp
 
-from openmmtools import multistate, utils
+from openmmtools import multistate
+from openmmtools import utils
 from openmmtools.multistate.multistateanalyzer import MultiStateSamplerAnalyzer
 import mpiplus
 
@@ -404,7 +404,7 @@ def _mix_replicas(self):
         # Perform swap attempts according to requested scheme.
         # TODO: We may be able to refactor this to simply have different update schemes compute neighborhoods differently.
         # TODO: Can we allow "plugin" addition of new update schemes that can be registered externally?
-        with mmtools.utils.time_it('Mixing of replicas'):
+        with utils.time_it('Mixing of replicas'):
             # Initialize statistics. This matrix is modified by the jump function and used when updating the logZ estimates.
             replicas_log_P_k = np.zeros([self.n_replicas, self.n_states], np.float64)
             if self.state_update_scheme == 'global-jump':
diff --git a/openmmtools/multistate/utils.py b/openmmtools/multistate/utils.py
index 131dba89d..3df16a6d1 100644
--- a/openmmtools/multistate/utils.py
+++ b/openmmtools/multistate/utils.py
@@ -24,11 +24,12 @@
 This code is licensed under the latest available version of the MIT License.
 
 """
+from typing import Dict
 import logging
 import warnings
 import numpy as np
 
-from pymbar import timeseries  # for statistical inefficiency analysis
+from openmmtools.multistate.pymbar import subsample_correlated_data, statistical_inefficiency
 
 logger = logging.getLogger(__name__)
 
@@ -98,25 +99,25 @@ def get_decorrelation_time(timeseries_to_analyze):
     """
     Compute the decorrelation times given a timeseries.
 
-    See the ``pymbar.timeseries.statisticalInefficiency`` for full documentation
+    See the ``pymbar.timeseries.statistical_inefficiency`` for full documentation
     """
-    return timeseries.statisticalInefficiency(timeseries_to_analyze)
+    return statistical_inefficiency(timeseries_to_analyze)
 
 
 def get_equilibration_data_per_sample(timeseries_to_analyze, fast=True, max_subset=100):
     """
     Compute the correlation time and n_effective per sample with tuning to how you want your data formatted
 
-    This is a modified pass-through to ``pymbar.timeseries.detectEquilibration`` does, returning the per sample data.
+    This is a modified pass-through to ``pymbar.timeseries.statistical_inefficiency`` does, returning the per sample data.
 
     It has been modified to specify the maximum number of time points to consider, evenly spaced over the timeseries.
     This is different than saying "I want analysis done every X for total points Y = len(timeseries)/X",
     this is "I want Y total analysis points"
 
-    Note that the returned arrays will be of size max_subset - 1, because we always discard data from the first time 
+    Note that the returned arrays will be of size max_subset - 1, because we always discard data from the first time
     origin due to equilibration.
 
-    See the ``pymbar.timeseries.detectEquilibration`` function for full algorithm documentation
+    See the ``pymbar.timeseries.statistical_inefficiency`` function for full algorithm documentation
 
     Parameters
     ----------
@@ -124,8 +125,8 @@ def get_equilibration_data_per_sample(timeseries_to_analyze, fast=True, max_subs
         1-D timeseries to analyze for equilibration
     max_subset : int >= 1 or None, optional, default: 100
         Maximum number of points in the ``timeseries_to_analyze`` on which to analyze the equilibration on.
-        These are distributed uniformly over the timeseries so the final output (after discarding the first point 
-        due to equilibration) will be size max_subset - 1 where indices are placed  approximately every 
+        These are distributed uniformly over the timeseries so the final output (after discarding the first point
+        due to equilibration) will be size max_subset - 1 where indices are placed  approximately every
         ``(len(timeseries_to_analyze) - 1) / max_subset``.
         The full timeseries is used if the timeseries is smaller than ``max_subset`` or if ``max_subset`` is None
     fast : bool, optional. Default: True
@@ -177,12 +178,14 @@ def get_equilibration_data_per_sample(timeseries_to_analyze, fast=True, max_subs
     i_t = np.floor(counter * time_size / max_subset).astype(int)
     for i, t in enumerate(i_t):
         try:
-            g_i[i] = timeseries.statisticalInefficiency(series[t:], fast=fast)
-        except:
+            g_i[i] = statistical_inefficiency(series[t:], fast=fast)
+        except Exception as e:
+            print("If you see this, open an issue https://github.com/choderalab/openmmtools/issues")
+            raise e
             g_i[i] = (time_size - t + 1)
         n_effective_i[i] = (time_size - t + 1) / g_i[i]
 
-    # We should never choose data from the first time origin as the equilibrated data because 
+    # We should never choose data from the first time origin as the equilibrated data because
     # it contains snapshots warming up from minimization, which causes problems with correlation time detection
     # By default (max_subset=100), the first 1% of the data is discarded. If 1% is not ideal, user can specify
     # max_subset to change the percentage (e.g. if 0.5% is desired, specify max_subset=200).
@@ -206,7 +209,7 @@ def get_equilibration_data(timeseries_to_analyze, fast=True, max_subset=1000):
         The full timeseries is used if the timeseries is smaller than ``max_subset`` or if ``max_subset`` is None
     fast : bool, optional. Default: True
         If True, will use faster (but less accurate) method to estimate correlation time
-        passed on to timeseries module.
+        pass:d on to timeseries module.
 
     Returns
     -------
@@ -287,6 +290,6 @@ def subsample_data_along_axis(data, subsample_rate, axis):
     cast_data = np.asarray(data)
     data_shape = cast_data.shape
     # Since we already have g, we can just pass any appropriate shape to the subsample function
-    indices = timeseries.subsampleCorrelatedData(np.zeros(data_shape[axis]), g=subsample_rate)
+    indices = subsample_correlated_data(np.zeros(data_shape[axis]), g=subsample_rate)
     subsampled_data = np.take(cast_data, indices, axis=axis)
     return subsampled_data
diff --git a/openmmtools/storage/iodrivers.py b/openmmtools/storage/iodrivers.py
index 2a0dc4ce4..7b5a20fca 100644
--- a/openmmtools/storage/iodrivers.py
+++ b/openmmtools/storage/iodrivers.py
@@ -37,7 +37,7 @@
 except ImportError:  # OpenMM < 7.6
     from simtk import unit
 
-from ..utils import typename, quantity_from_string
+from openmmtools.utils import typename, quantity_from_string
 
 # TODO: Use the `with_metaclass` from .utils when we merge it in
 ABC = abc.ABCMeta('ABC', (object,), {})  # compatible with Python 2 *and* 3
diff --git a/openmmtools/tests/test_alchemy.py b/openmmtools/tests/test_alchemy.py
index e4c73f60c..bb5010f74 100644
--- a/openmmtools/tests/test_alchemy.py
+++ b/openmmtools/tests/test_alchemy.py
@@ -16,6 +16,8 @@
 
 from __future__ import print_function
 
+import copy
+import logging
 import os
 import sys
 import zlib
@@ -25,11 +27,16 @@
 
 import nose
 import scipy
+import numpy as np
 from nose.plugins.attrib import attr
 
-from openmmtools import testsystems, forces
-from openmmtools.constants import kB
-from openmmtools.alchemy import *
+import openmm
+from openmm import unit
+from openmmtools import forces, forcefactories, states, testsystems, utils
+from openmmtools.constants import kB, ONE_4PI_EPS0
+from openmmtools.alchemy import AlchemicalFunction, AlchemicalState, AbsoluteAlchemicalFactory, \
+    AlchemicalRegion, AlchemicalStateError
+from openmmtools.multistate.pymbar import subsample_correlated_data, detect_equilibration, _pymbar_exp
 
 logger = logging.getLogger(__name__)
 
@@ -1153,13 +1160,12 @@ def overlap_check(reference_system, alchemical_system, positions, nsteps=50, nsa
 
     # Discard data to equilibration and subsample.
     du_n = np.array(data['du_n'])
-    from pymbar import timeseries, EXP
-    t0, g, Neff = timeseries.detectEquilibration(du_n)
-    indices = timeseries.subsampleCorrelatedData(du_n, g=g)
+    t0, g, Neff = detect_equilibration(du_n)
+    indices = subsample_correlated_data(du_n, g=g)
     du_n = du_n[indices]
 
     # Compute statistics.
-    DeltaF, dDeltaF = EXP(du_n)
+    DeltaF, dDeltaF = _pymbar_exp(du_n)
 
     # Raise an exception if the error is larger than 3kT.
     MAX_DEVIATION = 3.0  # kT
diff --git a/openmmtools/tests/test_cache.py b/openmmtools/tests/test_cache.py
index 42816e742..6cbe82862 100644
--- a/openmmtools/tests/test_cache.py
+++ b/openmmtools/tests/test_cache.py
@@ -218,10 +218,10 @@ def test_copy_integrator_state(self):
         def read_attribute(integrator, attribute_name):
             try:
                 return getattr(integrator, attribute_name)
-            except:
+            except AttributeError:
                 try:
                     return getattr(integrator, 'get' + attribute_name)()
-                except:
+                except AttributeError:
                     return integrator.getGlobalVariableByName(attribute_name)
 
         test_cases[0].append('Temperature')  # Getter/setter.
diff --git a/openmmtools/tests/test_integrators.py b/openmmtools/tests/test_integrators.py
index 797f9dd96..0ecc3fb42 100755
--- a/openmmtools/tests/test_integrators.py
+++ b/openmmtools/tests/test_integrators.py
@@ -15,7 +15,7 @@
 
 import copy
 import inspect
-import pymbar
+from openmmtools.multistate.pymbar import _pymbar_bar
 
 from unittest import TestCase
 
@@ -508,19 +508,25 @@ def test_ncmc_update_parameters_in_context(self):
         context.setPositions(wbox.positions)
         context.setVelocitiesToTemperature(temperature)
 
-        def switchoff(force, context, frac=0.9):
-            force.setParticleParameters(0, charge=-0.834 * frac, sigma=0.3150752406575124*frac, epsilon=0.635968 * frac)
-            force.setParticleParameters(1, charge=0.417 * frac, sigma=0, epsilon=1 * frac)
-            force.setParticleParameters(2, charge=0.417 * frac, sigma=0, epsilon=1 * frac)
-            force.updateParametersInContext(context)
-
-        def switchon(force, context):
-            force.setParticleParameters(0, charge=-0.834, sigma=0.3150752406575124, epsilon=0.635968)
-            force.setParticleParameters(1, charge=0.417, sigma=0, epsilon=1)
-            force.setParticleParameters(2, charge=0.417, sigma=0, epsilon=1)
-            force.updateParametersInContext(context)
-
-        force = wbox.system.getForce(2)  # Non-bonded force.
+        def switchoff(omm_force, omm_context, frac=0.9):
+            omm_force.setParticleParameters(0, charge=-0.834 * frac, sigma=0.3150752406575124 * frac, epsilon=0.635968 * frac)
+            omm_force.setParticleParameters(1, charge=0.417 * frac, sigma=0, epsilon=1 * frac)
+            omm_force.setParticleParameters(2, charge=0.417 * frac, sigma=0, epsilon=1 * frac)
+            omm_force.updateParametersInContext(omm_context)
+
+        def switchon(omm_force, omm_context):
+            omm_force.setParticleParameters(0, charge=-0.834, sigma=0.3150752406575124, epsilon=0.635968)
+            omm_force.setParticleParameters(1, charge=0.417, sigma=0, epsilon=1)
+            omm_force.setParticleParameters(2, charge=0.417, sigma=0, epsilon=1)
+            omm_force.updateParametersInContext(omm_context)
+
+        # Get Nonbonded force from system -- Note: order may vary by platform. Don't expect a specific index.
+        for force in wbox.system.getForces():
+            if isinstance(force, openmm.NonbondedForce):
+                nonbonded_force = force
+                break
+        else:
+            raise ValueError("No nonbonded force found in the system.")
 
         # Number of NCMC steps
         nsteps = 20
@@ -533,14 +539,14 @@ def switchon(force, context):
             for step in range(nsteps):
                 fraction = float(step + 1) / float(nsteps)
                 initial_energy = context.getState(getEnergy=True).getPotentialEnergy()
-                switchoff(force, context, frac=fraction)
+                switchoff(nonbonded_force, context, frac=fraction)
                 final_energy = context.getState(getEnergy=True).getPotentialEnergy()
                 external_protocol_work += (final_energy - initial_energy) / kT
                 integrator.step(1)
             integrator_protocol_work = integrator.get_protocol_work(dimensionless=True)
             assert abs(external_protocol_work - integrator_protocol_work) < 1.E-5
             # Return to unperturbed state
-            switchon(force, context)
+            switchon(nonbonded_force, context)
 
 
     def test_protocol_work_accumulation_harmonic_oscillator(self):
@@ -789,9 +795,15 @@ def run_alchemical_langevin_integrator(nsteps=0, splitting="O { V R H R V } O"):
         del context
         del compound_integrator
 
-    dF, ddF = pymbar.BAR(work['forward'], work['reverse'])
-    nsigma = np.abs(dF - dF_analytical) / ddF
-    print("analytical DeltaF: {:12.4f}, DeltaF: {:12.4f}, dDeltaF: {:12.4f}, nsigma: {:12.1f}".format(dF_analytical, dF, ddF, nsigma))
+    results = _pymbar_bar(work['forward'], work['reverse'])
+    nsigma = np.abs(results['Delta_f'] - dF_analytical) / results['dDelta_f']
+
+    print(
+        "analytical DeltaF: {:12.4f}, DeltaF: {:12.4f}, dDeltaF: {:12.4f}, nsigma: {:12.1f}".format(
+            dF_analytical, results['Delta_f'], results['dDelta_f'], nsigma,
+        )
+    )
+
     if nsigma > NSIGMA_MAX:
         raise Exception("The free energy difference for the nonequilibrium switching for splitting '%s' and %d steps is not zero within statistical error." % (splitting, nsteps))
 
@@ -939,10 +951,15 @@ def test_periodic_langevin_integrator(splitting="H V R O R V H", ncycles=40, nst
     print(np.array(forward_works).std())
     print(np.array(reverse_works).std())
 
-    dF, ddF = pymbar.BAR(np.array(forward_works), np.array(reverse_works))
-    nsigma = np.abs(dF - dF_analytical) / ddF
+    results = _pymbar_bar(np.array(forward_works), np.array(reverse_works))
+    nsigma = np.abs(results['Delta_f'] - dF_analytical) / results['dDelta_f']
     assert np.isclose(integrator.getGlobalVariableByName("lambda"), 0.0)
-    print("analytical DeltaF: {:12.4f}, DeltaF: {:12.4f}, dDeltaF: {:12.4f}, nsigma: {:12.1f}".format(dF_analytical, dF, ddF, nsigma))
+    print(
+        "analytical DeltaF: {:12.4f}, DeltaF: {:12.4f}, dDeltaF: {:12.4f}, nsigma: {:12.1f}".format(
+            dF_analytical, results['Delta_f'], results['dDelta_f'], nsigma,
+        )
+    )
+
     if nsigma > NSIGMA_MAX:
         raise Exception(f"The free energy difference for the nonequilibrium switching for splitting {splitting} is not zero within statistical error.")
 
diff --git a/openmmtools/tests/test_mcmc.py b/openmmtools/tests/test_mcmc.py
index 67e1fb4cf..5d6c2da31 100644
--- a/openmmtools/tests/test_mcmc.py
+++ b/openmmtools/tests/test_mcmc.py
@@ -19,7 +19,7 @@
 from functools import partial
 
 import nose
-from pymbar import timeseries
+from openmmtools.multistate.pymbar import detect_equilibration
 
 from openmmtools import testsystems
 from openmmtools.states import SamplerState, ThermodynamicState
@@ -143,7 +143,7 @@ def subtest_mcmc_expectation(testsystem, move):
             testsystem.__class__.__name__)
 
         potential_expectation = testsystem.get_potential_expectation(thermodynamic_state) / kT
-        [t0, g, Neff_max] = timeseries.detectEquilibration(potential_n)
+        [t0, g, Neff_max] = detect_equilibration(potential_n)
         potential_mean = potential_n[t0:].mean()
         dpotential_mean = potential_n[t0:].std() / np.sqrt(Neff_max)
         potential_error = potential_mean - potential_expectation
@@ -166,7 +166,7 @@ def subtest_mcmc_expectation(testsystem, move):
             testsystem.__class__.__name__)
 
         volume_expectation = testsystem.get_volume_expectation(thermodynamic_state) / (unit.nanometers**3)
-        [t0, g, Neff_max] = timeseries.detectEquilibration(volume_n)
+        [t0, g, Neff_max] = detect_equilibration(volume_n)
         volume_mean = volume_n[t0:].mean()
         dvolume_mean = volume_n[t0:].std() / np.sqrt(Neff_max)
         volume_error = volume_mean - volume_expectation
@@ -364,7 +364,7 @@ def test_mcmc_move_context_cache_shallow_copy():
     )
     # Create temporary reporter storage file
     with tempfile.NamedTemporaryFile() as storage:
-        reporter = multistate.MultiStateReporter(storage.name, checkpoint_interval=999999)
+        reporter = multistate.MultiStateReporter(storage.name, checkpoint_interval=200)
     simulation.create(
         thermodynamic_states=thermodynamic_states,
         sampler_states=SamplerState(
diff --git a/openmmtools/tests/test_sampling.py b/openmmtools/tests/test_sampling.py
index 3fc353fc1..8c2495670 100644
--- a/openmmtools/tests/test_sampling.py
+++ b/openmmtools/tests/test_sampling.py
@@ -16,10 +16,12 @@
 import contextlib
 import copy
 import inspect
+import math
 import os
 import pickle
 import shutil
 import sys
+import tempfile
 from io import StringIO
 
 import numpy as np
@@ -34,12 +36,11 @@
 import mpiplus
 
 import openmmtools as mmtools
-from openmmtools import cache
-from openmmtools import testsystems
+from openmmtools import cache, testsystems, states, mcmc
 from openmmtools.multistate import MultiStateReporter
 from openmmtools.multistate import MultiStateSampler, MultiStateSamplerAnalyzer
 from openmmtools.multistate import ReplicaExchangeSampler, ReplicaExchangeAnalyzer
-from openmmtools.multistate import ParallelTemperingSampler, ParallelTemperingAnalyzer
+from openmmtools.multistate import ParallelTemperingSampler
 from openmmtools.multistate import SAMSSampler, SAMSAnalyzer
 from openmmtools.multistate.multistatereporter import _DictYamlLoader
 from openmmtools.utils import temporary_directory
@@ -141,7 +142,8 @@ def teardown_class(cls):
     def run(self, include_unsampled_states=False):
         # Create and configure simulation object
         move = mmtools.mcmc.MCDisplacementMove(displacement_sigma=1.0*unit.angstroms)
-        simulation = self.SAMPLER(mcmc_moves=move, number_of_iterations=self.N_ITERATIONS)
+        simulation = self.SAMPLER(mcmc_moves=move, number_of_iterations=self.N_ITERATIONS,
+                                  online_analysis_interval=self.N_ITERATIONS)
 
         # Define file for temporary storage.
         with temporary_directory() as tmp_dir:
@@ -160,11 +162,11 @@ def run(self, include_unsampled_states=False):
             logger.setLevel(logging.CRITICAL)
             simulation.run()
 
-            # Create Analyzer specfiying statistical_inefficiency without n_equilibration_iterations and 
+            # Create Analyzer specfiying statistical_inefficiency without n_equilibration_iterations and
             # check that it throws an exception
             assert_raises(Exception, self.ANALYZER, reporter, statistical_inefficiency=10)
 
-            # Create Analyzer specifying n_equilibration_iterations=10 without statistical_inefficiency and 
+            # Create Analyzer specifying n_equilibration_iterations=10 without statistical_inefficiency and
             # check that equilibration detection returns n_equilibration_iterations > 10
             analyzer = self.ANALYZER(reporter, n_equilibration_iterations=10)
             sampled_energy_matrix, unsampled_energy_matrix, neighborhoods, replicas_state_indices = list(analyzer._read_energies(truncate_max_n_iterations=True))
@@ -587,6 +589,7 @@ class TestBaseMultistateSampler(object):
 
     N_SAMPLERS = 3
     N_STATES = 5
+    # TODO: Once we migrate to pytest SAMPLER and REPORTER should be fixtures!
     SAMPLER = MultiStateSampler
     REPORTER = MultiStateReporter
 
@@ -999,7 +1002,7 @@ def actual_stored_properties_check(self, additional_properties=None):
         thermodynamic_states, sampler_states, unsampled_states = copy.deepcopy(self.alanine_test)
 
         with self.temporary_storage_path() as storage_path:
-            sampler = self.SAMPLER(number_of_iterations=5)
+            sampler = self.SAMPLER(number_of_iterations=5, online_analysis_interval=1)
             reporter = self.REPORTER(storage_path, checkpoint_interval=1)
             self.call_sampler_create(sampler, reporter,
                                      thermodynamic_states, sampler_states,
@@ -1445,13 +1448,14 @@ def test_online_analysis_works(self):
         """Test online analysis runs"""
         thermodynamic_states, sampler_states, unsampled_states = copy.deepcopy(self.alanine_test)
         with self.temporary_storage_path() as storage_path:
-            n_iterations = 5
-            online_interval = 1
+            n_iterations = 10
+            online_interval = 2
             move = mmtools.mcmc.IntegratorMove(openmm.VerletIntegrator(1.0 * unit.femtosecond), n_steps=1)
             sampler = self.SAMPLER(mcmc_moves=move, number_of_iterations=n_iterations,
                                    online_analysis_interval=online_interval,
                                    online_analysis_minimum_iterations=3)
-            self.call_sampler_create(sampler, storage_path,
+            reporter = self.REPORTER(storage_path, checkpoint_interval=online_interval)
+            self.call_sampler_create(sampler, reporter,
                                      thermodynamic_states, sampler_states,
                                      unsampled_states)
             # Run
@@ -1482,11 +1486,12 @@ def validate_this_test():
             except AssertionError as e:
                 # Handle case where MBAR does not have a converged free energy yet by attempting to run longer
                 # Only run up until we have sampled every state, or we hit some cycle limit
-                cycle_limit = 20  # Put some upper limit of cycles
+                cycle_limit = 100  # Put some upper limit of cycles
                 cycles = 0
                 while (not np.unique(sampler._reporter.read_replica_thermodynamic_states()).size == self.N_STATES
-                       or cycles == cycle_limit):
+                       and cycles < cycle_limit):
                     sampler.extend(20)
+                    cycles += 1
                     try:
                         validate_this_test()
                     except AssertionError:
@@ -1510,7 +1515,8 @@ def test_online_analysis_stops(self):
                                    online_analysis_interval=online_interval,
                                    online_analysis_minimum_iterations=0,
                                    online_analysis_target_error=np.inf)  # use infinite error to stop right away
-            self.call_sampler_create(sampler, storage_path,
+            reporter = self.REPORTER(storage_path, checkpoint_interval=online_interval)
+            self.call_sampler_create(sampler, reporter,
                                      thermodynamic_states, sampler_states,
                                      unsampled_states)
             # Run
@@ -1570,7 +1576,8 @@ def test_real_time_analysis_yaml(self):
             move = mmtools.mcmc.IntegratorMove(openmm.VerletIntegrator(1.0 * unit.femtosecond), n_steps=1)
             sampler = self.SAMPLER(mcmc_moves=move, number_of_iterations=n_iterations,
                                    online_analysis_interval=online_interval)
-            self.call_sampler_create(sampler, storage_path,
+            reporter = self.REPORTER(storage_path, checkpoint_interval=online_interval)
+            self.call_sampler_create(sampler, reporter,
                                      thermodynamic_states, sampler_states,
                                      unsampled_states)
             # Run
@@ -1586,7 +1593,24 @@ def test_real_time_analysis_yaml(self):
             assert len(yaml_contents) == expected_yaml_entries, \
                 "Expected yaml entries do not match the actual number entries in the file."
 
-
+def test_real_time_analysis_can_be_none():
+    """Test if real time analysis can be done"""
+    testsystem = testsystems.AlanineDipeptideImplicit()
+    n_replicas = 3
+    T_min = 298.0 * unit.kelvin  # Minimum temperature.
+    T_max = 600.0 * unit.kelvin  # Maximum temperature.
+    temperatures = [T_min + (T_max - T_min) * (math.exp(float(i) / float(n_replicas-1)) - 1.0) / (math.e - 1.0)
+                    for i in range(n_replicas)]
+    temperatures = [T_min + (T_max - T_min) * (math.exp(float(i) / float(n_replicas-1)) - 1.0) / (math.e - 1.0)
+                    for i in range(n_replicas)]
+    thermodynamic_states = [states.ThermodynamicState(system=testsystem.system, temperature=T)
+                            for T in temperatures]
+    move = mcmc.GHMCMove(timestep=2.0*unit.femtoseconds, n_steps=50)
+    simulation = MultiStateSampler(mcmc_moves=move, number_of_iterations=2, online_analysis_interval=None)
+    storage_path = tempfile.NamedTemporaryFile(delete=False).name + '.nc'
+    reporter = MultiStateReporter(storage_path, checkpoint_interval=1)
+    simulation.create(thermodynamic_states=thermodynamic_states,
+                      sampler_states=states.SamplerState(testsystem.positions), storage=reporter)
 #############
 
 class TestExtraSamplersMultiStateSampler(TestMultiStateSampler):
diff --git a/openmmtools/tests/test_utils.py b/openmmtools/tests/test_utils.py
index ba69dac97..1ad4c927e 100644
--- a/openmmtools/tests/test_utils.py
+++ b/openmmtools/tests/test_utils.py
@@ -8,13 +8,22 @@
 Test utility functions in utils.py.
 
 """
-
+import abc
+import copy
 
 # =============================================================================
 # GLOBAL IMPORTS
 # =============================================================================
 
 import nose
+from nose.tools import nottest
+import numpy as np
+
+try:
+    import openmm
+    from openmm import unit
+except ImportError:  # OpenMM < 7.6
+    from simtk import openmm, unit
 
 from openmmtools.utils import _RESERVED_WORDS_PATTERNS
 from openmmtools.utils import *
@@ -26,10 +35,6 @@
 
 def test_platform_supports_precision():
     """Test that platform_supports_precision works correctly."""
-    try:
-        import openmm
-    except ImportError:  # OpenMM < 7.6
-        from simtk import openmm
 
     for platform_index in range(openmm.Platform.getNumPlatforms()):
         platform = openmm.Platform.getPlatform(platform_index)
@@ -326,16 +331,12 @@ class DummyRestorableCustomForce(RestorableOpenMMObject, openmm.CustomBondForce)
             def __init__(self, *args, **kwargs):
                 super(DummyRestorableCustomForce, self).__init__(*args, **kwargs)
 
-        class DummierRestorableCustomForce(RestorableOpenMMObject, openmm.CustomAngleForce):
-            def __init__(self, *args, **kwargs):
-                super(DummierRestorableCustomForce, self).__init__(*args, **kwargs)
-
         class DummyRestorableCustomIntegrator(RestorableOpenMMObject, openmm.CustomIntegrator):
             def __init__(self, *args, **kwargs):
                 super(DummyRestorableCustomIntegrator, self).__init__(*args, **kwargs)
 
         cls.dummy_force = DummyRestorableCustomForce('0.0;')
-        cls.dummier_force = DummierRestorableCustomForce('0.0;')
+        cls.dummier_force = DummyRestorableCustomForce('0.0;')
         cls.dummy_integrator= DummyRestorableCustomIntegrator(2.0*unit.femtoseconds)
 
     def test_restorable_openmm_object(self):
@@ -367,7 +368,16 @@ def test_restorable_openmm_object(self):
                 assert hasattr(copied_object, '_monkey_patching')
 
     def test_multiple_object_context_creation(self):
-        """Test that it is possible to create contexts with multiple restorable objects."""
+        """Test that it is possible to create contexts with multiple restorable objects.
+
+        The aim of this test is to make sure we can restore the force objects using to create the context;
+        after serialization.
+
+        Notes
+        -----
+        As of Openmm 8 having the same type of openmm objects with different default values for global parameters is
+        not allowed.
+        """
         system = openmm.System()
         for i in range(4):
             system.addParticle(1.0*unit.atom_mass_units)
@@ -385,12 +395,43 @@ def test_multiple_object_context_creation(self):
         force1, force2 = system.getForce(0), system.getForce(1)
         assert RestorableOpenMMObject.restore_interface(force1)
         assert RestorableOpenMMObject.restore_interface(force2)
-        hash1 = force1._get_force_parameter_by_name(force1, force_hash_parameter_name)
-        hash2 = force2._get_force_parameter_by_name(force2, force_hash_parameter_name)
-        assert not np.isclose(hash1, hash2)
         assert isinstance(force1, self.dummy_force.__class__)
         assert isinstance(force2, self.dummier_force.__class__)
 
+    def test_context_from_restorable_with_different_globals(self):
+        """
+        Test that you cannot create a context from restorable objects with different default values for
+        global parameters.
+
+        Creates a system with two forces that have different default values for the hash global parameter and
+        expects an OpenmmException when trying to create a Context using these forces.
+
+        Notes
+        -----
+        As of Openmm 8 having the same type of openmm objects with different default values for global parameters is
+        not allowed.
+        """
+        dummy_force = copy.deepcopy(self.dummy_force)
+        dummier_force = copy.deepcopy(self.dummier_force)
+
+        # Change the global parameter default value for one of the forces
+        force_hash_parameter_name = dummy_force._hash_parameter_name
+        dummier_force.addGlobalParameter(force_hash_parameter_name, 3.141592)
+
+        system = openmm.System()
+        for i in range(4):
+            system.addParticle(1.0 * unit.atom_mass_units)
+        system.addForce(copy.deepcopy(dummy_force))
+        system.addForce(copy.deepcopy(dummier_force))
+
+        # TODO: Change this once we migrate to pytest -- using skipif as needed
+        # Skip assertion for openmm < 8
+        if int(openmm.__version__[0]) < 8:
+            pass
+        else:
+            with nose.tools.assert_raises(openmm.OpenMMException):
+                openmm.Context(system, copy.deepcopy(self.dummy_integrator))
+
     def test_restorable_openmm_object_failure(self):
         """An exception is raised if the class has a restorable hash but the class can't be found."""
         force = openmm.CustomBondForce('0.0')
@@ -414,3 +455,155 @@ def test_restorable_openmm_object_hash_collisions(self):
             hash_float = RestorableOpenMMObject._compute_class_hash(restorable_cls)
             all_hashes.add(hash_float)
         assert len(all_hashes) == len(restorable_classes)
+
+
+class TestEquilibrationUtils(object):
+    """
+    Class for testing equilibration utility functions in openmmtools.utils.equilibration
+    """
+    def test_gentle_equilibration_setup(self):
+        """
+        Test gentle equilibration implementation using the Alanine dipeptide in explicit solvent
+        system found in `openmmtools.testsystems.AlanineDipeptideExplicit`
+
+        This only tests the gentle equilibration can be run with this system, only one iteration
+        of each stage is run.
+        """
+        from openmmtools.testsystems import AlanineDipeptideExplicit
+        from openmmtools.utils import run_gentle_equilibration
+
+        test_system = AlanineDipeptideExplicit()
+
+        # Retrieve positions, system and topology from the test_system object
+        positions = np.array(test_system.positions.value_in_unit(unit.nanometer))
+        system = test_system.system
+        topology = test_system.topology
+
+        stages = [
+            {'EOM': 'minimize', 'n_steps': 1, 'temperature': 300 * unit.kelvin, 'ensemble': None,
+             'restraint_selection': 'protein and not type H',
+             'force_constant': 100 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD_interpolate', 'n_steps': 1, 'temperature': 100 * unit.kelvin,
+             'temperature_end': 300 * unit.kelvin,
+             'ensemble': 'NVT', 'restraint_selection': 'protein and not type H',
+             'force_constant': 100 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 10 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD', 'n_steps': 1, 'temperature': 300, 'ensemble': 'NPT',
+             'restraint_selection': 'protein and not type H',
+             'force_constant': 100 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 10 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD', 'n_steps': 1, 'temperature': 300 * unit.kelvin, 'ensemble': 'NPT',
+             'restraint_selection': 'protein and not type H',
+             'force_constant': 10 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'minimize', 'n_steps': 1, 'temperature': 300 * unit.kelvin, 'ensemble': None,
+             'restraint_selection': 'protein and backbone',
+             'force_constant': 10 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD', 'n_steps': 1, 'temperature': 300 * unit.kelvin, 'ensemble': 'NPT',
+             'restraint_selection': 'protein and backbone',
+             'force_constant': 10 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD', 'n_steps': 1, 'temperature': 300 * unit.kelvin, 'ensemble': 'NPT',
+             'restraint_selection': 'protein and backbone',
+             'force_constant': 1 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD', 'n_steps': 1, 'temperature': 300 * unit.kelvin, 'ensemble': 'NPT',
+             'restraint_selection': 'protein and backbone',
+             'force_constant': 0.1 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD', 'n_steps': 1, 'temperature': 300 * unit.kelvin, 'ensemble': 'NPT',
+             'restraint_selection': None,
+             'force_constant': 0 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 2 * unit.femtoseconds},
+        ]
+
+        with temporary_directory() as tmp_path:
+            outfile_path = f"{tmp_path}/outfile.cif"
+            run_gentle_equilibration(topology, positions, system, stages, outfile_path, platform_name="CPU",
+                                 save_box_vectors=False)
+
+    # TODO: Marking as not a test until we solve our GPU CI
+    @nottest
+    def test_gentle_equilibration_cuda(self):
+        """
+        Test gentle equilibration implementation using the Alanine dipeptide in explicit solvent
+        system found in `openmmtools.testsystems.AlanineDipeptideExplicit`
+
+        To date it is meant to just test that the protocol can run with a test system and
+        do a quick comparison of the energies (this latter part not implemented yet).
+
+        Meant to be run using CUDA platform, similar to a production-ready environment.
+        """
+        # TODO: Perform the energy comparison part
+        from openmmtools.testsystems import AlanineDipeptideExplicit
+        from openmmtools.utils import run_gentle_equilibration
+
+        test_system = AlanineDipeptideExplicit()
+
+        # Retrieve positions, system and topology from the test_system object
+        positions = np.array(test_system.positions.value_in_unit(unit.nanometer))
+        system = test_system.system
+        topology = test_system.topology
+
+        stages = [
+            {'EOM': 'minimize', 'n_steps': 10000, 'temperature': 300 * unit.kelvin, 'ensemble': None,
+             'restraint_selection': 'protein and not type H',
+             'force_constant': 100 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD_interpolate', 'n_steps': 100000, 'temperature': 100 * unit.kelvin,
+             'temperature_end': 300 * unit.kelvin,
+             'ensemble': 'NVT', 'restraint_selection': 'protein and not type H',
+             'force_constant': 100 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 10 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD', 'n_steps': 100000, 'temperature': 300, 'ensemble': 'NPT',
+             'restraint_selection': 'protein and not type H',
+             'force_constant': 100 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 10 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD', 'n_steps': 250000, 'temperature': 300 * unit.kelvin, 'ensemble': 'NPT',
+             'restraint_selection': 'protein and not type H',
+             'force_constant': 10 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'minimize', 'n_steps': 10000, 'temperature': 300 * unit.kelvin, 'ensemble': None,
+             'restraint_selection': 'protein and backbone',
+             'force_constant': 10 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD', 'n_steps': 100000, 'temperature': 300 * unit.kelvin, 'ensemble': 'NPT',
+             'restraint_selection': 'protein and backbone',
+             'force_constant': 10 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD', 'n_steps': 100000, 'temperature': 300 * unit.kelvin, 'ensemble': 'NPT',
+             'restraint_selection': 'protein and backbone',
+             'force_constant': 1 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD', 'n_steps': 100000, 'temperature': 300 * unit.kelvin, 'ensemble': 'NPT',
+             'restraint_selection': 'protein and backbone',
+             'force_constant': 0.1 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 1 * unit.femtoseconds},
+            {'EOM': 'MD', 'n_steps': 2500000, 'temperature': 300 * unit.kelvin, 'ensemble': 'NPT',
+             'restraint_selection': None,
+             'force_constant': 0 * unit.kilocalories_per_mole / unit.angstrom ** 2,
+             'collision_rate': 2 / unit.picoseconds,
+             'timestep': 2 * unit.femtoseconds},
+        ]
+
+        run_gentle_equilibration(topology, positions, system, stages, "outfile.cif", platform_name="CUDA",
+                             save_box_vectors=False)
diff --git a/openmmtools/utils/__init__.py b/openmmtools/utils/__init__.py
new file mode 100644
index 000000000..b86320301
--- /dev/null
+++ b/openmmtools/utils/__init__.py
@@ -0,0 +1,40 @@
+"""
+Utils
+=====
+
+Utility functions meant to be used in the different openmm modules or tests.
+
+This module provides a common API point for different utility functions that serve as convenience
+or help users in different miscellaneous tasks.
+"""
+from .utils import (
+    RestorableOpenMMObject,
+    RestorableOpenMMObjectError,
+    SubhookedABCMeta,
+    Timer,
+    TrackedQuantity,
+    TrackedQuantityView,
+    _RESERVED_WORDS_PATTERNS,
+    _SERIALIZED_MANGLED_PREFIX,
+    _VALID_UNITS,
+    _VALID_UNIT_FUNCTIONS,
+    _changes_state,
+    deserialize,
+    find_all_subclasses,
+    find_subclass,
+    get_available_platforms,
+    get_fastest_platform,
+    is_quantity_close,
+    math_eval,
+    platform_supports_precision,
+    quantity_from_string,
+    sanitize_expression,
+    serialize,
+    temporary_directory,
+    time_it,
+    typename,
+    with_metaclass,
+    with_timer,
+)
+
+from .equilibration import run_gentle_equilibration
diff --git a/openmmtools/utils/equilibration.py b/openmmtools/utils/equilibration.py
new file mode 100644
index 000000000..1c5f06d63
--- /dev/null
+++ b/openmmtools/utils/equilibration.py
@@ -0,0 +1,164 @@
+"""
+Utility functions useful for equilibration.
+"""
+
+import logging
+from openmm import unit, OpenMMException
+
+# Set up logger
+_logger = logging.getLogger(__name__)
+
+
+def run_gentle_equilibration(topology, positions, system, stages, filename, platform_name='CUDA', save_box_vectors=True):
+    """
+    Run gentle equilibration. Serialize results in an external PDB or CIF file.
+
+    Parameters
+    ----------
+    topology : openmm.app.Topology
+        topology
+    positions : np.array in unit.nanometer
+        positions
+    system : openmm.System
+        system
+    stages : list of dicts
+        each dict corresponds to a stage of equilibration and contains the equilibration parameters for that stage
+
+        equilibration parameters:
+            EOM : str
+                'minimize' or 'MD' or 'MD_interpolate' (the last one will allow interpolation between 'temperature' and 'temperature_end')
+            n_steps : int
+                number of steps of MD
+            temperature : openmm.unit.kelvin
+                temperature (kelvin)
+            temperature_end : openmm.unit.kelvin, optional
+                the temperature (kelvin) at which to finish interpolation, if 'EOM' is 'MD_interpolate'
+            ensemble : str or None
+                'NPT' or 'NVT'
+            restraint_selection : str or None
+                to be used by mdtraj to select atoms for which to apply restraints
+            force_constant : openmm.unit.kilocalories_per_mole/openmm.unit.angstrom**2
+                force constant (kcal/molA^2)
+            collision_rate : 1/openmm.unit.picoseconds
+                collision rate (1/picoseconds)
+            timestep : openmm.unit.femtoseconds
+                timestep (femtoseconds)
+    filename : str
+        path to save the equilibrated structure
+    platform_name : str, default 'CUDA'
+        name of platform to be used by OpenMM. If not specified, OpenMM will select the fastest available platform
+    save_box_vectors : bool
+        Whether to save the box vectors in a box_vectors.npy file in the working directory, after execution.
+        Defaults to True.
+
+    """
+    import copy
+    import openmm
+    import time
+    import numpy as np
+    import mdtraj as md
+
+    for stage_index, parameters in enumerate(stages):
+
+        initial_time = time.time()
+        print(f"Executing stage {stage_index + 1}")
+
+        # Make a copy of the system
+        system_copy = copy.deepcopy(system)
+
+        # Add restraint
+        if parameters['restraint_selection'] is not None:
+            traj = md.Trajectory(positions, md.Topology.from_openmm(topology))
+            selection_indices = traj.topology.select(parameters['restraint_selection'])
+
+            custom_cv_force = openmm.CustomCVForce('(K_RMSD/2)*(RMSD)^2')
+            custom_cv_force.addGlobalParameter('K_RMSD', parameters['force_constant'] * 2)
+            rmsd_force = openmm.RMSDForce(positions, selection_indices)
+            custom_cv_force.addCollectiveVariable('RMSD', rmsd_force)
+            system_copy.addForce(custom_cv_force)
+
+        # Set barostat update interval to 0 (for NVT)
+        if parameters['ensemble'] == 'NVT':
+            force_dict = {force.__class__.__name__: index for index, force in enumerate(system_copy.getForces())}
+            try:
+                system_copy.getForce(force_dict['MonteCarloBarostat']).setFrequency(0)  # This requires openmm 8
+            except KeyError:
+                # No MonteCarloBarostat found
+                _logger.debug("No MonteCarloBarostat found in forces. Continuing.")
+            except OpenMMException:
+                # Must be Openmm<8
+                system_copy.removeForce(force_dict['MonteCarloBarostat'])
+
+        elif parameters['ensemble'] == 'NPT' or parameters['ensemble'] is None:
+            pass
+
+        else:
+            raise ValueError("Invalid parameter supplied for 'ensemble'")
+
+        # Set up integrator
+        temperature = parameters['temperature']
+        collision_rate = parameters['collision_rate']
+        timestep = parameters['timestep']
+
+        integrator = openmm.LangevinMiddleIntegrator(temperature, collision_rate, timestep)
+
+        # Set up context
+        platform = openmm.Platform.getPlatformByName(platform_name)
+        if platform_name in ['CUDA', 'OpenCL']:
+            platform.setPropertyDefaultValue('Precision', 'mixed')
+        if platform_name in ['CUDA']:
+            platform.setPropertyDefaultValue('DeterministicForces', 'true')
+
+        context = openmm.Context(system_copy, integrator, platform)
+        context.setPeriodicBoxVectors(*system_copy.getDefaultPeriodicBoxVectors())
+        context.setPositions(positions)
+        context.setVelocitiesToTemperature(temperature)
+
+        # Run minimization or MD
+        n_steps = parameters['n_steps']
+        n_steps_per_iteration = 100
+
+        if parameters['EOM'] == 'minimize':
+            openmm.LocalEnergyMinimizer.minimize(context, maxIterations=n_steps)
+
+        elif parameters['EOM'] == 'MD':
+            for _ in range(int(n_steps / n_steps_per_iteration)):
+                integrator.step(n_steps_per_iteration)
+
+        elif parameters['EOM'] == 'MD_interpolate':
+            temperature_end = parameters['temperature_end']
+            temperature_unit = unit.kelvin
+            temperatures = np.linspace(temperature / temperature_unit, temperature_end / temperature_unit,
+                                       int(n_steps / n_steps_per_iteration)) * temperature_unit
+            for temperature in temperatures:
+                integrator.setTemperature(temperature)
+                integrator.step(n_steps_per_iteration)
+
+        else:
+            raise ValueError("Invalid parameter supplied for 'EOM'")
+
+        # Retrieve positions after this stage of equil
+        state = context.getState(getPositions=True)
+        positions = state.getPositions(asNumpy=True)
+
+        # Update default box vectors for next iteration
+        box_vectors = state.getPeriodicBoxVectors()
+        system.setDefaultPeriodicBoxVectors(*box_vectors)
+
+        # Delete context and integrator
+        del context, integrator, system_copy
+
+        elapsed_time = time.time() - initial_time
+        print(f"\tStage {stage_index + 1} took {elapsed_time} seconds")
+
+    # Save the final equilibrated positions
+    if filename.endswith('pdb'):
+        openmm.app.PDBFile.writeFile(topology, positions, open(filename, "w"), keepIds=True)
+    elif filename.endswith('cif'):
+        openmm.app.PDBxFile.writeFile(topology, positions, open(filename, "w"), keepIds=True)
+
+    # Save the box vectors
+    if save_box_vectors:
+        with open(filename[:-4] + '_box_vectors.npy', 'wb') as f:
+            np.save(f, box_vectors)
+
diff --git a/openmmtools/utils.py b/openmmtools/utils/utils.py
similarity index 99%
rename from openmmtools/utils.py
rename to openmmtools/utils/utils.py
index 83523d017..f7392f467 100644
--- a/openmmtools/utils.py
+++ b/openmmtools/utils/utils.py
@@ -28,6 +28,7 @@
 import functools
 import importlib
 import contextlib
+import warnings
 import zlib
 
 import numpy as np
@@ -678,6 +679,8 @@ def deserialize(serialization):
         instance.__setstate__(serialization)
     except AttributeError:
         raise ValueError('Cannot deserialize class {} without a __setstate__ method'.format(class_name))
+    except KeyError as e:  # Key not found in options/variables -- backward compatibility for <=0.21.4
+        warnings.warn(f"Key {e} not found in {instance.__class__.__name__}.")
     return instance
 
 
diff --git a/readthedocs.yml b/readthedocs.yml
index 8bb808241..e2ca60ca6 100644
--- a/readthedocs.yml
+++ b/readthedocs.yml
@@ -1,15 +1,13 @@
-conda:
-  file: devtools/conda-envs/test_env.yaml
+version: 2
 
-# The default for requirements_file is null, but for some reason we get an error with this message if it's not set:
-# "Problem parsing YAML configuration. Invalid "requirements_file": path docs/requirements.txt does not exist"
-requirements_file: null
+conda:
+  environment: devtools/conda-envs/test_env.yaml
 
 build:
-  image: latest
+  os: ubuntu-22.04
   tools:
     python: mambaforge-4.10
 
-python:
-  version: 3.6
-  setup_py_install: true
+sphinx:
+   configuration: docs/conf.py
+   fail_on_warning: false