From ef61df35c9274b450c34f3da91931ddb8c74458c Mon Sep 17 00:00:00 2001
From: Carlo Camilloni <carlo.camilloni@gmail.com>
Date: Wed, 8 Mar 2023 15:13:06 +0100
Subject: [PATCH] removed patch for gmx2020 this is included till plumed 2.9

---
 patches/gromacs-2020.7.config                 |   36 -
 .../src/gromacs/CMakeLists.txt                |  445 ---
 .../src/gromacs/CMakeLists.txt.preplumed      |  442 ---
 .../src/gromacs/mdlib/expanded.cpp            | 1648 ---------
 .../src/gromacs/mdlib/expanded.cpp.preplumed  | 1582 ---------
 .../src/gromacs/mdlib/expanded.h              |   83 -
 .../src/gromacs/mdlib/expanded.h.preplumed    |   77 -
 .../src/gromacs/mdlib/force.cpp               |  388 ---
 .../src/gromacs/mdlib/force.cpp.preplumed     |  374 ---
 .../src/gromacs/mdrun/legacymdrunoptions.cpp  |  200 --
 .../mdrun/legacymdrunoptions.cpp.preplumed    |  167 -
 .../src/gromacs/mdrun/legacymdrunoptions.h    |  398 ---
 .../mdrun/legacymdrunoptions.h.preplumed      |  384 ---
 .../src/gromacs/mdrun/md.cpp                  | 1897 -----------
 .../src/gromacs/mdrun/md.cpp.preplumed        | 1689 ----------
 .../src/gromacs/mdrun/minimize.cpp            | 2951 -----------------
 .../src/gromacs/mdrun/minimize.cpp.preplumed  | 2873 ----------------
 .../src/gromacs/mdrun/replicaexchange.cpp     | 1495 ---------
 .../mdrun/replicaexchange.cpp.preplumed       | 1391 --------
 .../src/gromacs/mdrun/replicaexchange.h       |  117 -
 .../gromacs/mdrun/replicaexchange.h.preplumed |  109 -
 .../src/gromacs/mdrun/runner.cpp              | 2133 ------------
 .../src/gromacs/mdrun/runner.cpp.preplumed    | 2104 ------------
 23 files changed, 22983 deletions(-)
 delete mode 100644 patches/gromacs-2020.7.config
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/CMakeLists.txt
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/CMakeLists.txt.preplumed
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.cpp
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.cpp.preplumed
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.h
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.h.preplumed
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdlib/force.cpp
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdlib/force.cpp.preplumed
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp.preplumed
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.h
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.h.preplumed
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/md.cpp
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/md.cpp.preplumed
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/minimize.cpp
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/minimize.cpp.preplumed
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.cpp
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.cpp.preplumed
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.h
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.h.preplumed
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/runner.cpp
 delete mode 100644 patches/gromacs-2020.7.diff/src/gromacs/mdrun/runner.cpp.preplumed

diff --git a/patches/gromacs-2020.7.config b/patches/gromacs-2020.7.config
deleted file mode 100644
index 9f5f0f41f6..0000000000
--- a/patches/gromacs-2020.7.config
+++ /dev/null
@@ -1,36 +0,0 @@
-
-
-function plumed_preliminary_test(){
-# check if the README contains the word GROMACS and if gromacs has been already configured
-  grep -q GROMACS README 1>/dev/null 2>/dev/null
-}
-
-function plumed_patch_info(){
-cat << EOF
-PLUMED can be incorporated into gromacs using the standard patching procedure.
-Patching must be done in the gromacs root directory  _before_ the cmake command is invoked.
-
-On clusters you may want to patch gromacs using the static version of plumed, in this case
-building gromacs can result in multiple errors. One possible solution is to configure gromacs
-with these additional options:
-
-cmake -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON
-
-To enable PLUMED in a gromacs simulation one should use
-mdrun with an extra -plumed flag. The flag can be used to
-specify the name of the PLUMED input file, e.g.:
-
-gmx mdrun -plumed plumed.dat
-
-For more information on gromacs you should visit http://www.gromacs.org
-
-EOF
-}
-
-plumed_before_patch(){
-  plumed_patch_info
-
-  mv cmake/gmxVersionInfo.cmake cmake/gmxVersionInfo.cmake.preplumed
-  awk -v version="$PLUMED_VERSION" '/^set\(GMX_VERSION_STRING_OF_FORK/{gsub(/""/, "plumed-" version)}; {print}' cmake/gmxVersionInfo.cmake.preplumed > cmake/gmxVersionInfo.cmake
-}
-
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/CMakeLists.txt b/patches/gromacs-2020.7.diff/src/gromacs/CMakeLists.txt
deleted file mode 100644
index f13d03c174..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/CMakeLists.txt
+++ /dev/null
@@ -1,445 +0,0 @@
-#
-# This file is part of the GROMACS molecular simulation package.
-#
-# Copyright (c) 2010,2011,2012,2013,2014,2015, The GROMACS development team.
-# Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
-# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
-# and including many others, as listed in the AUTHORS file in the
-# top-level source directory and at http://www.gromacs.org.
-#
-# GROMACS is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public License
-# as published by the Free Software Foundation; either version 2.1
-# of the License, or (at your option) any later version.
-#
-# GROMACS is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with GROMACS; if not, see
-# http://www.gnu.org/licenses, or write to the Free Software Foundation,
-# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
-#
-# If you want to redistribute modifications to GROMACS, please
-# consider that scientific software is very special. Version
-# control is crucial - bugs must be traceable. We will be happy to
-# consider code for inclusion in the official distribution, but
-# derived work must not be called official GROMACS. Details are found
-# in the README & COPYING files - if they are missing, get the
-# official version at http://www.gromacs.org.
-#
-# To help us fund GROMACS development, we humbly ask that you cite
-# the research papers on the package. Check out http://www.gromacs.org.
-
-include(${CMAKE_SOURCE_DIR}/Plumed.cmake)
-
-set(LIBGROMACS_SOURCES)
-
-if (GMX_CLANG_CUDA)
-    include(gmxClangCudaUtils)
-endif()
-
-set_property(GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES)
-set_property(GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES)
-set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS)
-set_property(GLOBAL PROPERTY GMX_AVX_512_SOURCE)
-
-set(libgromacs_object_library_dependencies "")
-function (_gmx_add_files_to_property PROPERTY)
-    foreach (_file ${ARGN})
-        if (IS_ABSOLUTE "${_file}")
-            set_property(GLOBAL APPEND PROPERTY ${PROPERTY} ${_file})
-        else()
-            set_property(GLOBAL APPEND PROPERTY ${PROPERTY}
-                         ${CMAKE_CURRENT_LIST_DIR}/${_file})
-        endif()
-    endforeach()
-endfunction ()
-
-function (gmx_add_libgromacs_sources)
-    _gmx_add_files_to_property(GMX_LIBGROMACS_SOURCES ${ARGN})
-endfunction ()
-
-# TODO Reconsider this, as the CUDA driver API is probably a simpler
-# approach, at least for the build system. See Redmine #2530
-function (gmx_compile_cpp_as_cuda)
-    _gmx_add_files_to_property(GMX_LIBGROMACS_GPU_IMPL_SOURCES ${ARGN})
-endfunction ()
-
-# Add these contents first because linking their tests can take a lot
-# of time, so we want lots of parallel work still available after
-# linking starts.
-add_subdirectory(utility)
-# Add normal contents
-add_subdirectory(gmxlib)
-add_subdirectory(mdlib)
-add_subdirectory(applied_forces)
-add_subdirectory(listed_forces)
-add_subdirectory(nbnxm)
-add_subdirectory(commandline)
-add_subdirectory(domdec)
-add_subdirectory(ewald)
-add_subdirectory(fft)
-add_subdirectory(gpu_utils)
-add_subdirectory(hardware)
-add_subdirectory(linearalgebra)
-add_subdirectory(math)
-add_subdirectory(mdrun)
-add_subdirectory(mdrunutility)
-add_subdirectory(mdspan)
-add_subdirectory(mdtypes)
-add_subdirectory(onlinehelp)
-add_subdirectory(options)
-add_subdirectory(pbcutil)
-add_subdirectory(random)
-add_subdirectory(restraint)
-add_subdirectory(tables)
-add_subdirectory(taskassignment)
-add_subdirectory(timing)
-add_subdirectory(topology)
-add_subdirectory(trajectory)
-add_subdirectory(swap)
-add_subdirectory(essentialdynamics)
-add_subdirectory(pulling)
-add_subdirectory(awh)
-add_subdirectory(simd)
-add_subdirectory(imd)
-add_subdirectory(compat)
-add_subdirectory(mimic)
-add_subdirectory(modularsimulator)
-if (NOT GMX_BUILD_MDRUN_ONLY)
-    add_subdirectory(gmxana)
-    add_subdirectory(gmxpreprocess)
-    add_subdirectory(correlationfunctions)
-    add_subdirectory(statistics)
-    add_subdirectory(analysisdata)
-    add_subdirectory(coordinateio)
-    add_subdirectory(trajectoryanalysis)
-    add_subdirectory(energyanalysis)
-    add_subdirectory(tools)
-endif()
-
-get_property(PROPERTY_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES)
-list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES} ${PROPERTY_SOURCES})
-
-# This would be the standard way to include thread_mpi, but
-# we want libgromacs to link the functions directly
-#if(GMX_THREAD_MPI)
-#    add_subdirectory(thread_mpi)
-#endif()
-#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB})
-tmpi_get_source_list(THREAD_MPI_SOURCES ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/src)
-add_library(thread_mpi OBJECT ${THREAD_MPI_SOURCES})
-target_compile_definitions(thread_mpi PRIVATE HAVE_CONFIG_H)
-gmx_target_compile_options(thread_mpi)
-if (WIN32)
-    gmx_target_warning_suppression(thread_mpi /wd4996 HAS_NO_MSVC_UNSAFE_FUNCTION)
-endif()
-list(APPEND libgromacs_object_library_dependencies thread_mpi)
-
-configure_file(version.h.cmakein version.h)
-if(GMX_INSTALL_LEGACY_API)
-  install(FILES
-          ${CMAKE_CURRENT_BINARY_DIR}/version.h
-	  analysisdata.h
-	  options.h
-	  selection.h
-	  trajectoryanalysis.h
-          DESTINATION include/gromacs)
-endif()
-
-# This code is here instead of utility/CMakeLists.txt, because CMake
-# custom commands and source file properties can only be set in the directory
-# that contains the target that uses them.
-# TODO: Generate a header instead that can be included from baseversion.cpp.
-# That probably simplifies things somewhat.
-set(GENERATED_VERSION_FILE utility/baseversion-gen.cpp)
-gmx_configure_version_file(
-    utility/baseversion-gen.cpp.cmakein ${GENERATED_VERSION_FILE}
-    REMOTE_HASH
-    EXTRA_VARS
-        GMX_SOURCE_DOI
-        GMX_RELEASE_HASH
-        GMX_SOURCE_HASH
-        )
-list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE})
-
-# Mark some shared GPU implementation files to compile with CUDA if needed
-if (GMX_USE_CUDA)
-    get_property(LIBGROMACS_GPU_IMPL_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES)
-    set_source_files_properties(${LIBGROMACS_GPU_IMPL_SOURCES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
-endif()
-
-# set up CUDA compilation with clang
-if (GMX_CLANG_CUDA)
-    foreach (_file ${LIBGROMACS_SOURCES})
-        get_filename_component(_ext ${_file} EXT)
-        get_source_file_property(_cuda_source_format ${_file} CUDA_SOURCE_PROPERTY_FORMAT)
-        if ("${_ext}" STREQUAL ".cu" OR _cuda_source_format)
-            gmx_compile_cuda_file_with_clang(${_file})
-        endif()
-    endforeach()
-endif()
-
-if (GMX_USE_CUDA)
-    # Work around FindCUDA that prevents using target_link_libraries()
-    # with keywords otherwise...
-    set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES})
-    if (NOT GMX_CLANG_CUDA)
-        gmx_cuda_add_library(libgromacs ${LIBGROMACS_SOURCES})
-    else()
-        add_library(libgromacs ${LIBGROMACS_SOURCES})
-    endif()
-    target_link_libraries(libgromacs PRIVATE ${CUDA_CUFFT_LIBRARIES})
-else()
-    add_library(libgromacs ${LIBGROMACS_SOURCES})
-endif()
-
-# Add these contents first because linking their tests can take a lot
-# of time, so we want lots of parallel work still available after
-# linking starts.
-add_subdirectory(fileio)
-add_subdirectory(selection)
-
-# Suppress a warning about our abuse of t_inputrec
-gmx_source_file_warning_suppression(mdtypes/inputrec.cpp -Wno-class-memaccess HAS_NO_CLASS_MEMACCESS)
-
-# Handle the object libraries that contain the source file
-# dependencies that need special handling because they are generated
-# or external code.
-foreach(object_library ${libgromacs_object_library_dependencies})
-    if (BUILD_SHARED_LIBS)
-        set_target_properties(${object_library} PROPERTIES POSITION_INDEPENDENT_CODE true)
-    endif()
-    target_include_directories(${object_library} SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include)
-
-    # Add the sources from the object libraries to the main library.
-    target_sources(libgromacs PRIVATE $<TARGET_OBJECTS:${object_library}>)
-endforeach()
-gmx_target_compile_options(libgromacs)
-target_compile_definitions(libgromacs PRIVATE HAVE_CONFIG_H)
-target_include_directories(libgromacs SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include)
-
-if (GMX_USE_OPENCL)
-    option(GMX_EXTERNAL_CLFFT "True if an external clFFT is required to be used" FALSE)
-    mark_as_advanced(GMX_EXTERNAL_CLFFT)
-
-    # Default to using clFFT found on the system
-    # switch to quiet at the second run.
-    if (DEFINED clFFT_LIBRARY)
-        set (clFFT_FIND_QUIETLY TRUE)
-    endif()
-    find_package(clFFT)
-    if (NOT clFFT_FOUND)
-        if (GMX_EXTERNAL_CLFFT)
-            message(FATAL_ERROR "Did not find required external clFFT library, consider setting clFFT_ROOT_DIR")
-        endif()
-
-        if(MSVC)
-            message(FATAL_ERROR
-"An OpenCL build was requested with Visual Studio compiler, but GROMACS
-requires clFFT, which was not found on your system. GROMACS does bundle
-clFFT to help with building for OpenCL, but that clFFT has not yet been
-ported to the more recent versions of that compiler that GROMACS itself
-requires. Thus for now, OpenCL is not available with MSVC and the internal
-build of clFFT in GROMACS 2019. Either change compiler, try installing
-a clFFT package, or use the latest GROMACS 2018 point release.")
-        endif()
-
-        # Fall back on the internal version
-        set (_clFFT_dir ../external/clFFT/src)
-        add_subdirectory(${_clFFT_dir} clFFT-build)
-        target_sources(libgromacs PRIVATE
-            $<TARGET_OBJECTS:clFFT>
-        )
-        target_include_directories(libgromacs SYSTEM PRIVATE ${_clFFT_dir}/include)
-        # Use the magic variable for how to link any library needed for
-        # dlopen, etc.  which is -ldl where needed, and empty otherwise
-        # (e.g. Windows, BSD, Mac).
-        target_link_libraries(libgromacs PRIVATE "${CMAKE_DL_LIBS}")
-    else()
-        target_link_libraries(libgromacs PRIVATE clFFT)
-    endif()
-endif()
-
-# Permit GROMACS code to include externally developed headers, such as
-# the functionality from the nonstd project that we use for
-# gmx::compat::optional. These are included as system headers so that
-# no warnings are issued from them.
-#
-# TODO Perhaps generalize this for all headers from src/external
-target_include_directories(libgromacs SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/src/external)
-
-if(SIMD_AVX_512_CXX_SUPPORTED AND NOT ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_512_KNL"))
-    # Since we might be overriding -march=core-avx2, add a flag so we don't warn for this specific file.
-    # On KNL this can cause illegal instruction because the compiler might use non KNL AVX instructions
-    # with the SIMD_AVX_512_CXX_FLAGS flags.
-    set_source_files_properties(hardware/identifyavx512fmaunits.cpp PROPERTIES COMPILE_FLAGS "${SIMD_AVX_512_CXX_FLAGS} ${CXX_NO_UNUSED_OPTION_WARNING_FLAGS}")
-endif()
-
-gmx_setup_tng_for_libgromacs()
-
-target_link_libraries(libgromacs
-                      PRIVATE
-                      ${EXTRAE_LIBRARIES}
-                      ${GMX_EXTRA_LIBRARIES}
-                      ${GMX_COMMON_LIBRARIES}
-                      ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES}
-                      ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS}
-                      ${OpenCL_LIBRARIES}
-                      $<$<PLATFORM_ID:SunOS>:socket>
-                      PUBLIC
-                      ${GMX_PUBLIC_LIBRARIES}
-                      ${PLUMED_LOAD}
-                      )
-if (GMX_OPENMP)
-    target_link_libraries(libgromacs PUBLIC OpenMP::OpenMP_CXX)
-endif()
-set_target_properties(libgromacs PROPERTIES
-                      OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}"
-                      SOVERSION ${LIBRARY_SOVERSION_MAJOR}
-                      VERSION ${LIBRARY_VERSION}
-                      )
-
-gmx_manage_lmfit()
-target_link_libraries(libgromacs PRIVATE lmfit)
-
-# Fix everything found by the latest version of clang that we use in
-# Jenkins testing. This should be updated when we update the latest
-# tested version of clang.
-if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^7\.0")
-   target_compile_options(libgromacs PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Weverything ${IGNORED_CLANG_ALL_WARNINGS}>)
-endif()
-if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
-   target_compile_options(libgromacs PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/analyze /analyze:stacksize 70000
-     #Control flow warnings are disabled because the commond line output is insufficient. There is no tool
-     #to convert the xml report to e.g. HTML and even in Visual Studio the viewer doesn't work with cmake support.
-     /wd6001  #unitialized memory
-     /wd6011  #derefencing NULL
-     /wd6053  #prior call not zero-terminate
-     /wd6054  #might not be zero-terminated
-     /wd6385  #reading invalid data
-     /wd6386  #buffer overrun
-     /wd6387  #could be '0'
-     /wd28199 #uninitialized memory
-     # For compile time constant (e.g. templates) the following warnings have flase postives
-     /wd6239  #(<non-zero> && <expr>)
-     /wd6240  #(<expr> && <non-zero>)
-     /wd6294  #Ill-defined for-loop
-     /wd6326  #comparison of constant with other constant
-     /wd28020 #expression involving paramter is not true
-     # Misc
-     /wd6330  #incorrect type to function (warns for char (instead of unsigned) for isspace/isalpha/isdigit/..))
-     /wd6993  #OpenMP ignored
-     #TODO
-     /wd6031  #return value ignored (important - mostly warnigns about sscanf)
-     /wd6244  #hides declaration (known issue - we ingore similar warnings for other compilers)
-     /wd6246  #hides declaration
-     >
-   )
-endif()
-
-if (GMX_CLANG_TIDY)
-   set_target_properties(libgromacs PROPERTIES CXX_CLANG_TIDY
-       "${CLANG_TIDY_EXE};-warnings-as-errors=*")
-endif()
-
-# clang-3.6 warns about a number of issues that are not reported by more modern compilers
-# and we know they are not real issues. So we only check that it can compile without error
-# but ignore all warnings.
-if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^3\.6")
-    target_compile_options(libgromacs PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-w>)
-endif()
-
-# Only install the library in mdrun-only mode if it is actually necessary
-# for the binary
-if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS)
-    install(TARGETS libgromacs
-            EXPORT libgromacs
-            LIBRARY
-                DESTINATION ${CMAKE_INSTALL_LIBDIR}
-                COMPONENT libraries
-            RUNTIME
-                DESTINATION ${CMAKE_INSTALL_BINDIR}
-                COMPONENT libraries
-            ARCHIVE
-                DESTINATION ${CMAKE_INSTALL_LIBDIR}
-                COMPONENT libraries
-            INCLUDES DESTINATION include)
-endif()
-
-if (NOT GMX_BUILD_MDRUN_ONLY)
-    include(InstallLibInfo.cmake)
-endif()
-
-# Technically, the user could want to do this for an OpenCL build
-# using the CUDA runtime, but currently there's no reason to want to
-# do that.
-if (INSTALL_CUDART_LIB) #can be set manual by user
-    if (GMX_USE_CUDA)
-        foreach(CUDA_LIB ${CUDA_LIBRARIES})
-            string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB})
-            if(IS_CUDART) #libcuda should not be installed
-                #install also name-links (linker uses those)
-                file(GLOB CUDA_LIBS ${CUDA_LIB}*)
-                install(FILES ${CUDA_LIBS} DESTINATION
-                    ${CMAKE_INSTALL_LIBDIR} COMPONENT libraries)
-            endif()
-        endforeach()
-    else()
-        message(WARNING "INSTALL_CUDART_LIB only makes sense when configuring for CUDA support")
-    endif()
-endif()
-
-if(GMX_USE_OPENCL)
-    # Install the utility headers
-    file(GLOB OPENCL_INSTALLED_FILES
-        gpu_utils/vectype_ops.clh
-        gpu_utils/device_utils.clh
-        )
-    install(FILES ${OPENCL_INSTALLED_FILES}
-        DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/gpu_utils
-        COMPONENT libraries)
-    file(GLOB OPENCL_INSTALLED_FILES
-        pbcutil/ishift.h
-        )
-    install(FILES ${OPENCL_INSTALLED_FILES}
-        DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/pbcutil
-        COMPONENT libraries)
-
-    # Install the NBNXM source and headers
-    file(GLOB OPENCL_INSTALLED_FILES
-        nbnxm/constants.h
-        )
-    install(FILES ${OPENCL_INSTALLED_FILES}
-        DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/nbnxm
-        COMPONENT libraries)
-    file(GLOB OPENCL_INSTALLED_FILES
-        nbnxm/opencl/nbnxm_ocl_kernels.cl
-        nbnxm/opencl/nbnxm_ocl_kernel.clh
-        nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh
-        nbnxm/opencl/nbnxm_ocl_kernels.clh
-        nbnxm/opencl/nbnxm_ocl_kernels_fastgen.clh
-        nbnxm/opencl/nbnxm_ocl_kernels_fastgen_add_twincut.clh
-        nbnxm/opencl/nbnxm_ocl_kernel_utils.clh
-        nbnxm/opencl/nbnxm_ocl_consts.h
-        )
-    install(FILES ${OPENCL_INSTALLED_FILES}
-        DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/nbnxm/opencl
-        COMPONENT libraries)
-
-    # Install the PME source and headers
-    file(GLOB OPENCL_INSTALLED_FILES
-        ewald/pme_spread.clh
-        ewald/pme_solve.clh
-        ewald/pme_gather.clh
-        ewald/pme_gpu_utils.clh
-        ewald/pme_program.cl
-        ewald/pme_gpu_types.h
-        )
-    install(FILES ${OPENCL_INSTALLED_FILES}
-        DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/ewald
-        COMPONENT libraries)
-endif()
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/CMakeLists.txt.preplumed b/patches/gromacs-2020.7.diff/src/gromacs/CMakeLists.txt.preplumed
deleted file mode 100644
index 9249a7a08f..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/CMakeLists.txt.preplumed
+++ /dev/null
@@ -1,442 +0,0 @@
-#
-# This file is part of the GROMACS molecular simulation package.
-#
-# Copyright (c) 2010,2011,2012,2013,2014,2015, The GROMACS development team.
-# Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
-# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
-# and including many others, as listed in the AUTHORS file in the
-# top-level source directory and at http://www.gromacs.org.
-#
-# GROMACS is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public License
-# as published by the Free Software Foundation; either version 2.1
-# of the License, or (at your option) any later version.
-#
-# GROMACS is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with GROMACS; if not, see
-# http://www.gnu.org/licenses, or write to the Free Software Foundation,
-# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
-#
-# If you want to redistribute modifications to GROMACS, please
-# consider that scientific software is very special. Version
-# control is crucial - bugs must be traceable. We will be happy to
-# consider code for inclusion in the official distribution, but
-# derived work must not be called official GROMACS. Details are found
-# in the README & COPYING files - if they are missing, get the
-# official version at http://www.gromacs.org.
-#
-# To help us fund GROMACS development, we humbly ask that you cite
-# the research papers on the package. Check out http://www.gromacs.org.
-
-set(LIBGROMACS_SOURCES)
-
-if (GMX_CLANG_CUDA)
-    include(gmxClangCudaUtils)
-endif()
-
-set_property(GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES)
-set_property(GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES)
-set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS)
-set_property(GLOBAL PROPERTY GMX_AVX_512_SOURCE)
-
-set(libgromacs_object_library_dependencies "")
-function (_gmx_add_files_to_property PROPERTY)
-    foreach (_file ${ARGN})
-        if (IS_ABSOLUTE "${_file}")
-            set_property(GLOBAL APPEND PROPERTY ${PROPERTY} ${_file})
-        else()
-            set_property(GLOBAL APPEND PROPERTY ${PROPERTY}
-                         ${CMAKE_CURRENT_LIST_DIR}/${_file})
-        endif()
-    endforeach()
-endfunction ()
-
-function (gmx_add_libgromacs_sources)
-    _gmx_add_files_to_property(GMX_LIBGROMACS_SOURCES ${ARGN})
-endfunction ()
-
-# TODO Reconsider this, as the CUDA driver API is probably a simpler
-# approach, at least for the build system. See Redmine #2530
-function (gmx_compile_cpp_as_cuda)
-    _gmx_add_files_to_property(GMX_LIBGROMACS_GPU_IMPL_SOURCES ${ARGN})
-endfunction ()
-
-# Add these contents first because linking their tests can take a lot
-# of time, so we want lots of parallel work still available after
-# linking starts.
-add_subdirectory(utility)
-# Add normal contents
-add_subdirectory(gmxlib)
-add_subdirectory(mdlib)
-add_subdirectory(applied_forces)
-add_subdirectory(listed_forces)
-add_subdirectory(nbnxm)
-add_subdirectory(commandline)
-add_subdirectory(domdec)
-add_subdirectory(ewald)
-add_subdirectory(fft)
-add_subdirectory(gpu_utils)
-add_subdirectory(hardware)
-add_subdirectory(linearalgebra)
-add_subdirectory(math)
-add_subdirectory(mdrun)
-add_subdirectory(mdrunutility)
-add_subdirectory(mdspan)
-add_subdirectory(mdtypes)
-add_subdirectory(onlinehelp)
-add_subdirectory(options)
-add_subdirectory(pbcutil)
-add_subdirectory(random)
-add_subdirectory(restraint)
-add_subdirectory(tables)
-add_subdirectory(taskassignment)
-add_subdirectory(timing)
-add_subdirectory(topology)
-add_subdirectory(trajectory)
-add_subdirectory(swap)
-add_subdirectory(essentialdynamics)
-add_subdirectory(pulling)
-add_subdirectory(awh)
-add_subdirectory(simd)
-add_subdirectory(imd)
-add_subdirectory(compat)
-add_subdirectory(mimic)
-add_subdirectory(modularsimulator)
-if (NOT GMX_BUILD_MDRUN_ONLY)
-    add_subdirectory(gmxana)
-    add_subdirectory(gmxpreprocess)
-    add_subdirectory(correlationfunctions)
-    add_subdirectory(statistics)
-    add_subdirectory(analysisdata)
-    add_subdirectory(coordinateio)
-    add_subdirectory(trajectoryanalysis)
-    add_subdirectory(energyanalysis)
-    add_subdirectory(tools)
-endif()
-
-get_property(PROPERTY_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES)
-list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES} ${PROPERTY_SOURCES})
-
-# This would be the standard way to include thread_mpi, but
-# we want libgromacs to link the functions directly
-#if(GMX_THREAD_MPI)
-#    add_subdirectory(thread_mpi)
-#endif()
-#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB})
-tmpi_get_source_list(THREAD_MPI_SOURCES ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/src)
-add_library(thread_mpi OBJECT ${THREAD_MPI_SOURCES})
-target_compile_definitions(thread_mpi PRIVATE HAVE_CONFIG_H)
-gmx_target_compile_options(thread_mpi)
-if (WIN32)
-    gmx_target_warning_suppression(thread_mpi /wd4996 HAS_NO_MSVC_UNSAFE_FUNCTION)
-endif()
-list(APPEND libgromacs_object_library_dependencies thread_mpi)
-
-configure_file(version.h.cmakein version.h)
-if(GMX_INSTALL_LEGACY_API)
-  install(FILES
-          ${CMAKE_CURRENT_BINARY_DIR}/version.h
-	  analysisdata.h
-	  options.h
-	  selection.h
-	  trajectoryanalysis.h
-          DESTINATION include/gromacs)
-endif()
-
-# This code is here instead of utility/CMakeLists.txt, because CMake
-# custom commands and source file properties can only be set in the directory
-# that contains the target that uses them.
-# TODO: Generate a header instead that can be included from baseversion.cpp.
-# That probably simplifies things somewhat.
-set(GENERATED_VERSION_FILE utility/baseversion-gen.cpp)
-gmx_configure_version_file(
-    utility/baseversion-gen.cpp.cmakein ${GENERATED_VERSION_FILE}
-    REMOTE_HASH
-    EXTRA_VARS
-        GMX_SOURCE_DOI
-        GMX_RELEASE_HASH
-        GMX_SOURCE_HASH
-        )
-list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE})
-
-# Mark some shared GPU implementation files to compile with CUDA if needed
-if (GMX_USE_CUDA)
-    get_property(LIBGROMACS_GPU_IMPL_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES)
-    set_source_files_properties(${LIBGROMACS_GPU_IMPL_SOURCES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
-endif()
-
-# set up CUDA compilation with clang
-if (GMX_CLANG_CUDA)
-    foreach (_file ${LIBGROMACS_SOURCES})
-        get_filename_component(_ext ${_file} EXT)
-        get_source_file_property(_cuda_source_format ${_file} CUDA_SOURCE_PROPERTY_FORMAT)
-        if ("${_ext}" STREQUAL ".cu" OR _cuda_source_format)
-            gmx_compile_cuda_file_with_clang(${_file})
-        endif()
-    endforeach()
-endif()
-
-if (GMX_USE_CUDA)
-    # Work around FindCUDA that prevents using target_link_libraries()
-    # with keywords otherwise...
-    set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES})
-    if (NOT GMX_CLANG_CUDA)
-        gmx_cuda_add_library(libgromacs ${LIBGROMACS_SOURCES})
-    else()
-        add_library(libgromacs ${LIBGROMACS_SOURCES})
-    endif()
-    target_link_libraries(libgromacs PRIVATE ${CUDA_CUFFT_LIBRARIES})
-else()
-    add_library(libgromacs ${LIBGROMACS_SOURCES})
-endif()
-
-# Add these contents first because linking their tests can take a lot
-# of time, so we want lots of parallel work still available after
-# linking starts.
-add_subdirectory(fileio)
-add_subdirectory(selection)
-
-# Suppress a warning about our abuse of t_inputrec
-gmx_source_file_warning_suppression(mdtypes/inputrec.cpp -Wno-class-memaccess HAS_NO_CLASS_MEMACCESS)
-
-# Handle the object libraries that contain the source file
-# dependencies that need special handling because they are generated
-# or external code.
-foreach(object_library ${libgromacs_object_library_dependencies})
-    if (BUILD_SHARED_LIBS)
-        set_target_properties(${object_library} PROPERTIES POSITION_INDEPENDENT_CODE true)
-    endif()
-    target_include_directories(${object_library} SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include)
-
-    # Add the sources from the object libraries to the main library.
-    target_sources(libgromacs PRIVATE $<TARGET_OBJECTS:${object_library}>)
-endforeach()
-gmx_target_compile_options(libgromacs)
-target_compile_definitions(libgromacs PRIVATE HAVE_CONFIG_H)
-target_include_directories(libgromacs SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include)
-
-if (GMX_USE_OPENCL)
-    option(GMX_EXTERNAL_CLFFT "True if an external clFFT is required to be used" FALSE)
-    mark_as_advanced(GMX_EXTERNAL_CLFFT)
-
-    # Default to using clFFT found on the system
-    # switch to quiet at the second run.
-    if (DEFINED clFFT_LIBRARY)
-        set (clFFT_FIND_QUIETLY TRUE)
-    endif()
-    find_package(clFFT)
-    if (NOT clFFT_FOUND)
-        if (GMX_EXTERNAL_CLFFT)
-            message(FATAL_ERROR "Did not find required external clFFT library, consider setting clFFT_ROOT_DIR")
-        endif()
-
-        if(MSVC)
-            message(FATAL_ERROR
-"An OpenCL build was requested with Visual Studio compiler, but GROMACS
-requires clFFT, which was not found on your system. GROMACS does bundle
-clFFT to help with building for OpenCL, but that clFFT has not yet been
-ported to the more recent versions of that compiler that GROMACS itself
-requires. Thus for now, OpenCL is not available with MSVC and the internal
-build of clFFT in GROMACS 2019. Either change compiler, try installing
-a clFFT package, or use the latest GROMACS 2018 point release.")
-        endif()
-
-        # Fall back on the internal version
-        set (_clFFT_dir ../external/clFFT/src)
-        add_subdirectory(${_clFFT_dir} clFFT-build)
-        target_sources(libgromacs PRIVATE
-            $<TARGET_OBJECTS:clFFT>
-        )
-        target_include_directories(libgromacs SYSTEM PRIVATE ${_clFFT_dir}/include)
-        # Use the magic variable for how to link any library needed for
-        # dlopen, etc.  which is -ldl where needed, and empty otherwise
-        # (e.g. Windows, BSD, Mac).
-        target_link_libraries(libgromacs PRIVATE "${CMAKE_DL_LIBS}")
-    else()
-        target_link_libraries(libgromacs PRIVATE clFFT)
-    endif()
-endif()
-
-# Permit GROMACS code to include externally developed headers, such as
-# the functionality from the nonstd project that we use for
-# gmx::compat::optional. These are included as system headers so that
-# no warnings are issued from them.
-#
-# TODO Perhaps generalize this for all headers from src/external
-target_include_directories(libgromacs SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/src/external)
-
-if(SIMD_AVX_512_CXX_SUPPORTED AND NOT ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_512_KNL"))
-    # Since we might be overriding -march=core-avx2, add a flag so we don't warn for this specific file.
-    # On KNL this can cause illegal instruction because the compiler might use non KNL AVX instructions
-    # with the SIMD_AVX_512_CXX_FLAGS flags.
-    set_source_files_properties(hardware/identifyavx512fmaunits.cpp PROPERTIES COMPILE_FLAGS "${SIMD_AVX_512_CXX_FLAGS} ${CXX_NO_UNUSED_OPTION_WARNING_FLAGS}")
-endif()
-
-gmx_setup_tng_for_libgromacs()
-
-target_link_libraries(libgromacs
-                      PRIVATE
-                      ${EXTRAE_LIBRARIES}
-                      ${GMX_EXTRA_LIBRARIES}
-                      ${GMX_COMMON_LIBRARIES}
-                      ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES}
-                      ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS}
-                      ${OpenCL_LIBRARIES}
-                      $<$<PLATFORM_ID:SunOS>:socket>
-                      PUBLIC
-                      ${GMX_PUBLIC_LIBRARIES}
-                      )
-if (GMX_OPENMP)
-    target_link_libraries(libgromacs PUBLIC OpenMP::OpenMP_CXX)
-endif()
-set_target_properties(libgromacs PROPERTIES
-                      OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}"
-                      SOVERSION ${LIBRARY_SOVERSION_MAJOR}
-                      VERSION ${LIBRARY_VERSION}
-                      )
-
-gmx_manage_lmfit()
-target_link_libraries(libgromacs PRIVATE lmfit)
-
-# Fix everything found by the latest version of clang that we use in
-# Jenkins testing. This should be updated when we update the latest
-# tested version of clang.
-if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^7\.0")
-   target_compile_options(libgromacs PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Weverything ${IGNORED_CLANG_ALL_WARNINGS}>)
-endif()
-if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
-   target_compile_options(libgromacs PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/analyze /analyze:stacksize 70000
-     #Control flow warnings are disabled because the commond line output is insufficient. There is no tool
-     #to convert the xml report to e.g. HTML and even in Visual Studio the viewer doesn't work with cmake support.
-     /wd6001  #unitialized memory
-     /wd6011  #derefencing NULL
-     /wd6053  #prior call not zero-terminate
-     /wd6054  #might not be zero-terminated
-     /wd6385  #reading invalid data
-     /wd6386  #buffer overrun
-     /wd6387  #could be '0'
-     /wd28199 #uninitialized memory
-     # For compile time constant (e.g. templates) the following warnings have flase postives
-     /wd6239  #(<non-zero> && <expr>)
-     /wd6240  #(<expr> && <non-zero>)
-     /wd6294  #Ill-defined for-loop
-     /wd6326  #comparison of constant with other constant
-     /wd28020 #expression involving paramter is not true
-     # Misc
-     /wd6330  #incorrect type to function (warns for char (instead of unsigned) for isspace/isalpha/isdigit/..))
-     /wd6993  #OpenMP ignored
-     #TODO
-     /wd6031  #return value ignored (important - mostly warnigns about sscanf)
-     /wd6244  #hides declaration (known issue - we ingore similar warnings for other compilers)
-     /wd6246  #hides declaration
-     >
-   )
-endif()
-
-if (GMX_CLANG_TIDY)
-   set_target_properties(libgromacs PROPERTIES CXX_CLANG_TIDY
-       "${CLANG_TIDY_EXE};-warnings-as-errors=*")
-endif()
-
-# clang-3.6 warns about a number of issues that are not reported by more modern compilers
-# and we know they are not real issues. So we only check that it can compile without error
-# but ignore all warnings.
-if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^3\.6")
-    target_compile_options(libgromacs PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-w>)
-endif()
-
-# Only install the library in mdrun-only mode if it is actually necessary
-# for the binary
-if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS)
-    install(TARGETS libgromacs
-            EXPORT libgromacs
-            LIBRARY
-                DESTINATION ${CMAKE_INSTALL_LIBDIR}
-                COMPONENT libraries
-            RUNTIME
-                DESTINATION ${CMAKE_INSTALL_BINDIR}
-                COMPONENT libraries
-            ARCHIVE
-                DESTINATION ${CMAKE_INSTALL_LIBDIR}
-                COMPONENT libraries
-            INCLUDES DESTINATION include)
-endif()
-
-if (NOT GMX_BUILD_MDRUN_ONLY)
-    include(InstallLibInfo.cmake)
-endif()
-
-# Technically, the user could want to do this for an OpenCL build
-# using the CUDA runtime, but currently there's no reason to want to
-# do that.
-if (INSTALL_CUDART_LIB) #can be set manual by user
-    if (GMX_USE_CUDA)
-        foreach(CUDA_LIB ${CUDA_LIBRARIES})
-            string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB})
-            if(IS_CUDART) #libcuda should not be installed
-                #install also name-links (linker uses those)
-                file(GLOB CUDA_LIBS ${CUDA_LIB}*)
-                install(FILES ${CUDA_LIBS} DESTINATION
-                    ${CMAKE_INSTALL_LIBDIR} COMPONENT libraries)
-            endif()
-        endforeach()
-    else()
-        message(WARNING "INSTALL_CUDART_LIB only makes sense when configuring for CUDA support")
-    endif()
-endif()
-
-if(GMX_USE_OPENCL)
-    # Install the utility headers
-    file(GLOB OPENCL_INSTALLED_FILES
-        gpu_utils/vectype_ops.clh
-        gpu_utils/device_utils.clh
-        )
-    install(FILES ${OPENCL_INSTALLED_FILES}
-        DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/gpu_utils
-        COMPONENT libraries)
-    file(GLOB OPENCL_INSTALLED_FILES
-        pbcutil/ishift.h
-        )
-    install(FILES ${OPENCL_INSTALLED_FILES}
-        DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/pbcutil
-        COMPONENT libraries)
-
-    # Install the NBNXM source and headers
-    file(GLOB OPENCL_INSTALLED_FILES
-        nbnxm/constants.h
-        )
-    install(FILES ${OPENCL_INSTALLED_FILES}
-        DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/nbnxm
-        COMPONENT libraries)
-    file(GLOB OPENCL_INSTALLED_FILES
-        nbnxm/opencl/nbnxm_ocl_kernels.cl
-        nbnxm/opencl/nbnxm_ocl_kernel.clh
-        nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh
-        nbnxm/opencl/nbnxm_ocl_kernels.clh
-        nbnxm/opencl/nbnxm_ocl_kernels_fastgen.clh
-        nbnxm/opencl/nbnxm_ocl_kernels_fastgen_add_twincut.clh
-        nbnxm/opencl/nbnxm_ocl_kernel_utils.clh
-        nbnxm/opencl/nbnxm_ocl_consts.h
-        )
-    install(FILES ${OPENCL_INSTALLED_FILES}
-        DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/nbnxm/opencl
-        COMPONENT libraries)
-
-    # Install the PME source and headers
-    file(GLOB OPENCL_INSTALLED_FILES
-        ewald/pme_spread.clh
-        ewald/pme_solve.clh
-        ewald/pme_gather.clh
-        ewald/pme_gpu_utils.clh
-        ewald/pme_program.cl
-        ewald/pme_gpu_types.h
-        )
-    install(FILES ${OPENCL_INSTALLED_FILES}
-        DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/ewald
-        COMPONENT libraries)
-endif()
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.cpp b/patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.cpp
deleted file mode 100644
index 7f99a54c81..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.cpp
+++ /dev/null
@@ -1,1648 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012-2018, The GROMACS development team.
- * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-
-/* PLUMED */
-#include "../../../Plumed.h"
-extern int    plumedswitch;
-extern plumed plumedmain;
-/* END PLUMED */
-
-#include "gmxpre.h"
-
-#include "expanded.h"
-
-#include <cmath>
-#include <cstdio>
-
-#include <algorithm>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/fileio/confio.h"
-#include "gromacs/fileio/gmxfio.h"
-#include "gromacs/fileio/xtcio.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/listed_forces/disre.h"
-#include "gromacs/listed_forces/orires.h"
-#include "gromacs/math/functions.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/calcmu.h"
-#include "gromacs/mdlib/constr.h"
-#include "gromacs/mdlib/force.h"
-#include "gromacs/mdlib/update.h"
-#include "gromacs/mdtypes/enerdata.h"
-#include "gromacs/mdtypes/forcerec.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdatom.h"
-#include "gromacs/mdtypes/state.h"
-#include "gromacs/random/threefry.h"
-#include "gromacs/random/uniformrealdistribution.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/logger.h"
-#include "gromacs/utility/smalloc.h"
-
-#include "expanded_internal.h"
-
-static void init_df_history_weights(df_history_t* dfhist, const t_expanded* expand, int nlim)
-{
-    int i;
-    dfhist->wl_delta = expand->init_wl_delta;
-    for (i = 0; i < nlim; i++)
-    {
-        dfhist->sum_weights[i] = expand->init_lambda_weights[i];
-        dfhist->sum_dg[i]      = expand->init_lambda_weights[i];
-    }
-}
-
-/* Eventually should contain all the functions needed to initialize expanded ensemble
-   before the md loop starts */
-void init_expanded_ensemble(gmx_bool bStateFromCP, const t_inputrec* ir, df_history_t* dfhist, const gmx::MDLogger& mdlog)
-{
-    if (!bStateFromCP)
-    {
-        init_df_history_weights(dfhist, ir->expandedvals, ir->fepvals->n_lambda);
-    }
-    if (plumedswitch)
-    {
-        if (ir->expandedvals->elamstats == elamstatsNO)
-        {
-            // No weight updating was chosen, use PLUMED weights
-            int plumedVersion=0;
-            plumed_cmd(plumedmain, "getApiVersion", &plumedVersion);
-            GMX_RELEASE_ASSERT(
-                    plumedVersion >= 9,
-                    "Please use PLUMED v2.8 or newer to use alchemical metadynamics with expanded ensemble");
-
-            GMX_LOG(mdlog.info).asParagraph().appendText(
-                    "You requested an expanded ensemble simulation with lmc-stats = no and activated PLUMED.\n"
-                    "As a result, this simulation will use the bias provided by PLUMED and ignore all\n"
-                    "expanded ensemble settings related to weight updates.\n"
-                    "If you want to use lambda weights updated by GROMACS in the expanded ensemble calculation,\n"
-                    "set lmc-stats != no.");
-        }
-        else
-        {
-            GMX_LOG(mdlog.info).asParagraph().appendText(
-                    "You requested an expanded ensemble simulation with lmc-stats != no and activated PLUMED.\n"
-                    "As a result, this simulation will use lambda weights managed by GROMACS and will not\n"
-                    "explicitly use the PLUMED bias in the expanded ensemble calculation.\n"
-                    "If you want to use the PLUMED bias as lambda weights, set lmc-stats = no.");
-        }
-    }
-}
-
-static void GenerateGibbsProbabilities(const real* ene, double* p_k, double* pks, int minfep, int maxfep)
-{
-
-    int  i;
-    real maxene;
-
-    *pks   = 0.0;
-    maxene = ene[minfep];
-    /* find the maximum value */
-    for (i = minfep; i <= maxfep; i++)
-    {
-        if (ene[i] > maxene)
-        {
-            maxene = ene[i];
-        }
-    }
-    /* find the denominator */
-    for (i = minfep; i <= maxfep; i++)
-    {
-        *pks += std::exp(ene[i] - maxene);
-    }
-    /*numerators*/
-    for (i = minfep; i <= maxfep; i++)
-    {
-        p_k[i] = std::exp(ene[i] - maxene) / *pks;
-    }
-}
-
-static void
-GenerateWeightedGibbsProbabilities(const real* ene, double* p_k, double* pks, int nlim, real* nvals, real delta)
-{
-
-    int   i;
-    real  maxene;
-    real* nene;
-    *pks = 0.0;
-
-    snew(nene, nlim);
-    for (i = 0; i < nlim; i++)
-    {
-        if (nvals[i] == 0)
-        {
-            /* add the delta, since we need to make sure it's greater than zero, and
-               we need a non-arbitrary number? */
-            nene[i] = ene[i] + std::log(nvals[i] + delta);
-        }
-        else
-        {
-            nene[i] = ene[i] + std::log(nvals[i]);
-        }
-    }
-
-    /* find the maximum value */
-    maxene = nene[0];
-    for (i = 0; i < nlim; i++)
-    {
-        if (nene[i] > maxene)
-        {
-            maxene = nene[i];
-        }
-    }
-
-    /* subtract off the maximum, avoiding overflow */
-    for (i = 0; i < nlim; i++)
-    {
-        nene[i] -= maxene;
-    }
-
-    /* find the denominator */
-    for (i = 0; i < nlim; i++)
-    {
-        *pks += std::exp(nene[i]);
-    }
-
-    /*numerators*/
-    for (i = 0; i < nlim; i++)
-    {
-        p_k[i] = std::exp(nene[i]) / *pks;
-    }
-    sfree(nene);
-}
-
-static int FindMinimum(const real* min_metric, int N)
-{
-
-    real min_val;
-    int  min_nval, nval;
-
-    min_nval = 0;
-    min_val  = min_metric[0];
-
-    for (nval = 0; nval < N; nval++)
-    {
-        if (min_metric[nval] < min_val)
-        {
-            min_val  = min_metric[nval];
-            min_nval = nval;
-        }
-    }
-    return min_nval;
-}
-
-static gmx_bool CheckHistogramRatios(int nhisto, const real* histo, real ratio)
-{
-
-    int      i;
-    real     nmean;
-    gmx_bool bIfFlat;
-
-    nmean = 0;
-    for (i = 0; i < nhisto; i++)
-    {
-        nmean += histo[i];
-    }
-
-    if (nmean == 0)
-    {
-        /* no samples! is bad!*/
-        bIfFlat = FALSE;
-        return bIfFlat;
-    }
-    nmean /= static_cast<real>(nhisto);
-
-    bIfFlat = TRUE;
-    for (i = 0; i < nhisto; i++)
-    {
-        /* make sure that all points are in the ratio < x <  1/ratio range  */
-        if (!((histo[i] / nmean < 1.0 / ratio) && (histo[i] / nmean > ratio)))
-        {
-            bIfFlat = FALSE;
-            break;
-        }
-    }
-    return bIfFlat;
-}
-
-static gmx_bool CheckIfDoneEquilibrating(int nlim, const t_expanded* expand, const df_history_t* dfhist, int64_t step)
-{
-
-    int      i, totalsamples;
-    gmx_bool bDoneEquilibrating = TRUE;
-    gmx_bool bIfFlat;
-
-    /* If we are doing slow growth to get initial values, we haven't finished equilibrating */
-    if (expand->lmc_forced_nstart > 0)
-    {
-        for (i = 0; i < nlim; i++)
-        {
-            if (dfhist->n_at_lam[i]
-                < expand->lmc_forced_nstart) /* we are still doing the initial sweep, so we're
-                                                definitely not done equilibrating*/
-            {
-                bDoneEquilibrating = FALSE;
-                break;
-            }
-        }
-    }
-    else
-    {
-        /* assume we have equilibrated the weights, then check to see if any of the conditions are not met */
-        bDoneEquilibrating = TRUE;
-
-        /* calculate the total number of samples */
-        switch (expand->elmceq)
-        {
-            case elmceqNO:
-                /* We have not equilibrated, and won't, ever. */
-                bDoneEquilibrating = FALSE;
-                break;
-            case elmceqYES:
-                /* we have equilibrated -- we're done */
-                bDoneEquilibrating = TRUE;
-                break;
-            case elmceqSTEPS:
-                /* first, check if we are equilibrating by steps, if we're still under */
-                if (step < expand->equil_steps)
-                {
-                    bDoneEquilibrating = FALSE;
-                }
-                break;
-            case elmceqSAMPLES:
-                totalsamples = 0;
-                for (i = 0; i < nlim; i++)
-                {
-                    totalsamples += dfhist->n_at_lam[i];
-                }
-                if (totalsamples < expand->equil_samples)
-                {
-                    bDoneEquilibrating = FALSE;
-                }
-                break;
-            case elmceqNUMATLAM:
-                for (i = 0; i < nlim; i++)
-                {
-                    if (dfhist->n_at_lam[i]
-                        < expand->equil_n_at_lam) /* we are still doing the initial sweep, so we're
-                                                     definitely not done equilibrating*/
-                    {
-                        bDoneEquilibrating = FALSE;
-                        break;
-                    }
-                }
-                break;
-            case elmceqWLDELTA:
-                if (EWL(expand->elamstats)) /* This check is in readir as well, but
-                                               just to be sure */
-                {
-                    if (dfhist->wl_delta > expand->equil_wl_delta)
-                    {
-                        bDoneEquilibrating = FALSE;
-                    }
-                }
-                break;
-            case elmceqRATIO:
-                /* we can use the flatness as a judge of good weights, as long as
-                   we're not doing minvar, or Wang-Landau.
-                   But turn off for now until we figure out exactly how we do this.
-                 */
-
-                if (!(EWL(expand->elamstats) || expand->elamstats == elamstatsMINVAR))
-                {
-                    /* we want to use flatness -avoiding- the forced-through samples.  Plus, we need
-                       to convert to floats for this histogram function. */
-
-                    real* modhisto;
-                    snew(modhisto, nlim);
-                    for (i = 0; i < nlim; i++)
-                    {
-                        modhisto[i] = 1.0 * (dfhist->n_at_lam[i] - expand->lmc_forced_nstart);
-                    }
-                    bIfFlat = CheckHistogramRatios(nlim, modhisto, expand->equil_ratio);
-                    sfree(modhisto);
-                    if (!bIfFlat)
-                    {
-                        bDoneEquilibrating = FALSE;
-                    }
-                }
-                break;
-            default: bDoneEquilibrating = TRUE; break;
-        }
-    }
-    return bDoneEquilibrating;
-}
-
-static gmx_bool UpdateWeights(int           nlim,
-                              t_expanded*   expand,
-                              df_history_t* dfhist,
-                              int           fep_state,
-                              const real*   scaled_lamee,
-                              const real*   weighted_lamee,
-                              int64_t       step)
-{
-    gmx_bool bSufficientSamples;
-    real     acceptanceWeight;
-    int      i;
-    int      min_nvalm, min_nvalp, maxc;
-    real     omega_m1_0, omega_p1_0;
-    real     zero_sum_weights;
-    real *omegam_array, *weightsm_array, *omegap_array, *weightsp_array, *varm_array, *varp_array,
-            *dwp_array, *dwm_array;
-    real    clam_varm, clam_varp, clam_osum, clam_weightsm, clam_weightsp, clam_minvar;
-    real *  lam_variance, *lam_dg;
-    double* p_k;
-    double  pks = 0;
-
-    /* Future potential todos for this function (see #3848):
-     *  - Update the names in the dhist structure to be clearer. Not done for now since this
-     *    a bugfix update and we are mininizing other code changes.
-     *  - Modularize the code some more.
-     *  - potentially merge with accelerated weight histogram functionality, since it's very similar.
-     */
-    /*  if we have equilibrated the expanded ensemble weights, we are not updating them, so exit now */
-    if (dfhist->bEquil)
-    {
-        return FALSE;
-    }
-
-    if (CheckIfDoneEquilibrating(nlim, expand, dfhist, step))
-    {
-        dfhist->bEquil = TRUE;
-        /* zero out the visited states so we know how many equilibrated states we have
-           from here on out.*/
-        for (i = 0; i < nlim; i++)
-        {
-            dfhist->n_at_lam[i] = 0;
-        }
-        return TRUE;
-    }
-
-    /* If we reached this far, we have not equilibrated yet, keep on
-       going resetting the weights */
-
-    if (EWL(expand->elamstats))
-    {
-        if (expand->elamstats == elamstatsWL) /* Using standard Wang-Landau for weight updates */
-        {
-            dfhist->sum_weights[fep_state] -= dfhist->wl_delta;
-            dfhist->wl_histo[fep_state] += 1.0;
-        }
-        else if (expand->elamstats == elamstatsWWL)
-        /* Using weighted Wang-Landau for weight updates.
-         * Very closly equivalent to accelerated weight histogram approach
-         * applied to expanded ensemble. */
-        {
-            snew(p_k, nlim);
-
-            /* first increment count */
-            GenerateGibbsProbabilities(weighted_lamee, p_k, &pks, 0, nlim - 1);
-            for (i = 0; i < nlim; i++)
-            {
-                dfhist->wl_histo[i] += static_cast<real>(p_k[i]);
-            }
-
-            /* then increment weights (uses count) */
-            pks = 0.0;
-            GenerateWeightedGibbsProbabilities(weighted_lamee, p_k, &pks, nlim, dfhist->wl_histo,
-                                               dfhist->wl_delta);
-
-            for (i = 0; i < nlim; i++)
-            {
-                dfhist->sum_weights[i] -= dfhist->wl_delta * static_cast<real>(p_k[i]);
-            }
-            /* Alternate definition, using logarithms. Shouldn't make very much difference! */
-            /*
-               real di;
-               for (i=0;i<nlim;i++)
-               {
-                di = (real)1.0 + dfhist->wl_delta*(real)p_k[i];
-                dfhist->sum_weights[i] -= log(di);
-               }
-             */
-            sfree(p_k);
-        }
-
-        zero_sum_weights = dfhist->sum_weights[0];
-        for (i = 0; i < nlim; i++)
-        {
-            dfhist->sum_weights[i] -= zero_sum_weights;
-        }
-    }
-
-    if (expand->elamstats == elamstatsBARKER || expand->elamstats == elamstatsMETROPOLIS
-        || expand->elamstats == elamstatsMINVAR)
-    {
-        maxc = 2 * expand->c_range + 1;
-
-        snew(lam_dg, nlim);
-        snew(lam_variance, nlim);
-
-        snew(omegap_array, maxc);
-        snew(weightsp_array, maxc);
-        snew(varp_array, maxc);
-        snew(dwp_array, maxc);
-
-        snew(omegam_array, maxc);
-        snew(weightsm_array, maxc);
-        snew(varm_array, maxc);
-        snew(dwm_array, maxc);
-
-        /* unpack the values of the free energy differences and the
-         * variance in their estimates between nearby lambdas. We will
-         * only actually update 2 of these, the state we are currently
-         * at and the one we end up moving to
-         */
-
-        for (i = 0; i < nlim - 1; i++)
-        { /* only through the second to last */
-            lam_dg[i] = dfhist->sum_dg[i + 1] - dfhist->sum_dg[i];
-            lam_variance[i] =
-                    gmx::square(dfhist->sum_variance[i + 1]) - gmx::square(dfhist->sum_variance[i]);
-        }
-
-        /* accumulate running averages of thermodynamic averages for Bennett Acceptance Ratio-based
-         * estimates of the free energy .
-         * Rather than peforming self-consistent estimation of the free energies at each step,
-         * we keep track of an array of possible different free energies (cnvals),
-         * and we self-consistently choose the best one. The one that leads to a free energy estimate
-         * that is closest to itself is the best estimate of the free energy.  It is essentially a
-         * parallellized version of self-consistent iteration.  maxc is the number of these constants. */
-
-        for (int nval = 0; nval < maxc; nval++)
-        {
-            const real cnval = static_cast<real>(nval - expand->c_range);
-
-            /* Compute acceptance criterion weight to the state below this one for use in averages.
-             * Note we do not have to have just moved from that state to use this free energy
-             * estimate; these are essentially "virtual" moves. */
-
-            if (fep_state > 0)
-            {
-                const auto lambdaEnergyDifference =
-                        cnval - (scaled_lamee[fep_state] - scaled_lamee[fep_state - 1]);
-                acceptanceWeight =
-                        gmx::calculateAcceptanceWeight(expand->elamstats, lambdaEnergyDifference);
-                dfhist->accum_m[fep_state][nval] += acceptanceWeight;
-                dfhist->accum_m2[fep_state][nval] += acceptanceWeight * acceptanceWeight;
-            }
-
-            // Compute acceptance criterion weight to transition to the next state
-            if (fep_state < nlim - 1)
-            {
-                const auto lambdaEnergyDifference =
-                        -cnval + (scaled_lamee[fep_state + 1] - scaled_lamee[fep_state]);
-                acceptanceWeight =
-                        gmx::calculateAcceptanceWeight(expand->elamstats, lambdaEnergyDifference);
-                dfhist->accum_p[fep_state][nval] += acceptanceWeight;
-                dfhist->accum_p2[fep_state][nval] += acceptanceWeight * acceptanceWeight;
-            }
-
-            /* Determination of Metropolis transition and Barker transition weights */
-
-            int numObservationsCurrentState = dfhist->n_at_lam[fep_state];
-            /* determine the number of observations above and below the current state */
-            int numObservationsLowerState = 0;
-            if (fep_state > 0)
-            {
-                numObservationsLowerState = dfhist->n_at_lam[fep_state - 1];
-            }
-            int numObservationsHigherState = 0;
-            if (fep_state < nlim - 1)
-            {
-                numObservationsHigherState = dfhist->n_at_lam[fep_state + 1];
-            }
-
-            /* Calculate the biases for each expanded ensemble state that minimize the total
-             * variance, as implemented in Martinez-Veracoechea and Escobedo,
-             * J. Phys. Chem. B 2008, 112, 8120-8128
-             *
-             * The variance associated with the free energy estimate between two states i and j
-             * is calculated as
-             *     Var(i,j) = {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} / numObservations(i->j)
-             *              + {avg[xi(j->i)^2] / avg[xi(j->i)]^2 - 1} / numObservations(j->i)
-             * where xi(i->j) is the acceptance factor / weight associated with moving from state i to j
-             * As we are calculating the acceptance factor to the neighbors every time we're visiting
-             * a state, numObservations(i->j) == numObservations(i) and numObservations(j->i) == numObservations(j)
-             */
-
-            /* Accumulation of acceptance weight averages between the current state and the
-             * states +1 (p1) and -1 (m1), averaged at current state (0)
-             */
-            real avgAcceptanceCurrentToLower  = 0;
-            real avgAcceptanceCurrentToHigher = 0;
-            /* Accumulation of acceptance weight averages quantities between states 0
-             *  and states +1 and -1, squared
-             */
-            real avgAcceptanceCurrentToLowerSquared  = 0;
-            real avgAcceptanceCurrentToHigherSquared = 0;
-            /* Accumulation of free energy quantities from lower state (m1) to current state (0) and squared */
-            real avgAcceptanceLowerToCurrent        = 0;
-            real avgAcceptanceLowerToCurrentSquared = 0;
-            /* Accumulation of free energy quantities from upper state (p1) to current state (0) and squared */
-            real avgAcceptanceHigherToCurrent        = 0;
-            real avgAcceptanceHigherToCurrentSquared = 0;
-
-            if (numObservationsCurrentState > 0)
-            {
-                avgAcceptanceCurrentToLower = dfhist->accum_m[fep_state][nval] / numObservationsCurrentState;
-                avgAcceptanceCurrentToHigher =
-                        dfhist->accum_p[fep_state][nval] / numObservationsCurrentState;
-                avgAcceptanceCurrentToLowerSquared =
-                        dfhist->accum_m2[fep_state][nval] / numObservationsCurrentState;
-                avgAcceptanceCurrentToHigherSquared =
-                        dfhist->accum_p2[fep_state][nval] / numObservationsCurrentState;
-            }
-
-            if ((fep_state > 0) && (numObservationsLowerState > 0))
-            {
-                avgAcceptanceLowerToCurrent =
-                        dfhist->accum_p[fep_state - 1][nval] / numObservationsLowerState;
-                avgAcceptanceLowerToCurrentSquared =
-                        dfhist->accum_p2[fep_state - 1][nval] / numObservationsLowerState;
-            }
-
-            if ((fep_state < nlim - 1) && (numObservationsHigherState > 0))
-            {
-                avgAcceptanceHigherToCurrent =
-                        dfhist->accum_m[fep_state + 1][nval] / numObservationsHigherState;
-                avgAcceptanceHigherToCurrentSquared =
-                        dfhist->accum_m2[fep_state + 1][nval] / numObservationsHigherState;
-            }
-            /* These are accumulation of positive values (see definition of acceptance functions
-             * above), or of squares of positive values.
-             * We're taking this for granted in the following calculation, so make sure
-             * here that nothing weird happened. Although technically all values should be positive,
-             * because of floating point precisions, they might be numerically zero. */
-            GMX_RELEASE_ASSERT(
-                    avgAcceptanceCurrentToLower >= 0 && avgAcceptanceCurrentToLowerSquared >= 0
-                            && avgAcceptanceCurrentToHigher >= 0
-                            && avgAcceptanceCurrentToHigherSquared >= 0 && avgAcceptanceLowerToCurrent >= 0
-                            && avgAcceptanceLowerToCurrentSquared >= 0 && avgAcceptanceHigherToCurrent >= 0
-                            && avgAcceptanceHigherToCurrentSquared >= 0,
-                    "By definition, the acceptance factors should all be nonnegative.");
-
-            real varianceCurrentToLower   = 0;
-            real varianceCurrentToHigher  = 0;
-            real weightDifferenceToLower  = 0;
-            real weightDifferenceToHigher = 0;
-            real varianceToLower          = 0;
-            real varianceToHigher         = 0;
-
-            if (fep_state > 0)
-            {
-                if (numObservationsCurrentState > 0)
-                {
-                    /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1}
-                     *
-                     * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the
-                     * acceptances are all positive!), and hence
-                     *     {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0  for  avg[xi(i->j)] -> 0
-                     * We're catching that case explicitly to avoid numerical
-                     * problems dividing by zero when the overlap between states is small (#3304)
-                     */
-                    if (avgAcceptanceCurrentToLower > 0)
-                    {
-                        varianceCurrentToLower =
-                                avgAcceptanceCurrentToLowerSquared
-                                        / (avgAcceptanceCurrentToLower * avgAcceptanceCurrentToLower)
-                                - 1.0;
-                    }
-                    if (numObservationsLowerState > 0)
-                    {
-                        /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1}
-                         *
-                         * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the
-                         * acceptances are all positive!), and hence
-                         *     {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0  for  avg[xi(i->j)] -> 0
-                         * We're catching that case explicitly to avoid numerical
-                         * problems dividing by zero when the overlap between states is small (#3304)
-                         */
-                        real varianceLowerToCurrent = 0;
-                        if (avgAcceptanceLowerToCurrent > 0)
-                        {
-                            varianceLowerToCurrent =
-                                    avgAcceptanceLowerToCurrentSquared
-                                            / (avgAcceptanceLowerToCurrent * avgAcceptanceLowerToCurrent)
-                                    - 1.0;
-                        }
-                        /* Free energy difference to the state one state lower */
-                        /* if these either of these quantities are zero, the energies are */
-                        /* way too large for the dynamic range.  We need an alternate guesstimate */
-                        if ((avgAcceptanceCurrentToLower == 0) || (avgAcceptanceLowerToCurrent == 0))
-                        {
-                            weightDifferenceToLower =
-                                    (scaled_lamee[fep_state] - scaled_lamee[fep_state - 1]);
-                        }
-                        else
-                        {
-                            weightDifferenceToLower = (std::log(avgAcceptanceCurrentToLower)
-                                                       - std::log(avgAcceptanceLowerToCurrent))
-                                                      + cnval;
-                        }
-                        /* Variance of the free energy difference to the one state lower */
-                        varianceToLower =
-                                (1.0 / numObservationsCurrentState) * (varianceCurrentToLower)
-                                + (1.0 / numObservationsLowerState) * (varianceLowerToCurrent);
-                    }
-                }
-            }
-
-            if (fep_state < nlim - 1)
-            {
-                if (numObservationsCurrentState > 0)
-                {
-                    /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1}
-                     *
-                     * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the
-                     * acceptances are all positive!), and hence
-                     *     {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0  for  avg[xi(i->j)] -> 0
-                     * We're catching that case explicitly to avoid numerical
-                     * problems dividing by zero when the overlap between states is small (#3304)
-                     */
-
-                    if (avgAcceptanceCurrentToHigher > 0)
-                    {
-                        varianceCurrentToHigher =
-                                avgAcceptanceCurrentToHigherSquared
-                                        / (avgAcceptanceCurrentToHigher * avgAcceptanceCurrentToHigher)
-                                - 1.0;
-                    }
-                    if (numObservationsHigherState > 0)
-                    {
-                        /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1}
-                         *
-                         * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the
-                         * acceptances are all positive!), and hence
-                         *     {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0  for  avg[xi(i->j)] -> 0
-                         * We're catching that case explicitly to avoid numerical
-                         * problems dividing by zero when the overlap between states is small (#3304)
-                         */
-                        real varianceHigherToCurrent = 0;
-                        if (avgAcceptanceHigherToCurrent > 0)
-                        {
-                            varianceHigherToCurrent =
-                                    avgAcceptanceHigherToCurrentSquared
-                                            / (avgAcceptanceHigherToCurrent * avgAcceptanceHigherToCurrent)
-                                    - 1.0;
-                        }
-                        /* Free energy difference to the state one state higher */
-                        /* if these either of these quantities are zero, the energies are */
-                        /* way too large for the dynamic range.  We need an alternate guesstimate */
-                        if ((avgAcceptanceHigherToCurrent == 0) || (avgAcceptanceCurrentToHigher == 0))
-                        {
-                            weightDifferenceToHigher =
-                                    (scaled_lamee[fep_state + 1] - scaled_lamee[fep_state]);
-                        }
-                        else
-                        {
-                            weightDifferenceToHigher = (std::log(avgAcceptanceHigherToCurrent)
-                                                        - std::log(avgAcceptanceCurrentToHigher))
-                                                       + cnval;
-                        }
-                        /* Variance of the free energy difference to the one state higher */
-                        varianceToHigher =
-                                (1.0 / numObservationsHigherState) * (varianceHigherToCurrent)
-                                + (1.0 / numObservationsCurrentState) * (varianceCurrentToHigher);
-                    }
-                }
-            }
-
-            if (numObservationsCurrentState > 0)
-            {
-                omegam_array[nval] = varianceCurrentToLower;
-            }
-            else
-            {
-                omegam_array[nval] = 0;
-            }
-            weightsm_array[nval] = weightDifferenceToLower;
-            varm_array[nval]     = varianceToLower;
-            if (numObservationsLowerState > 0)
-            {
-                dwm_array[nval] =
-                        fabs((cnval + std::log((1.0 * numObservationsCurrentState) / numObservationsLowerState))
-                             - lam_dg[fep_state - 1]);
-            }
-            else
-            {
-                dwm_array[nval] = std::fabs(cnval - lam_dg[fep_state - 1]);
-            }
-
-            if (numObservationsCurrentState > 0)
-            {
-                omegap_array[nval] = varianceCurrentToHigher;
-            }
-            else
-            {
-                omegap_array[nval] = 0;
-            }
-            weightsp_array[nval] = weightDifferenceToHigher;
-            varp_array[nval]     = varianceToHigher;
-            if ((numObservationsHigherState > 0) && (numObservationsCurrentState > 0))
-            {
-                dwp_array[nval] =
-                        fabs((cnval + std::log((1.0 * numObservationsHigherState) / numObservationsCurrentState))
-                             - lam_dg[fep_state]);
-            }
-            else
-            {
-                dwp_array[nval] = std::fabs(cnval - lam_dg[fep_state]);
-            }
-        }
-
-        /* find the free energy estimate closest to the guessed weight's value */
-
-        min_nvalm     = FindMinimum(dwm_array, maxc);
-        omega_m1_0    = omegam_array[min_nvalm];
-        clam_weightsm = weightsm_array[min_nvalm];
-        clam_varm     = varm_array[min_nvalm];
-
-        min_nvalp     = FindMinimum(dwp_array, maxc);
-        omega_p1_0    = omegap_array[min_nvalp];
-        clam_weightsp = weightsp_array[min_nvalp];
-        clam_varp     = varp_array[min_nvalp];
-
-        clam_osum   = omega_m1_0 + omega_p1_0;
-        clam_minvar = 0;
-        if (clam_osum > 0)
-        {
-            clam_minvar = 0.5 * std::log(clam_osum);
-        }
-
-        if (fep_state > 0)
-        {
-            lam_dg[fep_state - 1]       = clam_weightsm;
-            lam_variance[fep_state - 1] = clam_varm;
-        }
-
-        if (fep_state < nlim - 1)
-        {
-            lam_dg[fep_state]       = clam_weightsp;
-            lam_variance[fep_state] = clam_varp;
-        }
-
-        if (expand->elamstats == elamstatsMINVAR)
-        {
-            bSufficientSamples = TRUE;
-            /* make sure the number of samples in each state are all
-             * past a user-specified threshold
-             */
-            for (i = 0; i < nlim; i++)
-            {
-                if (dfhist->n_at_lam[i] < expand->minvarmin)
-                {
-                    bSufficientSamples = FALSE;
-                }
-            }
-            if (bSufficientSamples)
-            {
-                dfhist->sum_minvar[fep_state] = clam_minvar;
-                if (fep_state == 0)
-                {
-                    for (i = 0; i < nlim; i++)
-                    {
-                        dfhist->sum_minvar[i] += (expand->minvar_const - clam_minvar);
-                    }
-                    expand->minvar_const          = clam_minvar;
-                    dfhist->sum_minvar[fep_state] = 0.0;
-                }
-                else
-                {
-                    dfhist->sum_minvar[fep_state] -= expand->minvar_const;
-                }
-            }
-        }
-
-        /* we need to rezero minvar now, since it could change at fep_state = 0 */
-        dfhist->sum_dg[0]       = 0.0;
-        dfhist->sum_variance[0] = 0.0;
-        dfhist->sum_weights[0]  = dfhist->sum_dg[0] + dfhist->sum_minvar[0]; /* should be zero */
-
-        for (i = 1; i < nlim; i++)
-        {
-            dfhist->sum_dg[i] = lam_dg[i - 1] + dfhist->sum_dg[i - 1];
-            dfhist->sum_variance[i] =
-                    std::sqrt(lam_variance[i - 1] + gmx::square(dfhist->sum_variance[i - 1]));
-            dfhist->sum_weights[i] = dfhist->sum_dg[i] + dfhist->sum_minvar[i];
-        }
-
-        sfree(lam_dg);
-        sfree(lam_variance);
-
-        sfree(omegam_array);
-        sfree(weightsm_array);
-        sfree(varm_array);
-        sfree(dwm_array);
-
-        sfree(omegap_array);
-        sfree(weightsp_array);
-        sfree(varp_array);
-        sfree(dwp_array);
-    }
-    return FALSE;
-}
-
-static int ChooseNewLambda(int               nlim,
-                           const t_expanded* expand,
-                           df_history_t*     dfhist,
-                           int               fep_state,
-                           const real*       weighted_lamee,
-                           double*           p_k,
-                           int64_t           seed,
-                           int64_t           step)
-{
-    /* Choose new lambda value, and update transition matrix */
-
-    int                  i, ifep, minfep, maxfep, lamnew, lamtrial, starting_fep_state;
-    real                 r1, r2, de, trialprob, tprob = 0;
-    double *             propose, *accept, *remainder;
-    double               pks;
-    real                 pnorm;
-    gmx::ThreeFry2x64<0> rng(
-            seed, gmx::RandomDomain::ExpandedEnsemble); // We only draw once, so zero bits internal counter is fine
-    gmx::UniformRealDistribution<real> dist;
-
-    starting_fep_state = fep_state;
-    lamnew             = fep_state; /* so that there is a default setting -- stays the same */
-
-    // Don't equilibrate weights when using Plumed
-    if (!plumedswitch || expand->elamstats != elamstatsNO)
-    {
-    if (!EWL(expand->elamstats)) /* ignore equilibrating the weights if using WL */
-    {
-        if ((expand->lmc_forced_nstart > 0) && (dfhist->n_at_lam[nlim - 1] <= expand->lmc_forced_nstart))
-        {
-            /* Use a marching method to run through the lambdas and get preliminary free energy data,
-               before starting 'free' sampling.  We start free sampling when we have enough at each lambda */
-
-            /* if we have enough at this lambda, move on to the next one */
-
-            if (dfhist->n_at_lam[fep_state] == expand->lmc_forced_nstart)
-            {
-                lamnew = fep_state + 1;
-                if (lamnew == nlim) /* whoops, stepped too far! */
-                {
-                    lamnew -= 1;
-                }
-            }
-            else
-            {
-                lamnew = fep_state;
-            }
-            return lamnew;
-        }
-    }
-    }
-
-    snew(propose, nlim);
-    snew(accept, nlim);
-    snew(remainder, nlim);
-
-    for (i = 0; i < expand->lmc_repeats; i++)
-    {
-        rng.restart(step, i);
-        dist.reset();
-
-        for (ifep = 0; ifep < nlim; ifep++)
-        {
-            propose[ifep] = 0;
-            accept[ifep]  = 0;
-        }
-
-        if ((expand->elmcmove == elmcmoveGIBBS) || (expand->elmcmove == elmcmoveMETGIBBS))
-        {
-            /* use the Gibbs sampler, with restricted range */
-            if (expand->gibbsdeltalam < 0)
-            {
-                minfep = 0;
-                maxfep = nlim - 1;
-            }
-            else
-            {
-                minfep = fep_state - expand->gibbsdeltalam;
-                maxfep = fep_state + expand->gibbsdeltalam;
-                if (minfep < 0)
-                {
-                    minfep = 0;
-                }
-                if (maxfep > nlim - 1)
-                {
-                    maxfep = nlim - 1;
-                }
-            }
-
-            GenerateGibbsProbabilities(weighted_lamee, p_k, &pks, minfep, maxfep);
-
-            if (expand->elmcmove == elmcmoveGIBBS)
-            {
-                for (ifep = minfep; ifep <= maxfep; ifep++)
-                {
-                    propose[ifep] = p_k[ifep];
-                    accept[ifep]  = 1.0;
-                }
-                /* Gibbs sampling */
-                r1 = dist(rng);
-                for (lamnew = minfep; lamnew <= maxfep; lamnew++)
-                {
-                    if (r1 <= p_k[lamnew])
-                    {
-                        break;
-                    }
-                    r1 -= p_k[lamnew];
-                }
-            }
-            else if (expand->elmcmove == elmcmoveMETGIBBS)
-            {
-
-                /* Metropolized Gibbs sampling */
-                for (ifep = minfep; ifep <= maxfep; ifep++)
-                {
-                    remainder[ifep] = 1 - p_k[ifep];
-                }
-
-                /* find the proposal probabilities */
-
-                if (remainder[fep_state] == 0)
-                {
-                    /* only the current state has any probability */
-                    /* we have to stay at the current state */
-                    lamnew = fep_state;
-                }
-                else
-                {
-                    for (ifep = minfep; ifep <= maxfep; ifep++)
-                    {
-                        if (ifep != fep_state)
-                        {
-                            propose[ifep] = p_k[ifep] / remainder[fep_state];
-                        }
-                        else
-                        {
-                            propose[ifep] = 0;
-                        }
-                    }
-
-                    r1 = dist(rng);
-                    for (lamtrial = minfep; lamtrial <= maxfep; lamtrial++)
-                    {
-                        pnorm = p_k[lamtrial] / remainder[fep_state];
-                        if (lamtrial != fep_state)
-                        {
-                            if (r1 <= pnorm)
-                            {
-                                break;
-                            }
-                            r1 -= pnorm;
-                        }
-                    }
-
-                    /* we have now selected lamtrial according to p(lamtrial)/1-p(fep_state) */
-                    tprob = 1.0;
-                    /* trial probability is min{1,\frac{1 - p(old)}{1-p(new)} MRS 1/8/2008 */
-                    trialprob = (remainder[fep_state]) / (remainder[lamtrial]);
-                    if (trialprob < tprob)
-                    {
-                        tprob = trialprob;
-                    }
-                    r2 = dist(rng);
-                    if (r2 < tprob)
-                    {
-                        lamnew = lamtrial;
-                    }
-                    else
-                    {
-                        lamnew = fep_state;
-                    }
-                }
-
-                /* now figure out the acceptance probability for each */
-                for (ifep = minfep; ifep <= maxfep; ifep++)
-                {
-                    tprob = 1.0;
-                    if (remainder[ifep] != 0)
-                    {
-                        trialprob = (remainder[fep_state]) / (remainder[ifep]);
-                    }
-                    else
-                    {
-                        trialprob = 1.0; /* this state is the only choice! */
-                    }
-                    if (trialprob < tprob)
-                    {
-                        tprob = trialprob;
-                    }
-                    /* probability for fep_state=0, but that's fine, it's never proposed! */
-                    accept[ifep] = tprob;
-                }
-            }
-
-            if (lamnew > maxfep)
-            {
-                /* it's possible some rounding is failing */
-                if (gmx_within_tol(remainder[fep_state], 0, 50 * GMX_DOUBLE_EPS))
-                {
-                    /* numerical rounding error -- no state other than the original has weight */
-                    lamnew = fep_state;
-                }
-                else
-                {
-                    /* probably not a numerical issue */
-                    int   loc    = 0;
-                    int   nerror = 200 + (maxfep - minfep + 1) * 60;
-                    char* errorstr;
-                    snew(errorstr, nerror);
-                    /* if its greater than maxfep, then something went wrong -- probably underflow
-                       in the calculation of sum weights. Generated detailed info for failure */
-                    loc += sprintf(
-                            errorstr,
-                            "Something wrong in choosing new lambda state with a Gibbs move -- "
-                            "probably underflow in weight determination.\nDenominator is: "
-                            "%3d%17.10e\n  i                dE        numerator          weights\n",
-                            0, pks);
-                    for (ifep = minfep; ifep <= maxfep; ifep++)
-                    {
-                        loc += sprintf(&errorstr[loc], "%3d %17.10e%17.10e%17.10e\n", ifep,
-                                       weighted_lamee[ifep], p_k[ifep], dfhist->sum_weights[ifep]);
-                    }
-                    gmx_fatal(FARGS, "%s", errorstr);
-                }
-            }
-        }
-        else if ((expand->elmcmove == elmcmoveMETROPOLIS) || (expand->elmcmove == elmcmoveBARKER))
-        {
-            /* use the metropolis sampler with trial +/- 1 */
-            r1 = dist(rng);
-            if (r1 < 0.5)
-            {
-                if (fep_state == 0)
-                {
-                    lamtrial = fep_state;
-                }
-                else
-                {
-                    lamtrial = fep_state - 1;
-                }
-            }
-            else
-            {
-                if (fep_state == nlim - 1)
-                {
-                    lamtrial = fep_state;
-                }
-                else
-                {
-                    lamtrial = fep_state + 1;
-                }
-            }
-
-            de = weighted_lamee[lamtrial] - weighted_lamee[fep_state];
-            if (expand->elmcmove == elmcmoveMETROPOLIS)
-            {
-                tprob = 1.0;
-                if (de < 0)
-                {
-                    tprob = std::exp(de);
-                }
-                propose[fep_state] = 0;
-                propose[lamtrial]  = 1.0; /* note that this overwrites the above line if fep_state = ntrial, which only occurs at the ends */
-                accept[fep_state] =
-                        1.0; /* doesn't actually matter, never proposed unless fep_state = ntrial, in which case it's 1.0 anyway */
-                accept[lamtrial] = tprob;
-            }
-            else if (expand->elmcmove == elmcmoveBARKER)
-            {
-                if (de > 0) /* Numerically stable version */
-                {
-                    tprob = 1.0 / (1.0 + std::exp(-de));
-                }
-                else if (de < 0)
-                {
-                    tprob = std::exp(de) / (std::exp(de) + 1.0);
-                }
-                propose[fep_state] = (1 - tprob);
-                propose[lamtrial] +=
-                        tprob; /* we add, to account for the fact that at the end, they might be the same point */
-                accept[fep_state] = 1.0;
-                accept[lamtrial]  = 1.0;
-            }
-
-            r2 = dist(rng);
-            if (r2 < tprob)
-            {
-                lamnew = lamtrial;
-            }
-            else
-            {
-                lamnew = fep_state;
-            }
-        }
-
-        for (ifep = 0; ifep < nlim; ifep++)
-        {
-            dfhist->Tij[fep_state][ifep] += propose[ifep] * accept[ifep];
-            dfhist->Tij[fep_state][fep_state] += propose[ifep] * (1.0 - accept[ifep]);
-        }
-        fep_state = lamnew;
-    }
-
-    dfhist->Tij_empirical[starting_fep_state][lamnew] += 1.0;
-
-    sfree(propose);
-    sfree(accept);
-    sfree(remainder);
-
-    return lamnew;
-}
-
-/* print out the weights to the log, along with current state */
-void PrintFreeEnergyInfoToFile(FILE*               outfile,
-                               const t_lambda*     fep,
-                               const t_expanded*   expand,
-                               const t_simtemp*    simtemp,
-                               const df_history_t* dfhist,
-                               int                 fep_state,
-                               int                 frequency,
-                               int64_t             step)
-{
-    int         nlim, i, ifep, jfep;
-    real        dw, dg, dv, Tprint;
-    const char* print_names[efptNR] = { " FEPL", "MassL", "CoulL",   " VdwL",
-                                        "BondL", "RestT", "Temp.(K)" };
-    gmx_bool    bSimTemp            = FALSE;
-
-    nlim = fep->n_lambda;
-    if (simtemp != nullptr)
-    {
-        bSimTemp = TRUE;
-    }
-
-    if (step % frequency == 0)
-    {
-        fprintf(outfile, "             MC-lambda information\n");
-        if (EWL(expand->elamstats) && (!(dfhist->bEquil)))
-        {
-            fprintf(outfile, "  Wang-Landau incrementor is: %11.5g\n", dfhist->wl_delta);
-        }
-        fprintf(outfile, "  N");
-        for (i = 0; i < efptNR; i++)
-        {
-            if (fep->separate_dvdl[i])
-            {
-                fprintf(outfile, "%7s", print_names[i]);
-            }
-            else if ((i == efptTEMPERATURE) && bSimTemp)
-            {
-                fprintf(outfile, "%10s", print_names[i]); /* more space for temperature formats */
-            }
-        }
-        fprintf(outfile, "    Count   ");
-        if (expand->elamstats == elamstatsMINVAR)
-        {
-            fprintf(outfile, "W(in kT)   G(in kT)  dG(in kT)  dV(in kT)\n");
-        }
-        else
-        {
-            fprintf(outfile, "G(in kT)  dG(in kT)\n");
-        }
-        for (ifep = 0; ifep < nlim; ifep++)
-        {
-            if (ifep == nlim - 1)
-            {
-                dw = 0.0;
-                dg = 0.0;
-                dv = 0.0;
-            }
-            else
-            {
-                dw = dfhist->sum_weights[ifep + 1] - dfhist->sum_weights[ifep];
-                dg = dfhist->sum_dg[ifep + 1] - dfhist->sum_dg[ifep];
-                dv = std::sqrt(gmx::square(dfhist->sum_variance[ifep + 1])
-                               - gmx::square(dfhist->sum_variance[ifep]));
-            }
-            fprintf(outfile, "%3d", (ifep + 1));
-            for (i = 0; i < efptNR; i++)
-            {
-                if (fep->separate_dvdl[i])
-                {
-                    fprintf(outfile, "%7.3f", fep->all_lambda[i][ifep]);
-                }
-                else if (i == efptTEMPERATURE && bSimTemp)
-                {
-                    fprintf(outfile, "%9.3f", simtemp->temperatures[ifep]);
-                }
-            }
-            if (EWL(expand->elamstats)
-                && (!(dfhist->bEquil))) /* if performing WL and still haven't equilibrated */
-            {
-                if (expand->elamstats == elamstatsWL)
-                {
-                    fprintf(outfile, " %8d", static_cast<int>(dfhist->wl_histo[ifep]));
-                }
-                else
-                {
-                    fprintf(outfile, " %8.3f", dfhist->wl_histo[ifep]);
-                }
-            }
-            else /* we have equilibrated weights */
-            {
-                fprintf(outfile, " %8d", dfhist->n_at_lam[ifep]);
-            }
-            if (expand->elamstats == elamstatsMINVAR)
-            {
-                fprintf(outfile, " %10.5f %10.5f %10.5f %10.5f", dfhist->sum_weights[ifep],
-                        dfhist->sum_dg[ifep], dg, dv);
-            }
-            else
-            {
-                fprintf(outfile, " %10.5f %10.5f", dfhist->sum_weights[ifep], dw);
-            }
-            if (ifep == fep_state)
-            {
-                fprintf(outfile, " <<\n");
-            }
-            else
-            {
-                fprintf(outfile, "   \n");
-            }
-        }
-        fprintf(outfile, "\n");
-
-        if ((step % expand->nstTij == 0) && (expand->nstTij > 0) && (step > 0))
-        {
-            fprintf(outfile, "                     Transition Matrix\n");
-            for (ifep = 0; ifep < nlim; ifep++)
-            {
-                fprintf(outfile, "%12d", (ifep + 1));
-            }
-            fprintf(outfile, "\n");
-            for (ifep = 0; ifep < nlim; ifep++)
-            {
-                for (jfep = 0; jfep < nlim; jfep++)
-                {
-                    if (dfhist->n_at_lam[ifep] > 0)
-                    {
-                        if (expand->bSymmetrizedTMatrix)
-                        {
-                            Tprint = (dfhist->Tij[ifep][jfep] + dfhist->Tij[jfep][ifep])
-                                     / (dfhist->n_at_lam[ifep] + dfhist->n_at_lam[jfep]);
-                        }
-                        else
-                        {
-                            Tprint = (dfhist->Tij[ifep][jfep]) / (dfhist->n_at_lam[ifep]);
-                        }
-                    }
-                    else
-                    {
-                        Tprint = 0.0;
-                    }
-                    fprintf(outfile, "%12.8f", Tprint);
-                }
-                fprintf(outfile, "%3d\n", (ifep + 1));
-            }
-
-            fprintf(outfile, "                  Empirical Transition Matrix\n");
-            for (ifep = 0; ifep < nlim; ifep++)
-            {
-                fprintf(outfile, "%12d", (ifep + 1));
-            }
-            fprintf(outfile, "\n");
-            for (ifep = 0; ifep < nlim; ifep++)
-            {
-                for (jfep = 0; jfep < nlim; jfep++)
-                {
-                    if (dfhist->n_at_lam[ifep] > 0)
-                    {
-                        if (expand->bSymmetrizedTMatrix)
-                        {
-                            Tprint = (dfhist->Tij_empirical[ifep][jfep] + dfhist->Tij_empirical[jfep][ifep])
-                                     / (dfhist->n_at_lam[ifep] + dfhist->n_at_lam[jfep]);
-                        }
-                        else
-                        {
-                            Tprint = dfhist->Tij_empirical[ifep][jfep] / (dfhist->n_at_lam[ifep]);
-                        }
-                    }
-                    else
-                    {
-                        Tprint = 0.0;
-                    }
-                    fprintf(outfile, "%12.8f", Tprint);
-                }
-                fprintf(outfile, "%3d\n", (ifep + 1));
-            }
-        }
-    }
-}
-
-int ExpandedEnsembleDynamics(FILE*                 log,
-                             const t_inputrec*     ir,
-                             const gmx_enerdata_t* enerd,
-                             t_state*              state,
-                             t_extmass*            MassQ,
-                             int                   fep_state,
-                             df_history_t*         dfhist,
-                             int64_t               step,
-                             rvec*                 v,
-                             const t_mdatoms*      mdatoms,
-                             real*                 realFepState)
-/* Note that the state variable is only needed for simulated tempering, not
-   Hamiltonian expanded ensemble.  May be able to remove it after integrator refactoring. */
-{
-    real *      pfep_lamee, *scaled_lamee, *weighted_lamee;
-    double*     p_k;
-    int         i, nlim, lamnew, totalsamples;
-    real        oneovert, maxscaled = 0, maxweighted = 0;
-    t_expanded* expand;
-    t_simtemp*  simtemp;
-    gmx_bool    bIfReset, bSwitchtoOneOverT, bDoneEquilibrating = FALSE;
-
-    expand  = ir->expandedvals;
-    simtemp = ir->simtempvals;
-    nlim    = ir->fepvals->n_lambda;
-
-    snew(scaled_lamee, nlim);
-    snew(weighted_lamee, nlim);
-    snew(pfep_lamee, nlim);
-    snew(p_k, nlim);
-
-    /* update the count at the current lambda*/
-    dfhist->n_at_lam[fep_state]++;
-
-    /* need to calculate the PV term somewhere, but not needed here? Not until there's a lambda
-       state that's pressure controlled.*/
-    /*
-       pVTerm = 0;
-       where does this PV term go?
-       for (i=0;i<nlim;i++)
-       {
-       fep_lamee[i] += pVTerm;
-       }
-     */
-
-    /* determine the minimum value to avoid overflow.  Probably a better way to do this */
-    /* we don't need to include the pressure term, since the volume is the same between the two.
-       is there some term we are neglecting, however? */
-
-    if (ir->efep != efepNO)
-    {
-        for (i = 0; i < nlim; i++)
-        {
-            if (ir->bSimTemp)
-            {
-                /* Note -- this assumes no mass changes, since kinetic energy is not added  . . . */
-                scaled_lamee[i] = (enerd->enerpart_lambda[i + 1] - enerd->enerpart_lambda[0])
-                                          / (simtemp->temperatures[i] * BOLTZ)
-                                  + enerd->term[F_EPOT]
-                                            * (1.0 / (simtemp->temperatures[i])
-                                               - 1.0 / (simtemp->temperatures[fep_state]))
-                                            / BOLTZ;
-            }
-            else
-            {
-                scaled_lamee[i] = (enerd->enerpart_lambda[i + 1] - enerd->enerpart_lambda[0])
-                                  / (expand->mc_temp * BOLTZ);
-                /* mc_temp is currently set to the system reft unless otherwise defined */
-            }
-
-            /* save these energies for printing, so they don't get overwritten by the next step */
-            /* they aren't overwritten in the non-free energy case, but we always print with these
-               for simplicity */
-        }
-    }
-    else
-    {
-        if (ir->bSimTemp)
-        {
-            for (i = 0; i < nlim; i++)
-            {
-                scaled_lamee[i] =
-                        enerd->term[F_EPOT]
-                        * (1.0 / simtemp->temperatures[i] - 1.0 / simtemp->temperatures[fep_state]) / BOLTZ;
-            }
-        }
-    }
-
-    for (i = 0; i < nlim; i++)
-    {
-        pfep_lamee[i] = scaled_lamee[i];
-
-        weighted_lamee[i] = dfhist->sum_weights[i] - scaled_lamee[i];
-        if (i == 0)
-        {
-            maxscaled   = scaled_lamee[i];
-            maxweighted = weighted_lamee[i];
-        }
-        else
-        {
-            if (scaled_lamee[i] > maxscaled)
-            {
-                maxscaled = scaled_lamee[i];
-            }
-            if (weighted_lamee[i] > maxweighted)
-            {
-                maxweighted = weighted_lamee[i];
-            }
-        }
-    }
-
-    for (i = 0; i < nlim; i++)
-    {
-        scaled_lamee[i] -= maxscaled;
-        weighted_lamee[i] -= maxweighted;
-    }
-
-    if (plumedswitch && expand->elamstats == elamstatsNO)
-    {
-        // Update weights at all lambda states with current values from Plumed.
-        // For acceptance criterion, expanded ensemble is expecting the weight at
-        // lambda i=0 to be zero.
-        real zeroBias = 0;
-        for (i = 0; i < nlim; i++)
-        {
-            *realFepState = i;
-            real bias = 0;
-            plumed_cmd(plumedmain, "prepareCalc", nullptr);
-            plumed_cmd(plumedmain, "performCalcNoForces", nullptr);
-            plumed_cmd(plumedmain, "getBias", &bias);
-            bias /= expand->mc_temp * BOLTZ;
-            if (i == 0)
-            {
-                zeroBias = bias;
-            }
-            dfhist->sum_weights[i] = -bias + zeroBias;
-        }
-        *realFepState = fep_state;
-    }
-    else // Don't update weights using different method when Plumed is active
-    {
-    /* update weights - we decide whether or not to actually do this inside */
-
-    bDoneEquilibrating =
-            UpdateWeights(nlim, expand, dfhist, fep_state, scaled_lamee, weighted_lamee, step);
-    if (bDoneEquilibrating)
-    {
-        if (log)
-        {
-            fprintf(log, "\nStep %" PRId64 ": Weights have equilibrated, using criteria: %s\n",
-                    step, elmceq_names[expand->elmceq]);
-        }
-    }
-    }
-
-    // Accept / reject is handled by GROMACS (possibly with Plumed weights).
-    lamnew = ChooseNewLambda(nlim, expand, dfhist, fep_state, weighted_lamee, p_k,
-                             ir->expandedvals->lmc_seed, step);
-    /* if using simulated tempering, we need to adjust the temperatures */
-    if (ir->bSimTemp && (lamnew != fep_state)) /* only need to change the temperatures if we change the state */
-    {
-        int   i, j, n, d;
-        real* buf_ngtc;
-        real  told;
-        int   nstart, nend, gt;
-
-        snew(buf_ngtc, ir->opts.ngtc);
-
-        for (i = 0; i < ir->opts.ngtc; i++)
-        {
-            if (ir->opts.ref_t[i] > 0)
-            {
-                told              = ir->opts.ref_t[i];
-                ir->opts.ref_t[i] = simtemp->temperatures[lamnew];
-                buf_ngtc[i]       = std::sqrt(ir->opts.ref_t[i] / told); /* using the buffer as temperature scaling */
-            }
-        }
-
-        /* we don't need to manipulate the ekind information, as it isn't due to be reset until the next step anyway */
-
-        nstart = 0;
-        nend   = mdatoms->homenr;
-        for (n = nstart; n < nend; n++)
-        {
-            gt = 0;
-            if (mdatoms->cTC)
-            {
-                gt = mdatoms->cTC[n];
-            }
-            for (d = 0; d < DIM; d++)
-            {
-                v[n][d] *= buf_ngtc[gt];
-            }
-        }
-
-        if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir))
-        {
-            /* we need to recalculate the masses if the temperature has changed */
-            init_npt_masses(ir, state, MassQ, FALSE);
-            for (i = 0; i < state->nnhpres; i++)
-            {
-                for (j = 0; j < ir->opts.nhchainlength; j++)
-                {
-                    state->nhpres_vxi[i + j] *= buf_ngtc[i];
-                }
-            }
-            for (i = 0; i < ir->opts.ngtc; i++)
-            {
-                for (j = 0; j < ir->opts.nhchainlength; j++)
-                {
-                    state->nosehoover_vxi[i + j] *= buf_ngtc[i];
-                }
-            }
-        }
-        sfree(buf_ngtc);
-    }
-
-    /* now check on the Wang-Landau updating critera */
-
-    if (EWL(expand->elamstats))
-    {
-        bSwitchtoOneOverT = FALSE;
-        if (expand->bWLoneovert)
-        {
-            totalsamples = 0;
-            for (i = 0; i < nlim; i++)
-            {
-                totalsamples += dfhist->n_at_lam[i];
-            }
-            oneovert = (1.0 * nlim) / totalsamples;
-            /* oneovert has decreasd by a bit since last time, so we actually make sure its within one of this number */
-            /* switch to 1/t incrementing when wl_delta has decreased at least once, and wl_delta is now less than 1/t */
-            if ((dfhist->wl_delta <= ((totalsamples) / (totalsamples - 1.00001)) * oneovert)
-                && (dfhist->wl_delta < expand->init_wl_delta))
-            {
-                bSwitchtoOneOverT = TRUE;
-            }
-        }
-        if (bSwitchtoOneOverT)
-        {
-            dfhist->wl_delta =
-                    oneovert; /* now we reduce by this each time, instead of only at flatness */
-        }
-        else
-        {
-            bIfReset = CheckHistogramRatios(nlim, dfhist->wl_histo, expand->wl_ratio);
-            if (bIfReset)
-            {
-                for (i = 0; i < nlim; i++)
-                {
-                    dfhist->wl_histo[i] = 0;
-                }
-                dfhist->wl_delta *= expand->wl_scale;
-                if (log)
-                {
-                    fprintf(log, "\nStep %d: weights are now:", static_cast<int>(step));
-                    for (i = 0; i < nlim; i++)
-                    {
-                        fprintf(log, " %.5f", dfhist->sum_weights[i]);
-                    }
-                    fprintf(log, "\n");
-                }
-            }
-        }
-    }
-    sfree(pfep_lamee);
-    sfree(scaled_lamee);
-    sfree(weighted_lamee);
-    sfree(p_k);
-
-    return lamnew;
-}
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.cpp.preplumed b/patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.cpp.preplumed
deleted file mode 100644
index 9ba14e585f..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.cpp.preplumed
+++ /dev/null
@@ -1,1582 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2012-2018, The GROMACS development team.
- * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "expanded.h"
-
-#include <cmath>
-#include <cstdio>
-
-#include <algorithm>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/fileio/confio.h"
-#include "gromacs/fileio/gmxfio.h"
-#include "gromacs/fileio/xtcio.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/listed_forces/disre.h"
-#include "gromacs/listed_forces/orires.h"
-#include "gromacs/math/functions.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/calcmu.h"
-#include "gromacs/mdlib/constr.h"
-#include "gromacs/mdlib/force.h"
-#include "gromacs/mdlib/update.h"
-#include "gromacs/mdtypes/enerdata.h"
-#include "gromacs/mdtypes/forcerec.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdatom.h"
-#include "gromacs/mdtypes/state.h"
-#include "gromacs/random/threefry.h"
-#include "gromacs/random/uniformrealdistribution.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/smalloc.h"
-
-#include "expanded_internal.h"
-
-static void init_df_history_weights(df_history_t* dfhist, const t_expanded* expand, int nlim)
-{
-    int i;
-    dfhist->wl_delta = expand->init_wl_delta;
-    for (i = 0; i < nlim; i++)
-    {
-        dfhist->sum_weights[i] = expand->init_lambda_weights[i];
-        dfhist->sum_dg[i]      = expand->init_lambda_weights[i];
-    }
-}
-
-/* Eventually should contain all the functions needed to initialize expanded ensemble
-   before the md loop starts */
-void init_expanded_ensemble(gmx_bool bStateFromCP, const t_inputrec* ir, df_history_t* dfhist)
-{
-    if (!bStateFromCP)
-    {
-        init_df_history_weights(dfhist, ir->expandedvals, ir->fepvals->n_lambda);
-    }
-}
-
-static void GenerateGibbsProbabilities(const real* ene, double* p_k, double* pks, int minfep, int maxfep)
-{
-
-    int  i;
-    real maxene;
-
-    *pks   = 0.0;
-    maxene = ene[minfep];
-    /* find the maximum value */
-    for (i = minfep; i <= maxfep; i++)
-    {
-        if (ene[i] > maxene)
-        {
-            maxene = ene[i];
-        }
-    }
-    /* find the denominator */
-    for (i = minfep; i <= maxfep; i++)
-    {
-        *pks += std::exp(ene[i] - maxene);
-    }
-    /*numerators*/
-    for (i = minfep; i <= maxfep; i++)
-    {
-        p_k[i] = std::exp(ene[i] - maxene) / *pks;
-    }
-}
-
-static void
-GenerateWeightedGibbsProbabilities(const real* ene, double* p_k, double* pks, int nlim, real* nvals, real delta)
-{
-
-    int   i;
-    real  maxene;
-    real* nene;
-    *pks = 0.0;
-
-    snew(nene, nlim);
-    for (i = 0; i < nlim; i++)
-    {
-        if (nvals[i] == 0)
-        {
-            /* add the delta, since we need to make sure it's greater than zero, and
-               we need a non-arbitrary number? */
-            nene[i] = ene[i] + std::log(nvals[i] + delta);
-        }
-        else
-        {
-            nene[i] = ene[i] + std::log(nvals[i]);
-        }
-    }
-
-    /* find the maximum value */
-    maxene = nene[0];
-    for (i = 0; i < nlim; i++)
-    {
-        if (nene[i] > maxene)
-        {
-            maxene = nene[i];
-        }
-    }
-
-    /* subtract off the maximum, avoiding overflow */
-    for (i = 0; i < nlim; i++)
-    {
-        nene[i] -= maxene;
-    }
-
-    /* find the denominator */
-    for (i = 0; i < nlim; i++)
-    {
-        *pks += std::exp(nene[i]);
-    }
-
-    /*numerators*/
-    for (i = 0; i < nlim; i++)
-    {
-        p_k[i] = std::exp(nene[i]) / *pks;
-    }
-    sfree(nene);
-}
-
-static int FindMinimum(const real* min_metric, int N)
-{
-
-    real min_val;
-    int  min_nval, nval;
-
-    min_nval = 0;
-    min_val  = min_metric[0];
-
-    for (nval = 0; nval < N; nval++)
-    {
-        if (min_metric[nval] < min_val)
-        {
-            min_val  = min_metric[nval];
-            min_nval = nval;
-        }
-    }
-    return min_nval;
-}
-
-static gmx_bool CheckHistogramRatios(int nhisto, const real* histo, real ratio)
-{
-
-    int      i;
-    real     nmean;
-    gmx_bool bIfFlat;
-
-    nmean = 0;
-    for (i = 0; i < nhisto; i++)
-    {
-        nmean += histo[i];
-    }
-
-    if (nmean == 0)
-    {
-        /* no samples! is bad!*/
-        bIfFlat = FALSE;
-        return bIfFlat;
-    }
-    nmean /= static_cast<real>(nhisto);
-
-    bIfFlat = TRUE;
-    for (i = 0; i < nhisto; i++)
-    {
-        /* make sure that all points are in the ratio < x <  1/ratio range  */
-        if (!((histo[i] / nmean < 1.0 / ratio) && (histo[i] / nmean > ratio)))
-        {
-            bIfFlat = FALSE;
-            break;
-        }
-    }
-    return bIfFlat;
-}
-
-static gmx_bool CheckIfDoneEquilibrating(int nlim, const t_expanded* expand, const df_history_t* dfhist, int64_t step)
-{
-
-    int      i, totalsamples;
-    gmx_bool bDoneEquilibrating = TRUE;
-    gmx_bool bIfFlat;
-
-    /* If we are doing slow growth to get initial values, we haven't finished equilibrating */
-    if (expand->lmc_forced_nstart > 0)
-    {
-        for (i = 0; i < nlim; i++)
-        {
-            if (dfhist->n_at_lam[i]
-                < expand->lmc_forced_nstart) /* we are still doing the initial sweep, so we're
-                                                definitely not done equilibrating*/
-            {
-                bDoneEquilibrating = FALSE;
-                break;
-            }
-        }
-    }
-    else
-    {
-        /* assume we have equilibrated the weights, then check to see if any of the conditions are not met */
-        bDoneEquilibrating = TRUE;
-
-        /* calculate the total number of samples */
-        switch (expand->elmceq)
-        {
-            case elmceqNO:
-                /* We have not equilibrated, and won't, ever. */
-                bDoneEquilibrating = FALSE;
-                break;
-            case elmceqYES:
-                /* we have equilibrated -- we're done */
-                bDoneEquilibrating = TRUE;
-                break;
-            case elmceqSTEPS:
-                /* first, check if we are equilibrating by steps, if we're still under */
-                if (step < expand->equil_steps)
-                {
-                    bDoneEquilibrating = FALSE;
-                }
-                break;
-            case elmceqSAMPLES:
-                totalsamples = 0;
-                for (i = 0; i < nlim; i++)
-                {
-                    totalsamples += dfhist->n_at_lam[i];
-                }
-                if (totalsamples < expand->equil_samples)
-                {
-                    bDoneEquilibrating = FALSE;
-                }
-                break;
-            case elmceqNUMATLAM:
-                for (i = 0; i < nlim; i++)
-                {
-                    if (dfhist->n_at_lam[i]
-                        < expand->equil_n_at_lam) /* we are still doing the initial sweep, so we're
-                                                     definitely not done equilibrating*/
-                    {
-                        bDoneEquilibrating = FALSE;
-                        break;
-                    }
-                }
-                break;
-            case elmceqWLDELTA:
-                if (EWL(expand->elamstats)) /* This check is in readir as well, but
-                                               just to be sure */
-                {
-                    if (dfhist->wl_delta > expand->equil_wl_delta)
-                    {
-                        bDoneEquilibrating = FALSE;
-                    }
-                }
-                break;
-            case elmceqRATIO:
-                /* we can use the flatness as a judge of good weights, as long as
-                   we're not doing minvar, or Wang-Landau.
-                   But turn off for now until we figure out exactly how we do this.
-                 */
-
-                if (!(EWL(expand->elamstats) || expand->elamstats == elamstatsMINVAR))
-                {
-                    /* we want to use flatness -avoiding- the forced-through samples.  Plus, we need
-                       to convert to floats for this histogram function. */
-
-                    real* modhisto;
-                    snew(modhisto, nlim);
-                    for (i = 0; i < nlim; i++)
-                    {
-                        modhisto[i] = 1.0 * (dfhist->n_at_lam[i] - expand->lmc_forced_nstart);
-                    }
-                    bIfFlat = CheckHistogramRatios(nlim, modhisto, expand->equil_ratio);
-                    sfree(modhisto);
-                    if (!bIfFlat)
-                    {
-                        bDoneEquilibrating = FALSE;
-                    }
-                }
-                break;
-            default: bDoneEquilibrating = TRUE; break;
-        }
-    }
-    return bDoneEquilibrating;
-}
-
-static gmx_bool UpdateWeights(int           nlim,
-                              t_expanded*   expand,
-                              df_history_t* dfhist,
-                              int           fep_state,
-                              const real*   scaled_lamee,
-                              const real*   weighted_lamee,
-                              int64_t       step)
-{
-    gmx_bool bSufficientSamples;
-    real     acceptanceWeight;
-    int      i;
-    int      min_nvalm, min_nvalp, maxc;
-    real     omega_m1_0, omega_p1_0;
-    real     zero_sum_weights;
-    real *omegam_array, *weightsm_array, *omegap_array, *weightsp_array, *varm_array, *varp_array,
-            *dwp_array, *dwm_array;
-    real    clam_varm, clam_varp, clam_osum, clam_weightsm, clam_weightsp, clam_minvar;
-    real *  lam_variance, *lam_dg;
-    double* p_k;
-    double  pks = 0;
-
-    /* Future potential todos for this function (see #3848):
-     *  - Update the names in the dhist structure to be clearer. Not done for now since this
-     *    a bugfix update and we are mininizing other code changes.
-     *  - Modularize the code some more.
-     *  - potentially merge with accelerated weight histogram functionality, since it's very similar.
-     */
-    /*  if we have equilibrated the expanded ensemble weights, we are not updating them, so exit now */
-    if (dfhist->bEquil)
-    {
-        return FALSE;
-    }
-
-    if (CheckIfDoneEquilibrating(nlim, expand, dfhist, step))
-    {
-        dfhist->bEquil = TRUE;
-        /* zero out the visited states so we know how many equilibrated states we have
-           from here on out.*/
-        for (i = 0; i < nlim; i++)
-        {
-            dfhist->n_at_lam[i] = 0;
-        }
-        return TRUE;
-    }
-
-    /* If we reached this far, we have not equilibrated yet, keep on
-       going resetting the weights */
-
-    if (EWL(expand->elamstats))
-    {
-        if (expand->elamstats == elamstatsWL) /* Using standard Wang-Landau for weight updates */
-        {
-            dfhist->sum_weights[fep_state] -= dfhist->wl_delta;
-            dfhist->wl_histo[fep_state] += 1.0;
-        }
-        else if (expand->elamstats == elamstatsWWL)
-        /* Using weighted Wang-Landau for weight updates.
-         * Very closly equivalent to accelerated weight histogram approach
-         * applied to expanded ensemble. */
-        {
-            snew(p_k, nlim);
-
-            /* first increment count */
-            GenerateGibbsProbabilities(weighted_lamee, p_k, &pks, 0, nlim - 1);
-            for (i = 0; i < nlim; i++)
-            {
-                dfhist->wl_histo[i] += static_cast<real>(p_k[i]);
-            }
-
-            /* then increment weights (uses count) */
-            pks = 0.0;
-            GenerateWeightedGibbsProbabilities(weighted_lamee, p_k, &pks, nlim, dfhist->wl_histo,
-                                               dfhist->wl_delta);
-
-            for (i = 0; i < nlim; i++)
-            {
-                dfhist->sum_weights[i] -= dfhist->wl_delta * static_cast<real>(p_k[i]);
-            }
-            /* Alternate definition, using logarithms. Shouldn't make very much difference! */
-            /*
-               real di;
-               for (i=0;i<nlim;i++)
-               {
-                di = (real)1.0 + dfhist->wl_delta*(real)p_k[i];
-                dfhist->sum_weights[i] -= log(di);
-               }
-             */
-            sfree(p_k);
-        }
-
-        zero_sum_weights = dfhist->sum_weights[0];
-        for (i = 0; i < nlim; i++)
-        {
-            dfhist->sum_weights[i] -= zero_sum_weights;
-        }
-    }
-
-    if (expand->elamstats == elamstatsBARKER || expand->elamstats == elamstatsMETROPOLIS
-        || expand->elamstats == elamstatsMINVAR)
-    {
-        maxc = 2 * expand->c_range + 1;
-
-        snew(lam_dg, nlim);
-        snew(lam_variance, nlim);
-
-        snew(omegap_array, maxc);
-        snew(weightsp_array, maxc);
-        snew(varp_array, maxc);
-        snew(dwp_array, maxc);
-
-        snew(omegam_array, maxc);
-        snew(weightsm_array, maxc);
-        snew(varm_array, maxc);
-        snew(dwm_array, maxc);
-
-        /* unpack the values of the free energy differences and the
-         * variance in their estimates between nearby lambdas. We will
-         * only actually update 2 of these, the state we are currently
-         * at and the one we end up moving to
-         */
-
-        for (i = 0; i < nlim - 1; i++)
-        { /* only through the second to last */
-            lam_dg[i] = dfhist->sum_dg[i + 1] - dfhist->sum_dg[i];
-            lam_variance[i] =
-                    gmx::square(dfhist->sum_variance[i + 1]) - gmx::square(dfhist->sum_variance[i]);
-        }
-
-        /* accumulate running averages of thermodynamic averages for Bennett Acceptance Ratio-based
-         * estimates of the free energy .
-         * Rather than peforming self-consistent estimation of the free energies at each step,
-         * we keep track of an array of possible different free energies (cnvals),
-         * and we self-consistently choose the best one. The one that leads to a free energy estimate
-         * that is closest to itself is the best estimate of the free energy.  It is essentially a
-         * parallellized version of self-consistent iteration.  maxc is the number of these constants. */
-
-        for (int nval = 0; nval < maxc; nval++)
-        {
-            const real cnval = static_cast<real>(nval - expand->c_range);
-
-            /* Compute acceptance criterion weight to the state below this one for use in averages.
-             * Note we do not have to have just moved from that state to use this free energy
-             * estimate; these are essentially "virtual" moves. */
-
-            if (fep_state > 0)
-            {
-                const auto lambdaEnergyDifference =
-                        cnval - (scaled_lamee[fep_state] - scaled_lamee[fep_state - 1]);
-                acceptanceWeight =
-                        gmx::calculateAcceptanceWeight(expand->elamstats, lambdaEnergyDifference);
-                dfhist->accum_m[fep_state][nval] += acceptanceWeight;
-                dfhist->accum_m2[fep_state][nval] += acceptanceWeight * acceptanceWeight;
-            }
-
-            // Compute acceptance criterion weight to transition to the next state
-            if (fep_state < nlim - 1)
-            {
-                const auto lambdaEnergyDifference =
-                        -cnval + (scaled_lamee[fep_state + 1] - scaled_lamee[fep_state]);
-                acceptanceWeight =
-                        gmx::calculateAcceptanceWeight(expand->elamstats, lambdaEnergyDifference);
-                dfhist->accum_p[fep_state][nval] += acceptanceWeight;
-                dfhist->accum_p2[fep_state][nval] += acceptanceWeight * acceptanceWeight;
-            }
-
-            /* Determination of Metropolis transition and Barker transition weights */
-
-            int numObservationsCurrentState = dfhist->n_at_lam[fep_state];
-            /* determine the number of observations above and below the current state */
-            int numObservationsLowerState = 0;
-            if (fep_state > 0)
-            {
-                numObservationsLowerState = dfhist->n_at_lam[fep_state - 1];
-            }
-            int numObservationsHigherState = 0;
-            if (fep_state < nlim - 1)
-            {
-                numObservationsHigherState = dfhist->n_at_lam[fep_state + 1];
-            }
-
-            /* Calculate the biases for each expanded ensemble state that minimize the total
-             * variance, as implemented in Martinez-Veracoechea and Escobedo,
-             * J. Phys. Chem. B 2008, 112, 8120-8128
-             *
-             * The variance associated with the free energy estimate between two states i and j
-             * is calculated as
-             *     Var(i,j) = {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} / numObservations(i->j)
-             *              + {avg[xi(j->i)^2] / avg[xi(j->i)]^2 - 1} / numObservations(j->i)
-             * where xi(i->j) is the acceptance factor / weight associated with moving from state i to j
-             * As we are calculating the acceptance factor to the neighbors every time we're visiting
-             * a state, numObservations(i->j) == numObservations(i) and numObservations(j->i) == numObservations(j)
-             */
-
-            /* Accumulation of acceptance weight averages between the current state and the
-             * states +1 (p1) and -1 (m1), averaged at current state (0)
-             */
-            real avgAcceptanceCurrentToLower  = 0;
-            real avgAcceptanceCurrentToHigher = 0;
-            /* Accumulation of acceptance weight averages quantities between states 0
-             *  and states +1 and -1, squared
-             */
-            real avgAcceptanceCurrentToLowerSquared  = 0;
-            real avgAcceptanceCurrentToHigherSquared = 0;
-            /* Accumulation of free energy quantities from lower state (m1) to current state (0) and squared */
-            real avgAcceptanceLowerToCurrent        = 0;
-            real avgAcceptanceLowerToCurrentSquared = 0;
-            /* Accumulation of free energy quantities from upper state (p1) to current state (0) and squared */
-            real avgAcceptanceHigherToCurrent        = 0;
-            real avgAcceptanceHigherToCurrentSquared = 0;
-
-            if (numObservationsCurrentState > 0)
-            {
-                avgAcceptanceCurrentToLower = dfhist->accum_m[fep_state][nval] / numObservationsCurrentState;
-                avgAcceptanceCurrentToHigher =
-                        dfhist->accum_p[fep_state][nval] / numObservationsCurrentState;
-                avgAcceptanceCurrentToLowerSquared =
-                        dfhist->accum_m2[fep_state][nval] / numObservationsCurrentState;
-                avgAcceptanceCurrentToHigherSquared =
-                        dfhist->accum_p2[fep_state][nval] / numObservationsCurrentState;
-            }
-
-            if ((fep_state > 0) && (numObservationsLowerState > 0))
-            {
-                avgAcceptanceLowerToCurrent =
-                        dfhist->accum_p[fep_state - 1][nval] / numObservationsLowerState;
-                avgAcceptanceLowerToCurrentSquared =
-                        dfhist->accum_p2[fep_state - 1][nval] / numObservationsLowerState;
-            }
-
-            if ((fep_state < nlim - 1) && (numObservationsHigherState > 0))
-            {
-                avgAcceptanceHigherToCurrent =
-                        dfhist->accum_m[fep_state + 1][nval] / numObservationsHigherState;
-                avgAcceptanceHigherToCurrentSquared =
-                        dfhist->accum_m2[fep_state + 1][nval] / numObservationsHigherState;
-            }
-            /* These are accumulation of positive values (see definition of acceptance functions
-             * above), or of squares of positive values.
-             * We're taking this for granted in the following calculation, so make sure
-             * here that nothing weird happened. Although technically all values should be positive,
-             * because of floating point precisions, they might be numerically zero. */
-            GMX_RELEASE_ASSERT(
-                    avgAcceptanceCurrentToLower >= 0 && avgAcceptanceCurrentToLowerSquared >= 0
-                            && avgAcceptanceCurrentToHigher >= 0
-                            && avgAcceptanceCurrentToHigherSquared >= 0 && avgAcceptanceLowerToCurrent >= 0
-                            && avgAcceptanceLowerToCurrentSquared >= 0 && avgAcceptanceHigherToCurrent >= 0
-                            && avgAcceptanceHigherToCurrentSquared >= 0,
-                    "By definition, the acceptance factors should all be nonnegative.");
-
-            real varianceCurrentToLower   = 0;
-            real varianceCurrentToHigher  = 0;
-            real weightDifferenceToLower  = 0;
-            real weightDifferenceToHigher = 0;
-            real varianceToLower          = 0;
-            real varianceToHigher         = 0;
-
-            if (fep_state > 0)
-            {
-                if (numObservationsCurrentState > 0)
-                {
-                    /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1}
-                     *
-                     * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the
-                     * acceptances are all positive!), and hence
-                     *     {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0  for  avg[xi(i->j)] -> 0
-                     * We're catching that case explicitly to avoid numerical
-                     * problems dividing by zero when the overlap between states is small (#3304)
-                     */
-                    if (avgAcceptanceCurrentToLower > 0)
-                    {
-                        varianceCurrentToLower =
-                                avgAcceptanceCurrentToLowerSquared
-                                        / (avgAcceptanceCurrentToLower * avgAcceptanceCurrentToLower)
-                                - 1.0;
-                    }
-                    if (numObservationsLowerState > 0)
-                    {
-                        /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1}
-                         *
-                         * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the
-                         * acceptances are all positive!), and hence
-                         *     {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0  for  avg[xi(i->j)] -> 0
-                         * We're catching that case explicitly to avoid numerical
-                         * problems dividing by zero when the overlap between states is small (#3304)
-                         */
-                        real varianceLowerToCurrent = 0;
-                        if (avgAcceptanceLowerToCurrent > 0)
-                        {
-                            varianceLowerToCurrent =
-                                    avgAcceptanceLowerToCurrentSquared
-                                            / (avgAcceptanceLowerToCurrent * avgAcceptanceLowerToCurrent)
-                                    - 1.0;
-                        }
-                        /* Free energy difference to the state one state lower */
-                        /* if these either of these quantities are zero, the energies are */
-                        /* way too large for the dynamic range.  We need an alternate guesstimate */
-                        if ((avgAcceptanceCurrentToLower == 0) || (avgAcceptanceLowerToCurrent == 0))
-                        {
-                            weightDifferenceToLower =
-                                    (scaled_lamee[fep_state] - scaled_lamee[fep_state - 1]);
-                        }
-                        else
-                        {
-                            weightDifferenceToLower = (std::log(avgAcceptanceCurrentToLower)
-                                                       - std::log(avgAcceptanceLowerToCurrent))
-                                                      + cnval;
-                        }
-                        /* Variance of the free energy difference to the one state lower */
-                        varianceToLower =
-                                (1.0 / numObservationsCurrentState) * (varianceCurrentToLower)
-                                + (1.0 / numObservationsLowerState) * (varianceLowerToCurrent);
-                    }
-                }
-            }
-
-            if (fep_state < nlim - 1)
-            {
-                if (numObservationsCurrentState > 0)
-                {
-                    /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1}
-                     *
-                     * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the
-                     * acceptances are all positive!), and hence
-                     *     {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0  for  avg[xi(i->j)] -> 0
-                     * We're catching that case explicitly to avoid numerical
-                     * problems dividing by zero when the overlap between states is small (#3304)
-                     */
-
-                    if (avgAcceptanceCurrentToHigher < 0)
-                    {
-                        varianceCurrentToHigher =
-                                avgAcceptanceCurrentToHigherSquared
-                                        / (avgAcceptanceCurrentToHigher * avgAcceptanceCurrentToHigher)
-                                - 1.0;
-                    }
-                    if (numObservationsHigherState > 0)
-                    {
-                        /* Calculate {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1}
-                         *
-                         * Note that if avg[xi(i->j)] == 0, also avg[xi(i->j)^2] == 0 (since the
-                         * acceptances are all positive!), and hence
-                         *     {avg[xi(i->j)^2] / avg[xi(i->j)]^2 - 1} -> 0  for  avg[xi(i->j)] -> 0
-                         * We're catching that case explicitly to avoid numerical
-                         * problems dividing by zero when the overlap between states is small (#3304)
-                         */
-                        real varianceHigherToCurrent = 0;
-                        if (avgAcceptanceHigherToCurrent > 0)
-                        {
-                            varianceHigherToCurrent =
-                                    avgAcceptanceHigherToCurrentSquared
-                                            / (avgAcceptanceHigherToCurrent * avgAcceptanceHigherToCurrent)
-                                    - 1.0;
-                        }
-                        /* Free energy difference to the state one state higher */
-                        /* if these either of these quantities are zero, the energies are */
-                        /* way too large for the dynamic range.  We need an alternate guesstimate */
-                        if ((avgAcceptanceHigherToCurrent == 0) || (avgAcceptanceCurrentToHigher == 0))
-                        {
-                            weightDifferenceToHigher =
-                                    (scaled_lamee[fep_state + 1] - scaled_lamee[fep_state]);
-                        }
-                        else
-                        {
-                            weightDifferenceToHigher = (std::log(avgAcceptanceHigherToCurrent)
-                                                        - std::log(avgAcceptanceCurrentToHigher))
-                                                       + cnval;
-                        }
-                        /* Variance of the free energy difference to the one state higher */
-                        varianceToHigher =
-                                (1.0 / numObservationsHigherState) * (varianceHigherToCurrent)
-                                + (1.0 / numObservationsCurrentState) * (varianceCurrentToHigher);
-                    }
-                }
-            }
-
-            if (numObservationsCurrentState > 0)
-            {
-                omegam_array[nval] = varianceCurrentToLower;
-            }
-            else
-            {
-                omegam_array[nval] = 0;
-            }
-            weightsm_array[nval] = weightDifferenceToLower;
-            varm_array[nval]     = varianceToLower;
-            if (numObservationsLowerState > 0)
-            {
-                dwm_array[nval] =
-                        fabs((cnval + std::log((1.0 * numObservationsCurrentState) / numObservationsLowerState))
-                             - lam_dg[fep_state - 1]);
-            }
-            else
-            {
-                dwm_array[nval] = std::fabs(cnval - lam_dg[fep_state - 1]);
-            }
-
-            if (numObservationsCurrentState > 0)
-            {
-                omegap_array[nval] = varianceCurrentToHigher;
-            }
-            else
-            {
-                omegap_array[nval] = 0;
-            }
-            weightsp_array[nval] = weightDifferenceToHigher;
-            varp_array[nval]     = varianceToHigher;
-            if ((numObservationsHigherState > 0) && (numObservationsCurrentState > 0))
-            {
-                dwp_array[nval] =
-                        fabs((cnval + std::log((1.0 * numObservationsHigherState) / numObservationsCurrentState))
-                             - lam_dg[fep_state]);
-            }
-            else
-            {
-                dwp_array[nval] = std::fabs(cnval - lam_dg[fep_state]);
-            }
-        }
-
-        /* find the free energy estimate closest to the guessed weight's value */
-
-        min_nvalm     = FindMinimum(dwm_array, maxc);
-        omega_m1_0    = omegam_array[min_nvalm];
-        clam_weightsm = weightsm_array[min_nvalm];
-        clam_varm     = varm_array[min_nvalm];
-
-        min_nvalp     = FindMinimum(dwp_array, maxc);
-        omega_p1_0    = omegap_array[min_nvalp];
-        clam_weightsp = weightsp_array[min_nvalp];
-        clam_varp     = varp_array[min_nvalp];
-
-        clam_osum   = omega_m1_0 + omega_p1_0;
-        clam_minvar = 0;
-        if (clam_osum > 0)
-        {
-            clam_minvar = 0.5 * std::log(clam_osum);
-        }
-
-        if (fep_state > 0)
-        {
-            lam_dg[fep_state - 1]       = clam_weightsm;
-            lam_variance[fep_state - 1] = clam_varm;
-        }
-
-        if (fep_state < nlim - 1)
-        {
-            lam_dg[fep_state]       = clam_weightsp;
-            lam_variance[fep_state] = clam_varp;
-        }
-
-        if (expand->elamstats == elamstatsMINVAR)
-        {
-            bSufficientSamples = TRUE;
-            /* make sure the number of samples in each state are all
-             * past a user-specified threshold
-             */
-            for (i = 0; i < nlim; i++)
-            {
-                if (dfhist->n_at_lam[i] < expand->minvarmin)
-                {
-                    bSufficientSamples = FALSE;
-                }
-            }
-            if (bSufficientSamples)
-            {
-                dfhist->sum_minvar[fep_state] = clam_minvar;
-                if (fep_state == 0)
-                {
-                    for (i = 0; i < nlim; i++)
-                    {
-                        dfhist->sum_minvar[i] += (expand->minvar_const - clam_minvar);
-                    }
-                    expand->minvar_const          = clam_minvar;
-                    dfhist->sum_minvar[fep_state] = 0.0;
-                }
-                else
-                {
-                    dfhist->sum_minvar[fep_state] -= expand->minvar_const;
-                }
-            }
-        }
-
-        /* we need to rezero minvar now, since it could change at fep_state = 0 */
-        dfhist->sum_dg[0]       = 0.0;
-        dfhist->sum_variance[0] = 0.0;
-        dfhist->sum_weights[0]  = dfhist->sum_dg[0] + dfhist->sum_minvar[0]; /* should be zero */
-
-        for (i = 1; i < nlim; i++)
-        {
-            dfhist->sum_dg[i] = lam_dg[i - 1] + dfhist->sum_dg[i - 1];
-            dfhist->sum_variance[i] =
-                    std::sqrt(lam_variance[i - 1] + gmx::square(dfhist->sum_variance[i - 1]));
-            dfhist->sum_weights[i] = dfhist->sum_dg[i] + dfhist->sum_minvar[i];
-        }
-
-        sfree(lam_dg);
-        sfree(lam_variance);
-
-        sfree(omegam_array);
-        sfree(weightsm_array);
-        sfree(varm_array);
-        sfree(dwm_array);
-
-        sfree(omegap_array);
-        sfree(weightsp_array);
-        sfree(varp_array);
-        sfree(dwp_array);
-    }
-    return FALSE;
-}
-
-static int ChooseNewLambda(int               nlim,
-                           const t_expanded* expand,
-                           df_history_t*     dfhist,
-                           int               fep_state,
-                           const real*       weighted_lamee,
-                           double*           p_k,
-                           int64_t           seed,
-                           int64_t           step)
-{
-    /* Choose new lambda value, and update transition matrix */
-
-    int                  i, ifep, minfep, maxfep, lamnew, lamtrial, starting_fep_state;
-    real                 r1, r2, de, trialprob, tprob = 0;
-    double *             propose, *accept, *remainder;
-    double               pks;
-    real                 pnorm;
-    gmx::ThreeFry2x64<0> rng(
-            seed, gmx::RandomDomain::ExpandedEnsemble); // We only draw once, so zero bits internal counter is fine
-    gmx::UniformRealDistribution<real> dist;
-
-    starting_fep_state = fep_state;
-    lamnew             = fep_state; /* so that there is a default setting -- stays the same */
-
-    if (!EWL(expand->elamstats)) /* ignore equilibrating the weights if using WL */
-    {
-        if ((expand->lmc_forced_nstart > 0) && (dfhist->n_at_lam[nlim - 1] <= expand->lmc_forced_nstart))
-        {
-            /* Use a marching method to run through the lambdas and get preliminary free energy data,
-               before starting 'free' sampling.  We start free sampling when we have enough at each lambda */
-
-            /* if we have enough at this lambda, move on to the next one */
-
-            if (dfhist->n_at_lam[fep_state] == expand->lmc_forced_nstart)
-            {
-                lamnew = fep_state + 1;
-                if (lamnew == nlim) /* whoops, stepped too far! */
-                {
-                    lamnew -= 1;
-                }
-            }
-            else
-            {
-                lamnew = fep_state;
-            }
-            return lamnew;
-        }
-    }
-
-    snew(propose, nlim);
-    snew(accept, nlim);
-    snew(remainder, nlim);
-
-    for (i = 0; i < expand->lmc_repeats; i++)
-    {
-        rng.restart(step, i);
-        dist.reset();
-
-        for (ifep = 0; ifep < nlim; ifep++)
-        {
-            propose[ifep] = 0;
-            accept[ifep]  = 0;
-        }
-
-        if ((expand->elmcmove == elmcmoveGIBBS) || (expand->elmcmove == elmcmoveMETGIBBS))
-        {
-            /* use the Gibbs sampler, with restricted range */
-            if (expand->gibbsdeltalam < 0)
-            {
-                minfep = 0;
-                maxfep = nlim - 1;
-            }
-            else
-            {
-                minfep = fep_state - expand->gibbsdeltalam;
-                maxfep = fep_state + expand->gibbsdeltalam;
-                if (minfep < 0)
-                {
-                    minfep = 0;
-                }
-                if (maxfep > nlim - 1)
-                {
-                    maxfep = nlim - 1;
-                }
-            }
-
-            GenerateGibbsProbabilities(weighted_lamee, p_k, &pks, minfep, maxfep);
-
-            if (expand->elmcmove == elmcmoveGIBBS)
-            {
-                for (ifep = minfep; ifep <= maxfep; ifep++)
-                {
-                    propose[ifep] = p_k[ifep];
-                    accept[ifep]  = 1.0;
-                }
-                /* Gibbs sampling */
-                r1 = dist(rng);
-                for (lamnew = minfep; lamnew <= maxfep; lamnew++)
-                {
-                    if (r1 <= p_k[lamnew])
-                    {
-                        break;
-                    }
-                    r1 -= p_k[lamnew];
-                }
-            }
-            else if (expand->elmcmove == elmcmoveMETGIBBS)
-            {
-
-                /* Metropolized Gibbs sampling */
-                for (ifep = minfep; ifep <= maxfep; ifep++)
-                {
-                    remainder[ifep] = 1 - p_k[ifep];
-                }
-
-                /* find the proposal probabilities */
-
-                if (remainder[fep_state] == 0)
-                {
-                    /* only the current state has any probability */
-                    /* we have to stay at the current state */
-                    lamnew = fep_state;
-                }
-                else
-                {
-                    for (ifep = minfep; ifep <= maxfep; ifep++)
-                    {
-                        if (ifep != fep_state)
-                        {
-                            propose[ifep] = p_k[ifep] / remainder[fep_state];
-                        }
-                        else
-                        {
-                            propose[ifep] = 0;
-                        }
-                    }
-
-                    r1 = dist(rng);
-                    for (lamtrial = minfep; lamtrial <= maxfep; lamtrial++)
-                    {
-                        pnorm = p_k[lamtrial] / remainder[fep_state];
-                        if (lamtrial != fep_state)
-                        {
-                            if (r1 <= pnorm)
-                            {
-                                break;
-                            }
-                            r1 -= pnorm;
-                        }
-                    }
-
-                    /* we have now selected lamtrial according to p(lamtrial)/1-p(fep_state) */
-                    tprob = 1.0;
-                    /* trial probability is min{1,\frac{1 - p(old)}{1-p(new)} MRS 1/8/2008 */
-                    trialprob = (remainder[fep_state]) / (remainder[lamtrial]);
-                    if (trialprob < tprob)
-                    {
-                        tprob = trialprob;
-                    }
-                    r2 = dist(rng);
-                    if (r2 < tprob)
-                    {
-                        lamnew = lamtrial;
-                    }
-                    else
-                    {
-                        lamnew = fep_state;
-                    }
-                }
-
-                /* now figure out the acceptance probability for each */
-                for (ifep = minfep; ifep <= maxfep; ifep++)
-                {
-                    tprob = 1.0;
-                    if (remainder[ifep] != 0)
-                    {
-                        trialprob = (remainder[fep_state]) / (remainder[ifep]);
-                    }
-                    else
-                    {
-                        trialprob = 1.0; /* this state is the only choice! */
-                    }
-                    if (trialprob < tprob)
-                    {
-                        tprob = trialprob;
-                    }
-                    /* probability for fep_state=0, but that's fine, it's never proposed! */
-                    accept[ifep] = tprob;
-                }
-            }
-
-            if (lamnew > maxfep)
-            {
-                /* it's possible some rounding is failing */
-                if (gmx_within_tol(remainder[fep_state], 0, 50 * GMX_DOUBLE_EPS))
-                {
-                    /* numerical rounding error -- no state other than the original has weight */
-                    lamnew = fep_state;
-                }
-                else
-                {
-                    /* probably not a numerical issue */
-                    int   loc    = 0;
-                    int   nerror = 200 + (maxfep - minfep + 1) * 60;
-                    char* errorstr;
-                    snew(errorstr, nerror);
-                    /* if its greater than maxfep, then something went wrong -- probably underflow
-                       in the calculation of sum weights. Generated detailed info for failure */
-                    loc += sprintf(
-                            errorstr,
-                            "Something wrong in choosing new lambda state with a Gibbs move -- "
-                            "probably underflow in weight determination.\nDenominator is: "
-                            "%3d%17.10e\n  i                dE        numerator          weights\n",
-                            0, pks);
-                    for (ifep = minfep; ifep <= maxfep; ifep++)
-                    {
-                        loc += sprintf(&errorstr[loc], "%3d %17.10e%17.10e%17.10e\n", ifep,
-                                       weighted_lamee[ifep], p_k[ifep], dfhist->sum_weights[ifep]);
-                    }
-                    gmx_fatal(FARGS, "%s", errorstr);
-                }
-            }
-        }
-        else if ((expand->elmcmove == elmcmoveMETROPOLIS) || (expand->elmcmove == elmcmoveBARKER))
-        {
-            /* use the metropolis sampler with trial +/- 1 */
-            r1 = dist(rng);
-            if (r1 < 0.5)
-            {
-                if (fep_state == 0)
-                {
-                    lamtrial = fep_state;
-                }
-                else
-                {
-                    lamtrial = fep_state - 1;
-                }
-            }
-            else
-            {
-                if (fep_state == nlim - 1)
-                {
-                    lamtrial = fep_state;
-                }
-                else
-                {
-                    lamtrial = fep_state + 1;
-                }
-            }
-
-            de = weighted_lamee[lamtrial] - weighted_lamee[fep_state];
-            if (expand->elmcmove == elmcmoveMETROPOLIS)
-            {
-                tprob = 1.0;
-                if (de < 0)
-                {
-                    tprob = std::exp(de);
-                }
-                propose[fep_state] = 0;
-                propose[lamtrial]  = 1.0; /* note that this overwrites the above line if fep_state = ntrial, which only occurs at the ends */
-                accept[fep_state] =
-                        1.0; /* doesn't actually matter, never proposed unless fep_state = ntrial, in which case it's 1.0 anyway */
-                accept[lamtrial] = tprob;
-            }
-            else if (expand->elmcmove == elmcmoveBARKER)
-            {
-                if (de > 0) /* Numerically stable version */
-                {
-                    tprob = 1.0 / (1.0 + std::exp(-de));
-                }
-                else if (de < 0)
-                {
-                    tprob = std::exp(de) / (std::exp(de) + 1.0);
-                }
-                propose[fep_state] = (1 - tprob);
-                propose[lamtrial] +=
-                        tprob; /* we add, to account for the fact that at the end, they might be the same point */
-                accept[fep_state] = 1.0;
-                accept[lamtrial]  = 1.0;
-            }
-
-            r2 = dist(rng);
-            if (r2 < tprob)
-            {
-                lamnew = lamtrial;
-            }
-            else
-            {
-                lamnew = fep_state;
-            }
-        }
-
-        for (ifep = 0; ifep < nlim; ifep++)
-        {
-            dfhist->Tij[fep_state][ifep] += propose[ifep] * accept[ifep];
-            dfhist->Tij[fep_state][fep_state] += propose[ifep] * (1.0 - accept[ifep]);
-        }
-        fep_state = lamnew;
-    }
-
-    dfhist->Tij_empirical[starting_fep_state][lamnew] += 1.0;
-
-    sfree(propose);
-    sfree(accept);
-    sfree(remainder);
-
-    return lamnew;
-}
-
-/* print out the weights to the log, along with current state */
-void PrintFreeEnergyInfoToFile(FILE*               outfile,
-                               const t_lambda*     fep,
-                               const t_expanded*   expand,
-                               const t_simtemp*    simtemp,
-                               const df_history_t* dfhist,
-                               int                 fep_state,
-                               int                 frequency,
-                               int64_t             step)
-{
-    int         nlim, i, ifep, jfep;
-    real        dw, dg, dv, Tprint;
-    const char* print_names[efptNR] = { " FEPL", "MassL", "CoulL",   " VdwL",
-                                        "BondL", "RestT", "Temp.(K)" };
-    gmx_bool    bSimTemp            = FALSE;
-
-    nlim = fep->n_lambda;
-    if (simtemp != nullptr)
-    {
-        bSimTemp = TRUE;
-    }
-
-    if (step % frequency == 0)
-    {
-        fprintf(outfile, "             MC-lambda information\n");
-        if (EWL(expand->elamstats) && (!(dfhist->bEquil)))
-        {
-            fprintf(outfile, "  Wang-Landau incrementor is: %11.5g\n", dfhist->wl_delta);
-        }
-        fprintf(outfile, "  N");
-        for (i = 0; i < efptNR; i++)
-        {
-            if (fep->separate_dvdl[i])
-            {
-                fprintf(outfile, "%7s", print_names[i]);
-            }
-            else if ((i == efptTEMPERATURE) && bSimTemp)
-            {
-                fprintf(outfile, "%10s", print_names[i]); /* more space for temperature formats */
-            }
-        }
-        fprintf(outfile, "    Count   ");
-        if (expand->elamstats == elamstatsMINVAR)
-        {
-            fprintf(outfile, "W(in kT)   G(in kT)  dG(in kT)  dV(in kT)\n");
-        }
-        else
-        {
-            fprintf(outfile, "G(in kT)  dG(in kT)\n");
-        }
-        for (ifep = 0; ifep < nlim; ifep++)
-        {
-            if (ifep == nlim - 1)
-            {
-                dw = 0.0;
-                dg = 0.0;
-                dv = 0.0;
-            }
-            else
-            {
-                dw = dfhist->sum_weights[ifep + 1] - dfhist->sum_weights[ifep];
-                dg = dfhist->sum_dg[ifep + 1] - dfhist->sum_dg[ifep];
-                dv = std::sqrt(gmx::square(dfhist->sum_variance[ifep + 1])
-                               - gmx::square(dfhist->sum_variance[ifep]));
-            }
-            fprintf(outfile, "%3d", (ifep + 1));
-            for (i = 0; i < efptNR; i++)
-            {
-                if (fep->separate_dvdl[i])
-                {
-                    fprintf(outfile, "%7.3f", fep->all_lambda[i][ifep]);
-                }
-                else if (i == efptTEMPERATURE && bSimTemp)
-                {
-                    fprintf(outfile, "%9.3f", simtemp->temperatures[ifep]);
-                }
-            }
-            if (EWL(expand->elamstats)
-                && (!(dfhist->bEquil))) /* if performing WL and still haven't equilibrated */
-            {
-                if (expand->elamstats == elamstatsWL)
-                {
-                    fprintf(outfile, " %8d", static_cast<int>(dfhist->wl_histo[ifep]));
-                }
-                else
-                {
-                    fprintf(outfile, " %8.3f", dfhist->wl_histo[ifep]);
-                }
-            }
-            else /* we have equilibrated weights */
-            {
-                fprintf(outfile, " %8d", dfhist->n_at_lam[ifep]);
-            }
-            if (expand->elamstats == elamstatsMINVAR)
-            {
-                fprintf(outfile, " %10.5f %10.5f %10.5f %10.5f", dfhist->sum_weights[ifep],
-                        dfhist->sum_dg[ifep], dg, dv);
-            }
-            else
-            {
-                fprintf(outfile, " %10.5f %10.5f", dfhist->sum_weights[ifep], dw);
-            }
-            if (ifep == fep_state)
-            {
-                fprintf(outfile, " <<\n");
-            }
-            else
-            {
-                fprintf(outfile, "   \n");
-            }
-        }
-        fprintf(outfile, "\n");
-
-        if ((step % expand->nstTij == 0) && (expand->nstTij > 0) && (step > 0))
-        {
-            fprintf(outfile, "                     Transition Matrix\n");
-            for (ifep = 0; ifep < nlim; ifep++)
-            {
-                fprintf(outfile, "%12d", (ifep + 1));
-            }
-            fprintf(outfile, "\n");
-            for (ifep = 0; ifep < nlim; ifep++)
-            {
-                for (jfep = 0; jfep < nlim; jfep++)
-                {
-                    if (dfhist->n_at_lam[ifep] > 0)
-                    {
-                        if (expand->bSymmetrizedTMatrix)
-                        {
-                            Tprint = (dfhist->Tij[ifep][jfep] + dfhist->Tij[jfep][ifep])
-                                     / (dfhist->n_at_lam[ifep] + dfhist->n_at_lam[jfep]);
-                        }
-                        else
-                        {
-                            Tprint = (dfhist->Tij[ifep][jfep]) / (dfhist->n_at_lam[ifep]);
-                        }
-                    }
-                    else
-                    {
-                        Tprint = 0.0;
-                    }
-                    fprintf(outfile, "%12.8f", Tprint);
-                }
-                fprintf(outfile, "%3d\n", (ifep + 1));
-            }
-
-            fprintf(outfile, "                  Empirical Transition Matrix\n");
-            for (ifep = 0; ifep < nlim; ifep++)
-            {
-                fprintf(outfile, "%12d", (ifep + 1));
-            }
-            fprintf(outfile, "\n");
-            for (ifep = 0; ifep < nlim; ifep++)
-            {
-                for (jfep = 0; jfep < nlim; jfep++)
-                {
-                    if (dfhist->n_at_lam[ifep] > 0)
-                    {
-                        if (expand->bSymmetrizedTMatrix)
-                        {
-                            Tprint = (dfhist->Tij_empirical[ifep][jfep] + dfhist->Tij_empirical[jfep][ifep])
-                                     / (dfhist->n_at_lam[ifep] + dfhist->n_at_lam[jfep]);
-                        }
-                        else
-                        {
-                            Tprint = dfhist->Tij_empirical[ifep][jfep] / (dfhist->n_at_lam[ifep]);
-                        }
-                    }
-                    else
-                    {
-                        Tprint = 0.0;
-                    }
-                    fprintf(outfile, "%12.8f", Tprint);
-                }
-                fprintf(outfile, "%3d\n", (ifep + 1));
-            }
-        }
-    }
-}
-
-int ExpandedEnsembleDynamics(FILE*                 log,
-                             const t_inputrec*     ir,
-                             const gmx_enerdata_t* enerd,
-                             t_state*              state,
-                             t_extmass*            MassQ,
-                             int                   fep_state,
-                             df_history_t*         dfhist,
-                             int64_t               step,
-                             rvec*                 v,
-                             const t_mdatoms*      mdatoms)
-/* Note that the state variable is only needed for simulated tempering, not
-   Hamiltonian expanded ensemble.  May be able to remove it after integrator refactoring. */
-{
-    real *      pfep_lamee, *scaled_lamee, *weighted_lamee;
-    double*     p_k;
-    int         i, nlim, lamnew, totalsamples;
-    real        oneovert, maxscaled = 0, maxweighted = 0;
-    t_expanded* expand;
-    t_simtemp*  simtemp;
-    gmx_bool    bIfReset, bSwitchtoOneOverT, bDoneEquilibrating = FALSE;
-
-    expand  = ir->expandedvals;
-    simtemp = ir->simtempvals;
-    nlim    = ir->fepvals->n_lambda;
-
-    snew(scaled_lamee, nlim);
-    snew(weighted_lamee, nlim);
-    snew(pfep_lamee, nlim);
-    snew(p_k, nlim);
-
-    /* update the count at the current lambda*/
-    dfhist->n_at_lam[fep_state]++;
-
-    /* need to calculate the PV term somewhere, but not needed here? Not until there's a lambda
-       state that's pressure controlled.*/
-    /*
-       pVTerm = 0;
-       where does this PV term go?
-       for (i=0;i<nlim;i++)
-       {
-       fep_lamee[i] += pVTerm;
-       }
-     */
-
-    /* determine the minimum value to avoid overflow.  Probably a better way to do this */
-    /* we don't need to include the pressure term, since the volume is the same between the two.
-       is there some term we are neglecting, however? */
-
-    if (ir->efep != efepNO)
-    {
-        for (i = 0; i < nlim; i++)
-        {
-            if (ir->bSimTemp)
-            {
-                /* Note -- this assumes no mass changes, since kinetic energy is not added  . . . */
-                scaled_lamee[i] = (enerd->enerpart_lambda[i + 1] - enerd->enerpart_lambda[0])
-                                          / (simtemp->temperatures[i] * BOLTZ)
-                                  + enerd->term[F_EPOT]
-                                            * (1.0 / (simtemp->temperatures[i])
-                                               - 1.0 / (simtemp->temperatures[fep_state]))
-                                            / BOLTZ;
-            }
-            else
-            {
-                scaled_lamee[i] = (enerd->enerpart_lambda[i + 1] - enerd->enerpart_lambda[0])
-                                  / (expand->mc_temp * BOLTZ);
-                /* mc_temp is currently set to the system reft unless otherwise defined */
-            }
-
-            /* save these energies for printing, so they don't get overwritten by the next step */
-            /* they aren't overwritten in the non-free energy case, but we always print with these
-               for simplicity */
-        }
-    }
-    else
-    {
-        if (ir->bSimTemp)
-        {
-            for (i = 0; i < nlim; i++)
-            {
-                scaled_lamee[i] =
-                        enerd->term[F_EPOT]
-                        * (1.0 / simtemp->temperatures[i] - 1.0 / simtemp->temperatures[fep_state]) / BOLTZ;
-            }
-        }
-    }
-
-    for (i = 0; i < nlim; i++)
-    {
-        pfep_lamee[i] = scaled_lamee[i];
-
-        weighted_lamee[i] = dfhist->sum_weights[i] - scaled_lamee[i];
-        if (i == 0)
-        {
-            maxscaled   = scaled_lamee[i];
-            maxweighted = weighted_lamee[i];
-        }
-        else
-        {
-            if (scaled_lamee[i] > maxscaled)
-            {
-                maxscaled = scaled_lamee[i];
-            }
-            if (weighted_lamee[i] > maxweighted)
-            {
-                maxweighted = weighted_lamee[i];
-            }
-        }
-    }
-
-    for (i = 0; i < nlim; i++)
-    {
-        scaled_lamee[i] -= maxscaled;
-        weighted_lamee[i] -= maxweighted;
-    }
-
-    /* update weights - we decide whether or not to actually do this inside */
-
-    bDoneEquilibrating =
-            UpdateWeights(nlim, expand, dfhist, fep_state, scaled_lamee, weighted_lamee, step);
-    if (bDoneEquilibrating)
-    {
-        if (log)
-        {
-            fprintf(log, "\nStep %" PRId64 ": Weights have equilibrated, using criteria: %s\n",
-                    step, elmceq_names[expand->elmceq]);
-        }
-    }
-
-    lamnew = ChooseNewLambda(nlim, expand, dfhist, fep_state, weighted_lamee, p_k,
-                             ir->expandedvals->lmc_seed, step);
-    /* if using simulated tempering, we need to adjust the temperatures */
-    if (ir->bSimTemp && (lamnew != fep_state)) /* only need to change the temperatures if we change the state */
-    {
-        int   i, j, n, d;
-        real* buf_ngtc;
-        real  told;
-        int   nstart, nend, gt;
-
-        snew(buf_ngtc, ir->opts.ngtc);
-
-        for (i = 0; i < ir->opts.ngtc; i++)
-        {
-            if (ir->opts.ref_t[i] > 0)
-            {
-                told              = ir->opts.ref_t[i];
-                ir->opts.ref_t[i] = simtemp->temperatures[lamnew];
-                buf_ngtc[i]       = std::sqrt(ir->opts.ref_t[i] / told); /* using the buffer as temperature scaling */
-            }
-        }
-
-        /* we don't need to manipulate the ekind information, as it isn't due to be reset until the next step anyway */
-
-        nstart = 0;
-        nend   = mdatoms->homenr;
-        for (n = nstart; n < nend; n++)
-        {
-            gt = 0;
-            if (mdatoms->cTC)
-            {
-                gt = mdatoms->cTC[n];
-            }
-            for (d = 0; d < DIM; d++)
-            {
-                v[n][d] *= buf_ngtc[gt];
-            }
-        }
-
-        if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir))
-        {
-            /* we need to recalculate the masses if the temperature has changed */
-            init_npt_masses(ir, state, MassQ, FALSE);
-            for (i = 0; i < state->nnhpres; i++)
-            {
-                for (j = 0; j < ir->opts.nhchainlength; j++)
-                {
-                    state->nhpres_vxi[i + j] *= buf_ngtc[i];
-                }
-            }
-            for (i = 0; i < ir->opts.ngtc; i++)
-            {
-                for (j = 0; j < ir->opts.nhchainlength; j++)
-                {
-                    state->nosehoover_vxi[i + j] *= buf_ngtc[i];
-                }
-            }
-        }
-        sfree(buf_ngtc);
-    }
-
-    /* now check on the Wang-Landau updating critera */
-
-    if (EWL(expand->elamstats))
-    {
-        bSwitchtoOneOverT = FALSE;
-        if (expand->bWLoneovert)
-        {
-            totalsamples = 0;
-            for (i = 0; i < nlim; i++)
-            {
-                totalsamples += dfhist->n_at_lam[i];
-            }
-            oneovert = (1.0 * nlim) / totalsamples;
-            /* oneovert has decreasd by a bit since last time, so we actually make sure its within one of this number */
-            /* switch to 1/t incrementing when wl_delta has decreased at least once, and wl_delta is now less than 1/t */
-            if ((dfhist->wl_delta <= ((totalsamples) / (totalsamples - 1.00001)) * oneovert)
-                && (dfhist->wl_delta < expand->init_wl_delta))
-            {
-                bSwitchtoOneOverT = TRUE;
-            }
-        }
-        if (bSwitchtoOneOverT)
-        {
-            dfhist->wl_delta =
-                    oneovert; /* now we reduce by this each time, instead of only at flatness */
-        }
-        else
-        {
-            bIfReset = CheckHistogramRatios(nlim, dfhist->wl_histo, expand->wl_ratio);
-            if (bIfReset)
-            {
-                for (i = 0; i < nlim; i++)
-                {
-                    dfhist->wl_histo[i] = 0;
-                }
-                dfhist->wl_delta *= expand->wl_scale;
-                if (log)
-                {
-                    fprintf(log, "\nStep %d: weights are now:", static_cast<int>(step));
-                    for (i = 0; i < nlim; i++)
-                    {
-                        fprintf(log, " %.5f", dfhist->sum_weights[i]);
-                    }
-                    fprintf(log, "\n");
-                }
-            }
-        }
-    }
-    sfree(pfep_lamee);
-    sfree(scaled_lamee);
-    sfree(weighted_lamee);
-    sfree(p_k);
-
-    return lamnew;
-}
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.h b/patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.h
deleted file mode 100644
index 7766a864fd..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef GMX_MDLIB_EXPANDED_H
-#define GMX_MDLIB_EXPANDED_H
-
-#include <stdio.h>
-
-#include "gromacs/math/vectypes.h"
-#include "gromacs/utility/basedefinitions.h"
-
-struct df_history_t;
-struct gmx_enerdata_t;
-struct t_expanded;
-struct t_extmass;
-struct t_inputrec;
-struct t_lambda;
-struct t_mdatoms;
-struct t_simtemp;
-class t_state;
-
-namespace gmx
-{
-class MDLogger;
-} // namespace gmx
-
-void init_npt_masses(const t_inputrec* ir, t_state* state, t_extmass* MassQ, gmx_bool bInit);
-
-void init_expanded_ensemble(gmx_bool bStateFromCP, const t_inputrec* ir, df_history_t* dfhist, const gmx::MDLogger& mdlog);
-
-int ExpandedEnsembleDynamics(FILE*                 log,
-                             const t_inputrec*     ir,
-                             const gmx_enerdata_t* enerd,
-                             t_state*              state,
-                             t_extmass*            MassQ,
-                             int                   fep_state,
-                             df_history_t*         dfhist,
-                             int64_t               step,
-                             rvec*                 v,
-                             const t_mdatoms*      mdatoms,
-                             real*                 realFepState);
-
-void PrintFreeEnergyInfoToFile(FILE*               outfile,
-                               const t_lambda*     fep,
-                               const t_expanded*   expand,
-                               const t_simtemp*    simtemp,
-                               const df_history_t* dfhist,
-                               int                 fep_state,
-                               int                 frequency,
-                               int64_t             step);
-
-#endif
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.h.preplumed b/patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.h.preplumed
deleted file mode 100644
index 6f6bec9804..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdlib/expanded.h.preplumed
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#ifndef GMX_MDLIB_EXPANDED_H
-#define GMX_MDLIB_EXPANDED_H
-
-#include <stdio.h>
-
-#include "gromacs/math/vectypes.h"
-#include "gromacs/utility/basedefinitions.h"
-
-struct df_history_t;
-struct gmx_enerdata_t;
-struct t_expanded;
-struct t_extmass;
-struct t_inputrec;
-struct t_lambda;
-struct t_mdatoms;
-struct t_simtemp;
-class t_state;
-
-void init_npt_masses(const t_inputrec* ir, t_state* state, t_extmass* MassQ, gmx_bool bInit);
-
-void init_expanded_ensemble(gmx_bool bStateFromCP, const t_inputrec* ir, df_history_t* dfhist);
-
-int ExpandedEnsembleDynamics(FILE*                 log,
-                             const t_inputrec*     ir,
-                             const gmx_enerdata_t* enerd,
-                             t_state*              state,
-                             t_extmass*            MassQ,
-                             int                   fep_state,
-                             df_history_t*         dfhist,
-                             int64_t               step,
-                             rvec*                 v,
-                             const t_mdatoms*      mdatoms);
-
-void PrintFreeEnergyInfoToFile(FILE*               outfile,
-                               const t_lambda*     fep,
-                               const t_expanded*   expand,
-                               const t_simtemp*    simtemp,
-                               const df_history_t* dfhist,
-                               int                 fep_state,
-                               int                 frequency,
-                               int64_t             step);
-
-#endif
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdlib/force.cpp b/patches/gromacs-2020.7.diff/src/gromacs/mdlib/force.cpp
deleted file mode 100644
index 7349417bfd..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdlib/force.cpp
+++ /dev/null
@@ -1,388 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "force.h"
-
-#include <cassert>
-#include <cmath>
-#include <cstring>
-
-#include "gromacs/domdec/dlbtiming.h"
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/domdec/domdec_struct.h"
-#include "gromacs/ewald/ewald.h"
-#include "gromacs/ewald/long_range_correction.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/listed_forces/listed_forces.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/math/vecdump.h"
-#include "gromacs/mdlib/forcerec_threading.h"
-#include "gromacs/mdlib/qmmm.h"
-#include "gromacs/mdlib/rf_util.h"
-#include "gromacs/mdlib/wall.h"
-#include "gromacs/mdtypes/commrec.h"
-#include "gromacs/mdtypes/enerdata.h"
-#include "gromacs/mdtypes/forceoutput.h"
-#include "gromacs/mdtypes/forcerec.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdatom.h"
-#include "gromacs/mdtypes/simulation_workload.h"
-#include "gromacs/pbcutil/ishift.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/smalloc.h"
-
-/* PLUMED */
-#include "../../../Plumed.h"
-int    plumedswitch=0;
-plumed plumedmain;
-/* END PLUMED */
-
-static void clearEwaldThreadOutput(ewald_corr_thread_t* ewc_t)
-{
-    ewc_t->Vcorr_q        = 0;
-    ewc_t->Vcorr_lj       = 0;
-    ewc_t->dvdl[efptCOUL] = 0;
-    ewc_t->dvdl[efptVDW]  = 0;
-    clear_mat(ewc_t->vir_q);
-    clear_mat(ewc_t->vir_lj);
-}
-
-static void reduceEwaldThreadOuput(int nthreads, ewald_corr_thread_t* ewc_t)
-{
-    ewald_corr_thread_t& dest = ewc_t[0];
-
-    for (int t = 1; t < nthreads; t++)
-    {
-        dest.Vcorr_q += ewc_t[t].Vcorr_q;
-        dest.Vcorr_lj += ewc_t[t].Vcorr_lj;
-        dest.dvdl[efptCOUL] += ewc_t[t].dvdl[efptCOUL];
-        dest.dvdl[efptVDW] += ewc_t[t].dvdl[efptVDW];
-        m_add(dest.vir_q, ewc_t[t].vir_q, dest.vir_q);
-        m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj);
-    }
-}
-
-void do_force_lowlevel(t_forcerec*                         fr,
-                       const t_inputrec*                   ir,
-                       const t_idef*                       idef,
-                       const t_commrec*                    cr,
-                       const gmx_multisim_t*               ms,
-                       t_nrnb*                             nrnb,
-                       gmx_wallcycle_t                     wcycle,
-                       const t_mdatoms*                    md,
-                       gmx::ArrayRefWithPadding<gmx::RVec> coordinates,
-                       history_t*                          hist,
-                       gmx::ForceOutputs*                  forceOutputs,
-                       gmx_enerdata_t*                     enerd,
-                       t_fcdata*                           fcd,
-                       const matrix                        box,
-                       const real*                         lambda,
-                       const t_graph*                      graph,
-                       const rvec*                         mu_tot,
-                       const gmx::StepWorkload&            stepWork,
-                       const DDBalanceRegionHandler&       ddBalanceRegionHandler)
-{
-    // TODO: Replace all uses of x by const coordinates
-    rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());
-
-    auto& forceWithVirial = forceOutputs->forceWithVirial();
-
-    /* do QMMM first if requested */
-    if (fr->bQMMM)
-    {
-        enerd->term[F_EQM] = calculate_QMMM(cr, &forceOutputs->forceWithShiftForces(), fr);
-    }
-
-    /* Call the short range functions all in one go. */
-
-    if (ir->nwall)
-    {
-        /* foreign lambda component for walls */
-        real dvdl_walls = do_walls(*ir, *fr, box, *md, x, &forceWithVirial, lambda[efptVDW],
-                                   enerd->grpp.ener[egLJSR].data(), nrnb);
-        enerd->dvdl_lin[efptVDW] += dvdl_walls;
-    }
-
-    /* Shift the coordinates. Must be done before listed forces and PPPM,
-     * but is also necessary for SHAKE and update, therefore it can NOT
-     * go when no listed forces have to be evaluated.
-     *
-     * The shifting and PBC code is deliberately not timed, since with
-     * the Verlet scheme it only takes non-zero time with triclinic
-     * boxes, and even then the time is around a factor of 100 less
-     * than the next smallest counter.
-     */
-
-
-    /* Here sometimes we would not need to shift with NBFonly,
-     * but we do so anyhow for consistency of the returned coordinates.
-     */
-    if (graph)
-    {
-        shift_self(graph, box, x);
-        if (TRICLINIC(box))
-        {
-            inc_nrnb(nrnb, eNR_SHIFTX, 2 * graph->nnodes);
-        }
-        else
-        {
-            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
-        }
-    }
-
-    {
-        t_pbc pbc;
-
-        /* Check whether we need to take into account PBC in listed interactions. */
-        const auto needPbcForListedForces =
-                fr->bMolPBC && stepWork.computeListedForces && haveCpuListedForces(*fr, *idef, *fcd);
-        if (needPbcForListedForces)
-        {
-            /* Since all atoms are in the rectangular or triclinic unit-cell,
-             * only single box vector shifts (2 in x) are required.
-             */
-            set_pbc_dd(&pbc, fr->ePBC, DOMAINDECOMP(cr) ? cr->dd->nc : nullptr, TRUE, box);
-        }
-
-        do_force_listed(wcycle, box, ir->fepvals, cr, ms, idef, x, hist, forceOutputs, fr, &pbc,
-                        graph, enerd, nrnb, lambda, md, fcd,
-                        DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr, stepWork);
-    }
-
-    const bool computePmeOnCpu = (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
-                                 && thisRankHasDuty(cr, DUTY_PME)
-                                 && (pme_run_mode(fr->pmedata) == PmeRunMode::CPU);
-
-    const bool haveEwaldSurfaceTerm = haveEwaldSurfaceContribution(*ir);
-
-    /* Do long-range electrostatics and/or LJ-PME
-     * and compute PME surface terms when necessary.
-     */
-    if (computePmeOnCpu || fr->ic->eeltype == eelEWALD || haveEwaldSurfaceTerm)
-    {
-        int  status = 0;
-        real Vlr_q = 0, Vlr_lj = 0;
-
-        /* We reduce all virial, dV/dlambda and energy contributions, except
-         * for the reciprocal energies (Vlr_q, Vlr_lj) into the same struct.
-         */
-        ewald_corr_thread_t& ewaldOutput = fr->ewc_t[0];
-        clearEwaldThreadOutput(&ewaldOutput);
-
-        if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
-        {
-            /* Calculate the Ewald surface force and energy contributions, when necessary */
-            if (haveEwaldSurfaceTerm)
-            {
-                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
-
-                if (fr->n_tpi > 0)
-                {
-                    gmx_fatal(FARGS,
-                              "TPI with PME currently only works in a 3D geometry with tin-foil "
-                              "boundary conditions");
-                }
-
-                int nthreads = fr->nthread_ewc;
-#pragma omp parallel for num_threads(nthreads) schedule(static)
-                for (int t = 0; t < nthreads; t++)
-                {
-                    try
-                    {
-                        ewald_corr_thread_t& ewc_t = fr->ewc_t[t];
-                        if (t > 0)
-                        {
-                            clearEwaldThreadOutput(&ewc_t);
-                        }
-
-                        /* Threading is only supported with the Verlet cut-off
-                         * scheme and then only single particle forces (no
-                         * exclusion forces) are calculated, so we can store
-                         * the forces in the normal, single forceWithVirial->force_ array.
-                         */
-                        ewald_LRcorrection(md->homenr, cr, nthreads, t, *fr, *ir, md->chargeA,
-                                           md->chargeB, (md->nChargePerturbed != 0), x, box, mu_tot,
-                                           as_rvec_array(forceWithVirial.force_.data()),
-                                           &ewc_t.Vcorr_q, lambda[efptCOUL], &ewc_t.dvdl[efptCOUL]);
-                    }
-                    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-                }
-                if (nthreads > 1)
-                {
-                    reduceEwaldThreadOuput(nthreads, fr->ewc_t);
-                }
-                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
-            }
-
-            if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0)
-            {
-                /* This is not in a subcounter because it takes a
-                   negligible and constant-sized amount of time */
-                ewaldOutput.Vcorr_q += ewald_charge_correction(
-                        cr, fr, lambda[efptCOUL], box, &ewaldOutput.dvdl[efptCOUL], ewaldOutput.vir_q);
-            }
-
-            if (computePmeOnCpu)
-            {
-                /* Do reciprocal PME for Coulomb and/or LJ. */
-                assert(fr->n_tpi >= 0);
-                if (fr->n_tpi == 0 || stepWork.stateChanged)
-                {
-                    int pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
-
-                    if (stepWork.computeForces)
-                    {
-                        pme_flags |= GMX_PME_CALC_F;
-                    }
-                    if (stepWork.computeVirial)
-                    {
-                        pme_flags |= GMX_PME_CALC_ENER_VIR;
-                    }
-                    if (fr->n_tpi > 0)
-                    {
-                        /* We don't calculate f, but we do want the potential */
-                        pme_flags |= GMX_PME_CALC_POT;
-                    }
-
-                    /* With domain decomposition we close the CPU side load
-                     * balancing region here, because PME does global
-                     * communication that acts as a global barrier.
-                     */
-                    ddBalanceRegionHandler.closeAfterForceComputationCpu();
-
-                    wallcycle_start(wcycle, ewcPMEMESH);
-                    status = gmx_pme_do(
-                            fr->pmedata,
-                            gmx::constArrayRefFromArray(coordinates.unpaddedConstArrayRef().data(),
-                                                        md->homenr - fr->n_tpi),
-                            forceWithVirial.force_, md->chargeA, md->chargeB, md->sqrt_c6A,
-                            md->sqrt_c6B, md->sigmaA, md->sigmaB, box, cr,
-                            DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
-                            DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle,
-                            ewaldOutput.vir_q, ewaldOutput.vir_lj, &Vlr_q, &Vlr_lj,
-                            lambda[efptCOUL], lambda[efptVDW], &ewaldOutput.dvdl[efptCOUL],
-                            &ewaldOutput.dvdl[efptVDW], pme_flags);
-                    wallcycle_stop(wcycle, ewcPMEMESH);
-                    if (status != 0)
-                    {
-                        gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
-                    }
-
-                    /* We should try to do as little computation after
-                     * this as possible, because parallel PME synchronizes
-                     * the nodes, so we want all load imbalance of the
-                     * rest of the force calculation to be before the PME
-                     * call.  DD load balancing is done on the whole time
-                     * of the force call (without PME).
-                     */
-                }
-                if (fr->n_tpi > 0)
-                {
-                    if (EVDW_PME(ir->vdwtype))
-                    {
-
-                        gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
-                    }
-                    /* Determine the PME grid energy of the test molecule
-                     * with the PME grid potential of the other charges.
-                     */
-                    gmx_pme_calc_energy(
-                            fr->pmedata,
-                            coordinates.unpaddedConstArrayRef().subArray(md->homenr - fr->n_tpi, fr->n_tpi),
-                            gmx::arrayRefFromArray(md->chargeA + md->homenr - fr->n_tpi, fr->n_tpi),
-                            &Vlr_q);
-                }
-            }
-        }
-
-        if (fr->ic->eeltype == eelEWALD)
-        {
-            Vlr_q = do_ewald(ir, x, as_rvec_array(forceWithVirial.force_.data()), md->chargeA,
-                             md->chargeB, box, cr, md->homenr, ewaldOutput.vir_q, fr->ic->ewaldcoeff_q,
-                             lambda[efptCOUL], &ewaldOutput.dvdl[efptCOUL], fr->ewald_table);
-        }
-
-        /* Note that with separate PME nodes we get the real energies later */
-        // TODO it would be simpler if we just accumulated a single
-        // long-range virial contribution.
-        forceWithVirial.addVirialContribution(ewaldOutput.vir_q);
-        forceWithVirial.addVirialContribution(ewaldOutput.vir_lj);
-        enerd->dvdl_lin[efptCOUL] += ewaldOutput.dvdl[efptCOUL];
-        enerd->dvdl_lin[efptVDW] += ewaldOutput.dvdl[efptVDW];
-        enerd->term[F_COUL_RECIP] = Vlr_q + ewaldOutput.Vcorr_q;
-        enerd->term[F_LJ_RECIP]   = Vlr_lj + ewaldOutput.Vcorr_lj;
-
-        if (debug)
-        {
-            fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", Vlr_q,
-                    ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]);
-            pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM);
-            rvec* fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
-            pr_rvecs(debug, 0, "fshift after LR Corrections", fshift, SHIFTS);
-            fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", Vlr_lj,
-                    ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]);
-            pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM);
-        }
-    }
-
-    if (debug)
-    {
-        print_nrnb(debug, nrnb);
-    }
-
-    if (debug)
-    {
-        rvec* fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
-        pr_rvecs(debug, 0, "fshift after bondeds", fshift, SHIFTS);
-    }
-
-    /* PLUMED */
-    if(plumedswitch){
-      int plumedNeedsEnergy;
-      plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
-      if(!plumedNeedsEnergy) plumed_cmd(plumedmain,"performCalc",NULL);
-    }
-    /* END PLUMED */
-}
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdlib/force.cpp.preplumed b/patches/gromacs-2020.7.diff/src/gromacs/mdlib/force.cpp.preplumed
deleted file mode 100644
index f659874b92..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdlib/force.cpp.preplumed
+++ /dev/null
@@ -1,374 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "force.h"
-
-#include <cassert>
-#include <cmath>
-#include <cstring>
-
-#include "gromacs/domdec/dlbtiming.h"
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/domdec/domdec_struct.h"
-#include "gromacs/ewald/ewald.h"
-#include "gromacs/ewald/long_range_correction.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/listed_forces/listed_forces.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/math/vecdump.h"
-#include "gromacs/mdlib/forcerec_threading.h"
-#include "gromacs/mdlib/qmmm.h"
-#include "gromacs/mdlib/rf_util.h"
-#include "gromacs/mdlib/wall.h"
-#include "gromacs/mdtypes/commrec.h"
-#include "gromacs/mdtypes/enerdata.h"
-#include "gromacs/mdtypes/forceoutput.h"
-#include "gromacs/mdtypes/forcerec.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdatom.h"
-#include "gromacs/mdtypes/simulation_workload.h"
-#include "gromacs/pbcutil/ishift.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/smalloc.h"
-
-static void clearEwaldThreadOutput(ewald_corr_thread_t* ewc_t)
-{
-    ewc_t->Vcorr_q        = 0;
-    ewc_t->Vcorr_lj       = 0;
-    ewc_t->dvdl[efptCOUL] = 0;
-    ewc_t->dvdl[efptVDW]  = 0;
-    clear_mat(ewc_t->vir_q);
-    clear_mat(ewc_t->vir_lj);
-}
-
-static void reduceEwaldThreadOuput(int nthreads, ewald_corr_thread_t* ewc_t)
-{
-    ewald_corr_thread_t& dest = ewc_t[0];
-
-    for (int t = 1; t < nthreads; t++)
-    {
-        dest.Vcorr_q += ewc_t[t].Vcorr_q;
-        dest.Vcorr_lj += ewc_t[t].Vcorr_lj;
-        dest.dvdl[efptCOUL] += ewc_t[t].dvdl[efptCOUL];
-        dest.dvdl[efptVDW] += ewc_t[t].dvdl[efptVDW];
-        m_add(dest.vir_q, ewc_t[t].vir_q, dest.vir_q);
-        m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj);
-    }
-}
-
-void do_force_lowlevel(t_forcerec*                         fr,
-                       const t_inputrec*                   ir,
-                       const t_idef*                       idef,
-                       const t_commrec*                    cr,
-                       const gmx_multisim_t*               ms,
-                       t_nrnb*                             nrnb,
-                       gmx_wallcycle_t                     wcycle,
-                       const t_mdatoms*                    md,
-                       gmx::ArrayRefWithPadding<gmx::RVec> coordinates,
-                       history_t*                          hist,
-                       gmx::ForceOutputs*                  forceOutputs,
-                       gmx_enerdata_t*                     enerd,
-                       t_fcdata*                           fcd,
-                       const matrix                        box,
-                       const real*                         lambda,
-                       const t_graph*                      graph,
-                       const rvec*                         mu_tot,
-                       const gmx::StepWorkload&            stepWork,
-                       const DDBalanceRegionHandler&       ddBalanceRegionHandler)
-{
-    // TODO: Replace all uses of x by const coordinates
-    rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());
-
-    auto& forceWithVirial = forceOutputs->forceWithVirial();
-
-    /* do QMMM first if requested */
-    if (fr->bQMMM)
-    {
-        enerd->term[F_EQM] = calculate_QMMM(cr, &forceOutputs->forceWithShiftForces(), fr);
-    }
-
-    /* Call the short range functions all in one go. */
-
-    if (ir->nwall)
-    {
-        /* foreign lambda component for walls */
-        real dvdl_walls = do_walls(*ir, *fr, box, *md, x, &forceWithVirial, lambda[efptVDW],
-                                   enerd->grpp.ener[egLJSR].data(), nrnb);
-        enerd->dvdl_lin[efptVDW] += dvdl_walls;
-    }
-
-    /* Shift the coordinates. Must be done before listed forces and PPPM,
-     * but is also necessary for SHAKE and update, therefore it can NOT
-     * go when no listed forces have to be evaluated.
-     *
-     * The shifting and PBC code is deliberately not timed, since with
-     * the Verlet scheme it only takes non-zero time with triclinic
-     * boxes, and even then the time is around a factor of 100 less
-     * than the next smallest counter.
-     */
-
-
-    /* Here sometimes we would not need to shift with NBFonly,
-     * but we do so anyhow for consistency of the returned coordinates.
-     */
-    if (graph)
-    {
-        shift_self(graph, box, x);
-        if (TRICLINIC(box))
-        {
-            inc_nrnb(nrnb, eNR_SHIFTX, 2 * graph->nnodes);
-        }
-        else
-        {
-            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
-        }
-    }
-
-    {
-        t_pbc pbc;
-
-        /* Check whether we need to take into account PBC in listed interactions. */
-        const auto needPbcForListedForces =
-                fr->bMolPBC && stepWork.computeListedForces && haveCpuListedForces(*fr, *idef, *fcd);
-        if (needPbcForListedForces)
-        {
-            /* Since all atoms are in the rectangular or triclinic unit-cell,
-             * only single box vector shifts (2 in x) are required.
-             */
-            set_pbc_dd(&pbc, fr->ePBC, DOMAINDECOMP(cr) ? cr->dd->nc : nullptr, TRUE, box);
-        }
-
-        do_force_listed(wcycle, box, ir->fepvals, cr, ms, idef, x, hist, forceOutputs, fr, &pbc,
-                        graph, enerd, nrnb, lambda, md, fcd,
-                        DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr, stepWork);
-    }
-
-    const bool computePmeOnCpu = (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
-                                 && thisRankHasDuty(cr, DUTY_PME)
-                                 && (pme_run_mode(fr->pmedata) == PmeRunMode::CPU);
-
-    const bool haveEwaldSurfaceTerm = haveEwaldSurfaceContribution(*ir);
-
-    /* Do long-range electrostatics and/or LJ-PME
-     * and compute PME surface terms when necessary.
-     */
-    if (computePmeOnCpu || fr->ic->eeltype == eelEWALD || haveEwaldSurfaceTerm)
-    {
-        int  status = 0;
-        real Vlr_q = 0, Vlr_lj = 0;
-
-        /* We reduce all virial, dV/dlambda and energy contributions, except
-         * for the reciprocal energies (Vlr_q, Vlr_lj) into the same struct.
-         */
-        ewald_corr_thread_t& ewaldOutput = fr->ewc_t[0];
-        clearEwaldThreadOutput(&ewaldOutput);
-
-        if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
-        {
-            /* Calculate the Ewald surface force and energy contributions, when necessary */
-            if (haveEwaldSurfaceTerm)
-            {
-                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
-
-                if (fr->n_tpi > 0)
-                {
-                    gmx_fatal(FARGS,
-                              "TPI with PME currently only works in a 3D geometry with tin-foil "
-                              "boundary conditions");
-                }
-
-                int nthreads = fr->nthread_ewc;
-#pragma omp parallel for num_threads(nthreads) schedule(static)
-                for (int t = 0; t < nthreads; t++)
-                {
-                    try
-                    {
-                        ewald_corr_thread_t& ewc_t = fr->ewc_t[t];
-                        if (t > 0)
-                        {
-                            clearEwaldThreadOutput(&ewc_t);
-                        }
-
-                        /* Threading is only supported with the Verlet cut-off
-                         * scheme and then only single particle forces (no
-                         * exclusion forces) are calculated, so we can store
-                         * the forces in the normal, single forceWithVirial->force_ array.
-                         */
-                        ewald_LRcorrection(md->homenr, cr, nthreads, t, *fr, *ir, md->chargeA,
-                                           md->chargeB, (md->nChargePerturbed != 0), x, box, mu_tot,
-                                           as_rvec_array(forceWithVirial.force_.data()),
-                                           &ewc_t.Vcorr_q, lambda[efptCOUL], &ewc_t.dvdl[efptCOUL]);
-                    }
-                    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-                }
-                if (nthreads > 1)
-                {
-                    reduceEwaldThreadOuput(nthreads, fr->ewc_t);
-                }
-                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
-            }
-
-            if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0)
-            {
-                /* This is not in a subcounter because it takes a
-                   negligible and constant-sized amount of time */
-                ewaldOutput.Vcorr_q += ewald_charge_correction(
-                        cr, fr, lambda[efptCOUL], box, &ewaldOutput.dvdl[efptCOUL], ewaldOutput.vir_q);
-            }
-
-            if (computePmeOnCpu)
-            {
-                /* Do reciprocal PME for Coulomb and/or LJ. */
-                assert(fr->n_tpi >= 0);
-                if (fr->n_tpi == 0 || stepWork.stateChanged)
-                {
-                    int pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
-
-                    if (stepWork.computeForces)
-                    {
-                        pme_flags |= GMX_PME_CALC_F;
-                    }
-                    if (stepWork.computeVirial)
-                    {
-                        pme_flags |= GMX_PME_CALC_ENER_VIR;
-                    }
-                    if (fr->n_tpi > 0)
-                    {
-                        /* We don't calculate f, but we do want the potential */
-                        pme_flags |= GMX_PME_CALC_POT;
-                    }
-
-                    /* With domain decomposition we close the CPU side load
-                     * balancing region here, because PME does global
-                     * communication that acts as a global barrier.
-                     */
-                    ddBalanceRegionHandler.closeAfterForceComputationCpu();
-
-                    wallcycle_start(wcycle, ewcPMEMESH);
-                    status = gmx_pme_do(
-                            fr->pmedata,
-                            gmx::constArrayRefFromArray(coordinates.unpaddedConstArrayRef().data(),
-                                                        md->homenr - fr->n_tpi),
-                            forceWithVirial.force_, md->chargeA, md->chargeB, md->sqrt_c6A,
-                            md->sqrt_c6B, md->sigmaA, md->sigmaB, box, cr,
-                            DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
-                            DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle,
-                            ewaldOutput.vir_q, ewaldOutput.vir_lj, &Vlr_q, &Vlr_lj,
-                            lambda[efptCOUL], lambda[efptVDW], &ewaldOutput.dvdl[efptCOUL],
-                            &ewaldOutput.dvdl[efptVDW], pme_flags);
-                    wallcycle_stop(wcycle, ewcPMEMESH);
-                    if (status != 0)
-                    {
-                        gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
-                    }
-
-                    /* We should try to do as little computation after
-                     * this as possible, because parallel PME synchronizes
-                     * the nodes, so we want all load imbalance of the
-                     * rest of the force calculation to be before the PME
-                     * call.  DD load balancing is done on the whole time
-                     * of the force call (without PME).
-                     */
-                }
-                if (fr->n_tpi > 0)
-                {
-                    if (EVDW_PME(ir->vdwtype))
-                    {
-
-                        gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
-                    }
-                    /* Determine the PME grid energy of the test molecule
-                     * with the PME grid potential of the other charges.
-                     */
-                    gmx_pme_calc_energy(
-                            fr->pmedata,
-                            coordinates.unpaddedConstArrayRef().subArray(md->homenr - fr->n_tpi, fr->n_tpi),
-                            gmx::arrayRefFromArray(md->chargeA + md->homenr - fr->n_tpi, fr->n_tpi),
-                            &Vlr_q);
-                }
-            }
-        }
-
-        if (fr->ic->eeltype == eelEWALD)
-        {
-            Vlr_q = do_ewald(ir, x, as_rvec_array(forceWithVirial.force_.data()), md->chargeA,
-                             md->chargeB, box, cr, md->homenr, ewaldOutput.vir_q, fr->ic->ewaldcoeff_q,
-                             lambda[efptCOUL], &ewaldOutput.dvdl[efptCOUL], fr->ewald_table);
-        }
-
-        /* Note that with separate PME nodes we get the real energies later */
-        // TODO it would be simpler if we just accumulated a single
-        // long-range virial contribution.
-        forceWithVirial.addVirialContribution(ewaldOutput.vir_q);
-        forceWithVirial.addVirialContribution(ewaldOutput.vir_lj);
-        enerd->dvdl_lin[efptCOUL] += ewaldOutput.dvdl[efptCOUL];
-        enerd->dvdl_lin[efptVDW] += ewaldOutput.dvdl[efptVDW];
-        enerd->term[F_COUL_RECIP] = Vlr_q + ewaldOutput.Vcorr_q;
-        enerd->term[F_LJ_RECIP]   = Vlr_lj + ewaldOutput.Vcorr_lj;
-
-        if (debug)
-        {
-            fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", Vlr_q,
-                    ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]);
-            pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM);
-            rvec* fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
-            pr_rvecs(debug, 0, "fshift after LR Corrections", fshift, SHIFTS);
-            fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", Vlr_lj,
-                    ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]);
-            pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM);
-        }
-    }
-
-    if (debug)
-    {
-        print_nrnb(debug, nrnb);
-    }
-
-    if (debug)
-    {
-        rvec* fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
-        pr_rvecs(debug, 0, "fshift after bondeds", fshift, SHIFTS);
-    }
-}
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp
deleted file mode 100644
index a6ff208900..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011-2019, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \internal \file
- *
- * \brief This file declares helper functionality for legacy option handling for mdrun
- *
- * \author Berk Hess <hess@kth.se>
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \author Erik Lindahl <erik@kth.se>
- * \author Mark Abraham <mark.j.abraham@gmail.com>
- *
- * \ingroup module_mdrun
- */
-#include "gmxpre.h"
-
-#include "legacymdrunoptions.h"
-
-#include <cstring>
-
-#include "gromacs/math/functions.h"
-#include "gromacs/utility/arraysize.h"
-#include "gromacs/utility/fatalerror.h"
-
-namespace gmx
-{
-
-/*! \brief Return whether the command-line parameter that
- *  will trigger a multi-simulation is set */
-static bool is_multisim_option_set(int argc, const char* const argv[])
-{
-    for (int i = 0; i < argc; ++i)
-    {
-        if (strcmp(argv[i], "-multidir") == 0)
-        {
-            return true;
-        }
-    }
-    return false;
-}
-
-int LegacyMdrunOptions::updateFromCommandLine(int argc, char** argv, ArrayRef<const char*> desc)
-{
-    unsigned long PCA_Flags = PCA_CAN_SET_DEFFNM;
-    // With -multidir, the working directory still needs to be
-    // changed, so we can't check for the existence of files during
-    // parsing.  It isn't useful to do any completion based on file
-    // system contents, either.
-    if (is_multisim_option_set(argc, argv))
-    {
-        PCA_Flags |= PCA_DISABLE_INPUT_FILE_CHECKING;
-    }
-
-    if (!parse_common_args(&argc, argv, PCA_Flags, ssize(filenames), filenames.data(), asize(pa),
-                           pa, ssize(desc), desc.data(), 0, nullptr, &oenv))
-    {
-        return 0;
-    }
-
-    // Handle the options that permits the user to either declare
-    // which compatible GPUs are availble for use, or to select a GPU
-    // task assignment. Either could be in an environment variable (so
-    // that there is a way to customize it, when using MPI in
-    // heterogeneous contexts).
-    {
-        // TODO Argument parsing can't handle std::string. We should
-        // fix that by changing the parsing, once more of the roles of
-        // handling, validating and implementing defaults for user
-        // command-line options have been seperated.
-        hw_opt.gpuIdsAvailable       = gpuIdsAvailable;
-        hw_opt.userGpuTaskAssignment = userGpuTaskAssignment;
-
-        const char* env = getenv("GMX_GPU_ID");
-        if (env != nullptr)
-        {
-            if (!hw_opt.gpuIdsAvailable.empty())
-            {
-                gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
-            }
-            hw_opt.gpuIdsAvailable = env;
-        }
-
-        env = getenv("GMX_GPUTASKS");
-        if (env != nullptr)
-        {
-            if (!hw_opt.userGpuTaskAssignment.empty())
-            {
-                gmx_fatal(FARGS, "GMX_GPUTASKS and -gputasks can not be used at the same time");
-            }
-            hw_opt.userGpuTaskAssignment = env;
-        }
-
-        if (!hw_opt.gpuIdsAvailable.empty() && !hw_opt.userGpuTaskAssignment.empty())
-        {
-            gmx_fatal(FARGS, "-gpu_id and -gputasks cannot be used at the same time");
-        }
-    }
-
-    hw_opt.threadAffinity = static_cast<ThreadAffinity>(nenum(thread_aff_opt_choices));
-
-    if (!opt2parg_bSet("-append", asize(pa), pa))
-    {
-        mdrunOptions.appendingBehavior = AppendingBehavior::Auto;
-    }
-    else
-    {
-        if (opt2parg_bool("-append", asize(pa), pa))
-        {
-            mdrunOptions.appendingBehavior = AppendingBehavior::Appending;
-        }
-        else
-        {
-            mdrunOptions.appendingBehavior = AppendingBehavior::NoAppending;
-        }
-    }
-
-    mdrunOptions.rerun            = opt2bSet("-rerun", ssize(filenames), filenames.data());
-    mdrunOptions.ntompOptionIsSet = opt2parg_bSet("-ntomp", asize(pa), pa);
-
-    domdecOptions.rankOrder    = static_cast<DdRankOrder>(nenum(ddrank_opt_choices));
-    domdecOptions.dlbOption    = static_cast<DlbOption>(nenum(dddlb_opt_choices));
-    domdecOptions.numCells[XX] = roundToInt(realddxyz[XX]);
-    domdecOptions.numCells[YY] = roundToInt(realddxyz[YY]);
-    domdecOptions.numCells[ZZ] = roundToInt(realddxyz[ZZ]);
-
-    /* PLUMED */
-    plumedswitch=0;
-    if (opt2bSet("-plumed", static_cast<int>(filenames.size()), filenames.data())) plumedswitch=1;
-    if(plumedswitch){
-      int real_precision=sizeof(real);
-      real energyUnits=1.0;
-      real lengthUnits=1.0;
-      real timeUnits=1.0;
-  
-      if(!plumed_installed()){
-        gmx_fatal(FARGS,"Plumed is not available. Check your PLUMED_KERNEL variable.");
-      }
-      plumedmain=plumed_create();
-      plumed_cmd(plumedmain,"setRealPrecision",&real_precision);
-      // this is not necessary for gromacs units:
-      plumed_cmd(plumedmain,"setMDEnergyUnits",&energyUnits);
-      plumed_cmd(plumedmain,"setMDLengthUnits",&lengthUnits);
-      plumed_cmd(plumedmain,"setMDTimeUnits",&timeUnits);
-      //
-      plumed_cmd(plumedmain,"setPlumedDat",ftp2fn(efDAT,static_cast<int>(filenames.size()), filenames.data()));
-      plumedswitch=1;
-    }
-    /* PLUMED HREX*/
-    if(getenv("PLUMED_HREX")) plumed_hrex=1;
-    if(plumed_hrex){
-      if(!plumedswitch) gmx_fatal(FARGS,"-hrex (or PLUMED_HREX) requires -plumed");
-      if(replExParams.exchangeInterval==0) gmx_fatal(FARGS,"-hrex (or PLUMED_HREX) replica exchange");
-      if(replExParams.numExchanges!=0) gmx_fatal(FARGS,"-hrex (or PLUMED_HREX) not compatible with -nex");
-    }
-    /* END PLUMED HREX */
-
-    /* END PLUMED */
-
-    return 1;
-}
-
-LegacyMdrunOptions::~LegacyMdrunOptions()
-{
-    output_env_done(oenv);
-}
-
-} // namespace gmx
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp.preplumed b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp.preplumed
deleted file mode 100644
index 2b8e3a0760..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.cpp.preplumed
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011-2019, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \internal \file
- *
- * \brief This file declares helper functionality for legacy option handling for mdrun
- *
- * \author Berk Hess <hess@kth.se>
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \author Erik Lindahl <erik@kth.se>
- * \author Mark Abraham <mark.j.abraham@gmail.com>
- *
- * \ingroup module_mdrun
- */
-#include "gmxpre.h"
-
-#include "legacymdrunoptions.h"
-
-#include <cstring>
-
-#include "gromacs/math/functions.h"
-#include "gromacs/utility/arraysize.h"
-#include "gromacs/utility/fatalerror.h"
-
-namespace gmx
-{
-
-/*! \brief Return whether the command-line parameter that
- *  will trigger a multi-simulation is set */
-static bool is_multisim_option_set(int argc, const char* const argv[])
-{
-    for (int i = 0; i < argc; ++i)
-    {
-        if (strcmp(argv[i], "-multidir") == 0)
-        {
-            return true;
-        }
-    }
-    return false;
-}
-
-int LegacyMdrunOptions::updateFromCommandLine(int argc, char** argv, ArrayRef<const char*> desc)
-{
-    unsigned long PCA_Flags = PCA_CAN_SET_DEFFNM;
-    // With -multidir, the working directory still needs to be
-    // changed, so we can't check for the existence of files during
-    // parsing.  It isn't useful to do any completion based on file
-    // system contents, either.
-    if (is_multisim_option_set(argc, argv))
-    {
-        PCA_Flags |= PCA_DISABLE_INPUT_FILE_CHECKING;
-    }
-
-    if (!parse_common_args(&argc, argv, PCA_Flags, ssize(filenames), filenames.data(), asize(pa),
-                           pa, ssize(desc), desc.data(), 0, nullptr, &oenv))
-    {
-        return 0;
-    }
-
-    // Handle the options that permits the user to either declare
-    // which compatible GPUs are availble for use, or to select a GPU
-    // task assignment. Either could be in an environment variable (so
-    // that there is a way to customize it, when using MPI in
-    // heterogeneous contexts).
-    {
-        // TODO Argument parsing can't handle std::string. We should
-        // fix that by changing the parsing, once more of the roles of
-        // handling, validating and implementing defaults for user
-        // command-line options have been seperated.
-        hw_opt.gpuIdsAvailable       = gpuIdsAvailable;
-        hw_opt.userGpuTaskAssignment = userGpuTaskAssignment;
-
-        const char* env = getenv("GMX_GPU_ID");
-        if (env != nullptr)
-        {
-            if (!hw_opt.gpuIdsAvailable.empty())
-            {
-                gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
-            }
-            hw_opt.gpuIdsAvailable = env;
-        }
-
-        env = getenv("GMX_GPUTASKS");
-        if (env != nullptr)
-        {
-            if (!hw_opt.userGpuTaskAssignment.empty())
-            {
-                gmx_fatal(FARGS, "GMX_GPUTASKS and -gputasks can not be used at the same time");
-            }
-            hw_opt.userGpuTaskAssignment = env;
-        }
-
-        if (!hw_opt.gpuIdsAvailable.empty() && !hw_opt.userGpuTaskAssignment.empty())
-        {
-            gmx_fatal(FARGS, "-gpu_id and -gputasks cannot be used at the same time");
-        }
-    }
-
-    hw_opt.threadAffinity = static_cast<ThreadAffinity>(nenum(thread_aff_opt_choices));
-
-    if (!opt2parg_bSet("-append", asize(pa), pa))
-    {
-        mdrunOptions.appendingBehavior = AppendingBehavior::Auto;
-    }
-    else
-    {
-        if (opt2parg_bool("-append", asize(pa), pa))
-        {
-            mdrunOptions.appendingBehavior = AppendingBehavior::Appending;
-        }
-        else
-        {
-            mdrunOptions.appendingBehavior = AppendingBehavior::NoAppending;
-        }
-    }
-
-    mdrunOptions.rerun            = opt2bSet("-rerun", ssize(filenames), filenames.data());
-    mdrunOptions.ntompOptionIsSet = opt2parg_bSet("-ntomp", asize(pa), pa);
-
-    domdecOptions.rankOrder    = static_cast<DdRankOrder>(nenum(ddrank_opt_choices));
-    domdecOptions.dlbOption    = static_cast<DlbOption>(nenum(dddlb_opt_choices));
-    domdecOptions.numCells[XX] = roundToInt(realddxyz[XX]);
-    domdecOptions.numCells[YY] = roundToInt(realddxyz[YY]);
-    domdecOptions.numCells[ZZ] = roundToInt(realddxyz[ZZ]);
-
-    return 1;
-}
-
-LegacyMdrunOptions::~LegacyMdrunOptions()
-{
-    output_env_done(oenv);
-}
-
-} // namespace gmx
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.h b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.h
deleted file mode 100644
index 966e5a41a8..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.h
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011-2019, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \libinternal \file
- *
- * \brief This file declares helper functionality for legacy option handling for mdrun
- *
- * \author Berk Hess <hess@kth.se>
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \author Erik Lindahl <erik@kth.se>
- * \author Mark Abraham <mark.j.abraham@gmail.com>
- *
- * \ingroup module_mdrun
- * \inlibraryapi
- */
-#ifndef GMX_MDRUN_LEGACYMDRUNOPTIONS_H
-#define GMX_MDRUN_LEGACYMDRUNOPTIONS_H
-
-#include "gromacs/commandline/filenm.h"
-#include "gromacs/commandline/pargs.h"
-#include "gromacs/domdec/options.h"
-#include "gromacs/hardware/hw_info.h"
-#include "gromacs/mdtypes/mdrunoptions.h"
-
-#include "replicaexchange.h"
-
-/* PLUMED */
-#include "../../../Plumed.h"
-extern int    plumedswitch;
-extern plumed plumedmain; 
-/* END PLUMED */
-
-/* PLUMED HREX */
-extern int plumed_hrex;
-/* END PLUMED HREX */
-
-namespace gmx
-
-{
-
-/*! \libinternal
- * \brief This class provides the same command-line option
- * functionality to both CLI and API sessions.
- *
- * This class should not exist, but is necessary now to introduce
- * support for the CLI and API without duplicating code. It should be
- * eliminated following the TODOs below.
- *
- * \todo Modules in mdrun should acquire proper option handling so
- * that all of these declarations and defaults are local to the
- * modules.
- *
- * \todo Contextual aspects, such as working directory
- * and environment variable handling are more properly
- * the role of SimulationContext, and should be moved there */
-class LegacyMdrunOptions
-{
-public:
-    //! Ongoing collection of mdrun options
-    MdrunOptions mdrunOptions;
-    //! Options for the domain decomposition.
-    DomdecOptions domdecOptions;
-    //! Parallelism-related user options.
-    gmx_hw_opt_t hw_opt;
-    //! Command-line override for the duration of a neighbor list with the Verlet scheme.
-    int nstlist_cmdline = 0;
-    //! Parameters for replica-exchange simulations.
-    ReplicaExchangeParameters replExParams;
-
-    //! Filename options to fill from command-line argument values.
-    std::vector<t_filenm> filenames = { { { efTPR, nullptr, nullptr, ffREAD },
-                                          { efTRN, "-o", nullptr, ffWRITE },
-                                          { efCOMPRESSED, "-x", nullptr, ffOPTWR },
-                                          { efCPT, "-cpi", nullptr, ffOPTRD | ffALLOW_MISSING },
-                                          { efCPT, "-cpo", nullptr, ffOPTWR },
-                                          { efSTO, "-c", "confout", ffWRITE },
-                                          { efEDR, "-e", "ener", ffWRITE },
-                                          { efLOG, "-g", "md", ffWRITE },
-                                          { efXVG, "-dhdl", "dhdl", ffOPTWR },
-                                          { efXVG, "-field", "field", ffOPTWR },
-                                          { efXVG, "-table", "table", ffOPTRD },
-                                          { efXVG, "-tablep", "tablep", ffOPTRD },
-                                          { efXVG, "-tableb", "table", ffOPTRDMULT },
-                                          { efTRX, "-rerun", "rerun", ffOPTRD },
-                                          { efXVG, "-tpi", "tpi", ffOPTWR },
-                                          { efXVG, "-tpid", "tpidist", ffOPTWR },
-                                          { efEDI, "-ei", "sam", ffOPTRD },
-                                          { efXVG, "-eo", "edsam", ffOPTWR },
-                                          { efXVG, "-px", "pullx", ffOPTWR },
-                                          { efXVG, "-pf", "pullf", ffOPTWR },
-                                          { efXVG, "-ro", "rotation", ffOPTWR },
-                                          { efLOG, "-ra", "rotangles", ffOPTWR },
-                                          { efLOG, "-rs", "rotslabs", ffOPTWR },
-                                          { efLOG, "-rt", "rottorque", ffOPTWR },
-                                          { efMTX, "-mtx", "nm", ffOPTWR },
-                                          { efRND, "-multidir", nullptr, ffOPTRDMULT },
-                                          { efXVG, "-awh", "awhinit", ffOPTRD },
-                                          { efDAT, "-plumed", "plumed", ffOPTRD },  /* PLUMED */
-                                          { efDAT, "-membed", "membed", ffOPTRD },
-                                          { efTOP, "-mp", "membed", ffOPTRD },
-                                          { efNDX, "-mn", "membed", ffOPTRD },
-                                          { efXVG, "-if", "imdforces", ffOPTWR },
-                                          { efXVG, "-swap", "swapions", ffOPTWR } } };
-
-    //! Print a warning if any force is larger than this (in kJ/mol nm).
-    real pforce = -1;
-
-    //! The value of the -append option
-    bool appendOption = true;
-
-    /*! \brief Output context for writing text files
-     *
-     * \todo Clarify initialization, ownership, and lifetime. */
-    gmx_output_env_t* oenv = nullptr;
-
-    /*! \brief Command line options, defaults, docs and storage for them to fill. */
-    /*! \{ */
-    rvec        realddxyz                                                    = { 0, 0, 0 };
-    const char* ddrank_opt_choices[static_cast<int>(DdRankOrder::Count) + 1] = {
-        nullptr, "interleave", "pp_pme", "cartesian", nullptr
-    };
-    const char* dddlb_opt_choices[static_cast<int>(DlbOption::Count) + 1] = { nullptr, "auto", "no",
-                                                                              "yes", nullptr };
-    const char* thread_aff_opt_choices[static_cast<int>(ThreadAffinity::Count) + 1] = {
-        nullptr, "auto", "on", "off", nullptr
-    };
-    const char* nbpu_opt_choices[5]    = { nullptr, "auto", "cpu", "gpu", nullptr };
-    const char* pme_opt_choices[5]     = { nullptr, "auto", "cpu", "gpu", nullptr };
-    const char* pme_fft_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr };
-    const char* bonded_opt_choices[5]  = { nullptr, "auto", "cpu", "gpu", nullptr };
-    const char* update_opt_choices[5]  = { nullptr, "auto", "cpu", "gpu", nullptr };
-    const char* gpuIdsAvailable        = "";
-    const char* userGpuTaskAssignment  = "";
-
-
-    ImdOptions& imdOptions = mdrunOptions.imdOptions;
-
-        t_pargs           pa[49] = {
-
-        { "-dd", FALSE, etRVEC, { &realddxyz }, "Domain decomposition grid, 0 is optimize" },
-        { "-ddorder", FALSE, etENUM, { ddrank_opt_choices }, "DD rank order" },
-        { "-npme",
-          FALSE,
-          etINT,
-          { &domdecOptions.numPmeRanks },
-          "Number of separate ranks to be used for PME, -1 is guess" },
-        { "-nt",
-          FALSE,
-          etINT,
-          { &hw_opt.nthreads_tot },
-          "Total number of threads to start (0 is guess)" },
-        { "-ntmpi",
-          FALSE,
-          etINT,
-          { &hw_opt.nthreads_tmpi },
-          "Number of thread-MPI ranks to start (0 is guess)" },
-        { "-ntomp",
-          FALSE,
-          etINT,
-          { &hw_opt.nthreads_omp },
-          "Number of OpenMP threads per MPI rank to start (0 is guess)" },
-        { "-ntomp_pme",
-          FALSE,
-          etINT,
-          { &hw_opt.nthreads_omp_pme },
-          "Number of OpenMP threads per MPI rank to start (0 is -ntomp)" },
-        { "-pin",
-          FALSE,
-          etENUM,
-          { thread_aff_opt_choices },
-          "Whether mdrun should try to set thread affinities" },
-        { "-pinoffset",
-          FALSE,
-          etINT,
-          { &hw_opt.core_pinning_offset },
-          "The lowest logical core number to which mdrun should pin the first thread" },
-        { "-pinstride",
-          FALSE,
-          etINT,
-          { &hw_opt.core_pinning_stride },
-          "Pinning distance in logical cores for threads, use 0 to minimize the number of threads "
-          "per physical core" },
-        { "-gpu_id",
-          FALSE,
-          etSTR,
-          { &gpuIdsAvailable },
-          "List of unique GPU device IDs available to use" },
-        { "-gputasks",
-          FALSE,
-          etSTR,
-          { &userGpuTaskAssignment },
-          "List of GPU device IDs, mapping each PP task on each node to a device" },
-        { "-ddcheck",
-          FALSE,
-          etBOOL,
-          { &domdecOptions.checkBondedInteractions },
-          "Check for all bonded interactions with DD" },
-        { "-ddbondcomm",
-          FALSE,
-          etBOOL,
-          { &domdecOptions.useBondedCommunication },
-          "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
-        { "-rdd",
-          FALSE,
-          etREAL,
-          { &domdecOptions.minimumCommunicationRange },
-          "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial "
-          "coordinates" },
-        { "-rcon",
-          FALSE,
-          etREAL,
-          { &domdecOptions.constraintCommunicationRange },
-          "Maximum distance for P-LINCS (nm), 0 is estimate" },
-        { "-dlb", FALSE, etENUM, { dddlb_opt_choices }, "Dynamic load balancing (with DD)" },
-        { "-dds",
-          FALSE,
-          etREAL,
-          { &domdecOptions.dlbScaling },
-          "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in "
-          "order to "
-          "provide a margin in which dynamic load balancing can act while preserving the minimum "
-          "cell size." },
-        { "-ddcsx",
-          FALSE,
-          etSTR,
-          { &domdecOptions.cellSizeX },
-          "HIDDENA string containing a vector of the relative sizes in the x "
-          "direction of the corresponding DD cells. Only effective with static "
-          "load balancing." },
-        { "-ddcsy",
-          FALSE,
-          etSTR,
-          { &domdecOptions.cellSizeY },
-          "HIDDENA string containing a vector of the relative sizes in the y "
-          "direction of the corresponding DD cells. Only effective with static "
-          "load balancing." },
-        { "-ddcsz",
-          FALSE,
-          etSTR,
-          { &domdecOptions.cellSizeZ },
-          "HIDDENA string containing a vector of the relative sizes in the z "
-          "direction of the corresponding DD cells. Only effective with static "
-          "load balancing." },
-        { "-nb", FALSE, etENUM, { nbpu_opt_choices }, "Calculate non-bonded interactions on" },
-        { "-nstlist",
-          FALSE,
-          etINT,
-          { &nstlist_cmdline },
-          "Set nstlist when using a Verlet buffer tolerance (0 is guess)" },
-        { "-tunepme",
-          FALSE,
-          etBOOL,
-          { &mdrunOptions.tunePme },
-          "Optimize PME load between PP/PME ranks or GPU/CPU" },
-        { "-pme", FALSE, etENUM, { pme_opt_choices }, "Perform PME calculations on" },
-        { "-pmefft", FALSE, etENUM, { pme_fft_opt_choices }, "Perform PME FFT calculations on" },
-        { "-bonded", FALSE, etENUM, { bonded_opt_choices }, "Perform bonded calculations on" },
-        { "-update", FALSE, etENUM, { update_opt_choices }, "Perform update and constraints on" },
-        { "-v", FALSE, etBOOL, { &mdrunOptions.verbose }, "Be loud and noisy" },
-        { "-pforce", FALSE, etREAL, { &pforce }, "Print all forces larger than this (kJ/mol nm)" },
-        { "-reprod",
-          FALSE,
-          etBOOL,
-          { &mdrunOptions.reproducible },
-          "Try to avoid optimizations that affect binary reproducibility" },
-        { "-cpt",
-          FALSE,
-          etREAL,
-          { &mdrunOptions.checkpointOptions.period },
-          "Checkpoint interval (minutes)" },
-        { "-cpnum",
-          FALSE,
-          etBOOL,
-          { &mdrunOptions.checkpointOptions.keepAndNumberCheckpointFiles },
-          "Keep and number checkpoint files" },
-        { "-append",
-          FALSE,
-          etBOOL,
-          { &appendOption },
-          "Append to previous output files when continuing from checkpoint instead of adding the "
-          "simulation part number to all file names" },
-        { "-nsteps",
-          FALSE,
-          etINT64,
-          { &mdrunOptions.numStepsCommandline },
-          "Run this number of steps (-1 means infinite, -2 means use mdp option, smaller is "
-          "invalid)" },
-        { "-maxh",
-          FALSE,
-          etREAL,
-          { &mdrunOptions.maximumHoursToRun },
-          "Terminate after 0.99 times this time (hours)" },
-        { "-replex",
-          FALSE,
-          etINT,
-          { &replExParams.exchangeInterval },
-          "Attempt replica exchange periodically with this period (steps)" },
-        { "-nex",
-          FALSE,
-          etINT,
-          { &replExParams.numExchanges },
-          "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). "
-          " -nex zero or not specified gives neighbor replica exchange." },
-        { "-reseed",
-          FALSE,
-          etINT,
-          { &replExParams.randomSeed },
-          "Seed for replica exchange, -1 is generate a seed" },
-        { "-hrex",  FALSE, etBOOL, {&plumed_hrex}, /* PLUMED HREX */
-              "Enable hamiltonian replica exchange" },
-        { "-imdport", FALSE, etINT, { &imdOptions.port }, "HIDDENIMD listening port" },
-        { "-imdwait",
-          FALSE,
-          etBOOL,
-          { &imdOptions.wait },
-          "HIDDENPause the simulation while no IMD client is connected" },
-        { "-imdterm",
-          FALSE,
-          etBOOL,
-          { &imdOptions.terminatable },
-          "HIDDENAllow termination of the simulation from IMD client" },
-        { "-imdpull",
-          FALSE,
-          etBOOL,
-          { &imdOptions.pull },
-          "HIDDENAllow pulling in the simulation from IMD client" },
-        { "-rerunvsite",
-          FALSE,
-          etBOOL,
-          { &mdrunOptions.rerunConstructVsites },
-          "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
-        { "-confout",
-          FALSE,
-          etBOOL,
-          { &mdrunOptions.writeConfout },
-          "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last "
-          "step" },
-        { "-stepout",
-          FALSE,
-          etINT,
-          { &mdrunOptions.verboseStepPrintInterval },
-          "HIDDENFrequency of writing the remaining wall clock time for the run" },
-        { "-resetstep",
-          FALSE,
-          etINT,
-          { &mdrunOptions.timingOptions.resetStep },
-          "HIDDENReset cycle counters after these many time steps" },
-        { "-resethway",
-          FALSE,
-          etBOOL,
-          { &mdrunOptions.timingOptions.resetHalfway },
-          "HIDDENReset the cycle counters after half the number of steps or halfway "
-          "[TT]-maxh[tt]" }
-    };
-    /*! \} */
-
-    //! Parses the command-line input and prepares to start mdrun.
-    int updateFromCommandLine(int argc, char** argv, ArrayRef<const char*> desc);
-
-    ~LegacyMdrunOptions();
-};
-
-} // end namespace gmx
-
-#endif
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.h.preplumed b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.h.preplumed
deleted file mode 100644
index 796e479490..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/legacymdrunoptions.h.preplumed
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011-2019, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \libinternal \file
- *
- * \brief This file declares helper functionality for legacy option handling for mdrun
- *
- * \author Berk Hess <hess@kth.se>
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \author Erik Lindahl <erik@kth.se>
- * \author Mark Abraham <mark.j.abraham@gmail.com>
- *
- * \ingroup module_mdrun
- * \inlibraryapi
- */
-#ifndef GMX_MDRUN_LEGACYMDRUNOPTIONS_H
-#define GMX_MDRUN_LEGACYMDRUNOPTIONS_H
-
-#include "gromacs/commandline/filenm.h"
-#include "gromacs/commandline/pargs.h"
-#include "gromacs/domdec/options.h"
-#include "gromacs/hardware/hw_info.h"
-#include "gromacs/mdtypes/mdrunoptions.h"
-
-#include "replicaexchange.h"
-
-namespace gmx
-{
-
-/*! \libinternal
- * \brief This class provides the same command-line option
- * functionality to both CLI and API sessions.
- *
- * This class should not exist, but is necessary now to introduce
- * support for the CLI and API without duplicating code. It should be
- * eliminated following the TODOs below.
- *
- * \todo Modules in mdrun should acquire proper option handling so
- * that all of these declarations and defaults are local to the
- * modules.
- *
- * \todo Contextual aspects, such as working directory
- * and environment variable handling are more properly
- * the role of SimulationContext, and should be moved there */
-class LegacyMdrunOptions
-{
-public:
-    //! Ongoing collection of mdrun options
-    MdrunOptions mdrunOptions;
-    //! Options for the domain decomposition.
-    DomdecOptions domdecOptions;
-    //! Parallelism-related user options.
-    gmx_hw_opt_t hw_opt;
-    //! Command-line override for the duration of a neighbor list with the Verlet scheme.
-    int nstlist_cmdline = 0;
-    //! Parameters for replica-exchange simulations.
-    ReplicaExchangeParameters replExParams;
-
-    //! Filename options to fill from command-line argument values.
-    std::vector<t_filenm> filenames = { { { efTPR, nullptr, nullptr, ffREAD },
-                                          { efTRN, "-o", nullptr, ffWRITE },
-                                          { efCOMPRESSED, "-x", nullptr, ffOPTWR },
-                                          { efCPT, "-cpi", nullptr, ffOPTRD | ffALLOW_MISSING },
-                                          { efCPT, "-cpo", nullptr, ffOPTWR },
-                                          { efSTO, "-c", "confout", ffWRITE },
-                                          { efEDR, "-e", "ener", ffWRITE },
-                                          { efLOG, "-g", "md", ffWRITE },
-                                          { efXVG, "-dhdl", "dhdl", ffOPTWR },
-                                          { efXVG, "-field", "field", ffOPTWR },
-                                          { efXVG, "-table", "table", ffOPTRD },
-                                          { efXVG, "-tablep", "tablep", ffOPTRD },
-                                          { efXVG, "-tableb", "table", ffOPTRDMULT },
-                                          { efTRX, "-rerun", "rerun", ffOPTRD },
-                                          { efXVG, "-tpi", "tpi", ffOPTWR },
-                                          { efXVG, "-tpid", "tpidist", ffOPTWR },
-                                          { efEDI, "-ei", "sam", ffOPTRD },
-                                          { efXVG, "-eo", "edsam", ffOPTWR },
-                                          { efXVG, "-px", "pullx", ffOPTWR },
-                                          { efXVG, "-pf", "pullf", ffOPTWR },
-                                          { efXVG, "-ro", "rotation", ffOPTWR },
-                                          { efLOG, "-ra", "rotangles", ffOPTWR },
-                                          { efLOG, "-rs", "rotslabs", ffOPTWR },
-                                          { efLOG, "-rt", "rottorque", ffOPTWR },
-                                          { efMTX, "-mtx", "nm", ffOPTWR },
-                                          { efRND, "-multidir", nullptr, ffOPTRDMULT },
-                                          { efXVG, "-awh", "awhinit", ffOPTRD },
-                                          { efDAT, "-membed", "membed", ffOPTRD },
-                                          { efTOP, "-mp", "membed", ffOPTRD },
-                                          { efNDX, "-mn", "membed", ffOPTRD },
-                                          { efXVG, "-if", "imdforces", ffOPTWR },
-                                          { efXVG, "-swap", "swapions", ffOPTWR } } };
-
-    //! Print a warning if any force is larger than this (in kJ/mol nm).
-    real pforce = -1;
-
-    //! The value of the -append option
-    bool appendOption = true;
-
-    /*! \brief Output context for writing text files
-     *
-     * \todo Clarify initialization, ownership, and lifetime. */
-    gmx_output_env_t* oenv = nullptr;
-
-    /*! \brief Command line options, defaults, docs and storage for them to fill. */
-    /*! \{ */
-    rvec        realddxyz                                                    = { 0, 0, 0 };
-    const char* ddrank_opt_choices[static_cast<int>(DdRankOrder::Count) + 1] = {
-        nullptr, "interleave", "pp_pme", "cartesian", nullptr
-    };
-    const char* dddlb_opt_choices[static_cast<int>(DlbOption::Count) + 1] = { nullptr, "auto", "no",
-                                                                              "yes", nullptr };
-    const char* thread_aff_opt_choices[static_cast<int>(ThreadAffinity::Count) + 1] = {
-        nullptr, "auto", "on", "off", nullptr
-    };
-    const char* nbpu_opt_choices[5]    = { nullptr, "auto", "cpu", "gpu", nullptr };
-    const char* pme_opt_choices[5]     = { nullptr, "auto", "cpu", "gpu", nullptr };
-    const char* pme_fft_opt_choices[5] = { nullptr, "auto", "cpu", "gpu", nullptr };
-    const char* bonded_opt_choices[5]  = { nullptr, "auto", "cpu", "gpu", nullptr };
-    const char* update_opt_choices[5]  = { nullptr, "auto", "cpu", "gpu", nullptr };
-    const char* gpuIdsAvailable        = "";
-    const char* userGpuTaskAssignment  = "";
-
-
-    ImdOptions& imdOptions = mdrunOptions.imdOptions;
-
-    t_pargs pa[48] = {
-
-        { "-dd", FALSE, etRVEC, { &realddxyz }, "Domain decomposition grid, 0 is optimize" },
-        { "-ddorder", FALSE, etENUM, { ddrank_opt_choices }, "DD rank order" },
-        { "-npme",
-          FALSE,
-          etINT,
-          { &domdecOptions.numPmeRanks },
-          "Number of separate ranks to be used for PME, -1 is guess" },
-        { "-nt",
-          FALSE,
-          etINT,
-          { &hw_opt.nthreads_tot },
-          "Total number of threads to start (0 is guess)" },
-        { "-ntmpi",
-          FALSE,
-          etINT,
-          { &hw_opt.nthreads_tmpi },
-          "Number of thread-MPI ranks to start (0 is guess)" },
-        { "-ntomp",
-          FALSE,
-          etINT,
-          { &hw_opt.nthreads_omp },
-          "Number of OpenMP threads per MPI rank to start (0 is guess)" },
-        { "-ntomp_pme",
-          FALSE,
-          etINT,
-          { &hw_opt.nthreads_omp_pme },
-          "Number of OpenMP threads per MPI rank to start (0 is -ntomp)" },
-        { "-pin",
-          FALSE,
-          etENUM,
-          { thread_aff_opt_choices },
-          "Whether mdrun should try to set thread affinities" },
-        { "-pinoffset",
-          FALSE,
-          etINT,
-          { &hw_opt.core_pinning_offset },
-          "The lowest logical core number to which mdrun should pin the first thread" },
-        { "-pinstride",
-          FALSE,
-          etINT,
-          { &hw_opt.core_pinning_stride },
-          "Pinning distance in logical cores for threads, use 0 to minimize the number of threads "
-          "per physical core" },
-        { "-gpu_id",
-          FALSE,
-          etSTR,
-          { &gpuIdsAvailable },
-          "List of unique GPU device IDs available to use" },
-        { "-gputasks",
-          FALSE,
-          etSTR,
-          { &userGpuTaskAssignment },
-          "List of GPU device IDs, mapping each PP task on each node to a device" },
-        { "-ddcheck",
-          FALSE,
-          etBOOL,
-          { &domdecOptions.checkBondedInteractions },
-          "Check for all bonded interactions with DD" },
-        { "-ddbondcomm",
-          FALSE,
-          etBOOL,
-          { &domdecOptions.useBondedCommunication },
-          "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
-        { "-rdd",
-          FALSE,
-          etREAL,
-          { &domdecOptions.minimumCommunicationRange },
-          "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial "
-          "coordinates" },
-        { "-rcon",
-          FALSE,
-          etREAL,
-          { &domdecOptions.constraintCommunicationRange },
-          "Maximum distance for P-LINCS (nm), 0 is estimate" },
-        { "-dlb", FALSE, etENUM, { dddlb_opt_choices }, "Dynamic load balancing (with DD)" },
-        { "-dds",
-          FALSE,
-          etREAL,
-          { &domdecOptions.dlbScaling },
-          "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in "
-          "order to "
-          "provide a margin in which dynamic load balancing can act while preserving the minimum "
-          "cell size." },
-        { "-ddcsx",
-          FALSE,
-          etSTR,
-          { &domdecOptions.cellSizeX },
-          "HIDDENA string containing a vector of the relative sizes in the x "
-          "direction of the corresponding DD cells. Only effective with static "
-          "load balancing." },
-        { "-ddcsy",
-          FALSE,
-          etSTR,
-          { &domdecOptions.cellSizeY },
-          "HIDDENA string containing a vector of the relative sizes in the y "
-          "direction of the corresponding DD cells. Only effective with static "
-          "load balancing." },
-        { "-ddcsz",
-          FALSE,
-          etSTR,
-          { &domdecOptions.cellSizeZ },
-          "HIDDENA string containing a vector of the relative sizes in the z "
-          "direction of the corresponding DD cells. Only effective with static "
-          "load balancing." },
-        { "-nb", FALSE, etENUM, { nbpu_opt_choices }, "Calculate non-bonded interactions on" },
-        { "-nstlist",
-          FALSE,
-          etINT,
-          { &nstlist_cmdline },
-          "Set nstlist when using a Verlet buffer tolerance (0 is guess)" },
-        { "-tunepme",
-          FALSE,
-          etBOOL,
-          { &mdrunOptions.tunePme },
-          "Optimize PME load between PP/PME ranks or GPU/CPU" },
-        { "-pme", FALSE, etENUM, { pme_opt_choices }, "Perform PME calculations on" },
-        { "-pmefft", FALSE, etENUM, { pme_fft_opt_choices }, "Perform PME FFT calculations on" },
-        { "-bonded", FALSE, etENUM, { bonded_opt_choices }, "Perform bonded calculations on" },
-        { "-update", FALSE, etENUM, { update_opt_choices }, "Perform update and constraints on" },
-        { "-v", FALSE, etBOOL, { &mdrunOptions.verbose }, "Be loud and noisy" },
-        { "-pforce", FALSE, etREAL, { &pforce }, "Print all forces larger than this (kJ/mol nm)" },
-        { "-reprod",
-          FALSE,
-          etBOOL,
-          { &mdrunOptions.reproducible },
-          "Try to avoid optimizations that affect binary reproducibility" },
-        { "-cpt",
-          FALSE,
-          etREAL,
-          { &mdrunOptions.checkpointOptions.period },
-          "Checkpoint interval (minutes)" },
-        { "-cpnum",
-          FALSE,
-          etBOOL,
-          { &mdrunOptions.checkpointOptions.keepAndNumberCheckpointFiles },
-          "Keep and number checkpoint files" },
-        { "-append",
-          FALSE,
-          etBOOL,
-          { &appendOption },
-          "Append to previous output files when continuing from checkpoint instead of adding the "
-          "simulation part number to all file names" },
-        { "-nsteps",
-          FALSE,
-          etINT64,
-          { &mdrunOptions.numStepsCommandline },
-          "Run this number of steps (-1 means infinite, -2 means use mdp option, smaller is "
-          "invalid)" },
-        { "-maxh",
-          FALSE,
-          etREAL,
-          { &mdrunOptions.maximumHoursToRun },
-          "Terminate after 0.99 times this time (hours)" },
-        { "-replex",
-          FALSE,
-          etINT,
-          { &replExParams.exchangeInterval },
-          "Attempt replica exchange periodically with this period (steps)" },
-        { "-nex",
-          FALSE,
-          etINT,
-          { &replExParams.numExchanges },
-          "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). "
-          " -nex zero or not specified gives neighbor replica exchange." },
-        { "-reseed",
-          FALSE,
-          etINT,
-          { &replExParams.randomSeed },
-          "Seed for replica exchange, -1 is generate a seed" },
-        { "-imdport", FALSE, etINT, { &imdOptions.port }, "HIDDENIMD listening port" },
-        { "-imdwait",
-          FALSE,
-          etBOOL,
-          { &imdOptions.wait },
-          "HIDDENPause the simulation while no IMD client is connected" },
-        { "-imdterm",
-          FALSE,
-          etBOOL,
-          { &imdOptions.terminatable },
-          "HIDDENAllow termination of the simulation from IMD client" },
-        { "-imdpull",
-          FALSE,
-          etBOOL,
-          { &imdOptions.pull },
-          "HIDDENAllow pulling in the simulation from IMD client" },
-        { "-rerunvsite",
-          FALSE,
-          etBOOL,
-          { &mdrunOptions.rerunConstructVsites },
-          "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
-        { "-confout",
-          FALSE,
-          etBOOL,
-          { &mdrunOptions.writeConfout },
-          "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last "
-          "step" },
-        { "-stepout",
-          FALSE,
-          etINT,
-          { &mdrunOptions.verboseStepPrintInterval },
-          "HIDDENFrequency of writing the remaining wall clock time for the run" },
-        { "-resetstep",
-          FALSE,
-          etINT,
-          { &mdrunOptions.timingOptions.resetStep },
-          "HIDDENReset cycle counters after these many time steps" },
-        { "-resethway",
-          FALSE,
-          etBOOL,
-          { &mdrunOptions.timingOptions.resetHalfway },
-          "HIDDENReset the cycle counters after half the number of steps or halfway "
-          "[TT]-maxh[tt]" }
-    };
-    /*! \} */
-
-    //! Parses the command-line input and prepares to start mdrun.
-    int updateFromCommandLine(int argc, char** argv, ArrayRef<const char*> desc);
-
-    ~LegacyMdrunOptions();
-};
-
-} // end namespace gmx
-
-#endif
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/md.cpp b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/md.cpp
deleted file mode 100644
index bc21ddacf0..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/md.cpp
+++ /dev/null
@@ -1,1897 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011-2019,2020, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \internal \file
- *
- * \brief Implements the integrator for normal molecular dynamics simulations
- *
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \ingroup module_mdrun
- */
-#include "gmxpre.h"
-
-#include <cinttypes>
-#include <cmath>
-#include <cstdio>
-#include <cstdlib>
-
-#include <algorithm>
-#include <memory>
-
-#include "gromacs/awh/awh.h"
-#include "gromacs/commandline/filenm.h"
-#include "gromacs/domdec/collect.h"
-#include "gromacs/domdec/dlbtiming.h"
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/domdec/domdec_network.h"
-#include "gromacs/domdec/domdec_struct.h"
-#include "gromacs/domdec/mdsetup.h"
-#include "gromacs/domdec/partition.h"
-#include "gromacs/essentialdynamics/edsam.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/ewald/pme_load_balancing.h"
-#include "gromacs/fileio/trxio.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/gpu_utils/gpu_utils.h"
-#include "gromacs/imd/imd.h"
-#include "gromacs/listed_forces/manage_threading.h"
-#include "gromacs/math/functions.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/utilities.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/checkpointhandler.h"
-#include "gromacs/mdlib/compute_io.h"
-#include "gromacs/mdlib/constr.h"
-#include "gromacs/mdlib/ebin.h"
-#include "gromacs/mdlib/enerdata_utils.h"
-#include "gromacs/mdlib/energyoutput.h"
-#include "gromacs/mdlib/expanded.h"
-#include "gromacs/mdlib/force.h"
-#include "gromacs/mdlib/force_flags.h"
-#include "gromacs/mdlib/forcerec.h"
-#include "gromacs/mdlib/md_support.h"
-#include "gromacs/mdlib/mdatoms.h"
-#include "gromacs/mdlib/mdoutf.h"
-#include "gromacs/mdlib/membed.h"
-#include "gromacs/mdlib/resethandler.h"
-#include "gromacs/mdlib/sighandler.h"
-#include "gromacs/mdlib/simulationsignal.h"
-#include "gromacs/mdlib/stat.h"
-#include "gromacs/mdlib/stophandler.h"
-#include "gromacs/mdlib/tgroup.h"
-#include "gromacs/mdlib/trajectory_writing.h"
-#include "gromacs/mdlib/update.h"
-#include "gromacs/mdlib/update_constrain_cuda.h"
-#include "gromacs/mdlib/vcm.h"
-#include "gromacs/mdlib/vsite.h"
-#include "gromacs/mdrunutility/handlerestart.h"
-#include "gromacs/mdrunutility/multisim.h"
-#include "gromacs/mdrunutility/printtime.h"
-#include "gromacs/mdtypes/awh_history.h"
-#include "gromacs/mdtypes/awh_params.h"
-#include "gromacs/mdtypes/commrec.h"
-#include "gromacs/mdtypes/df_history.h"
-#include "gromacs/mdtypes/energyhistory.h"
-#include "gromacs/mdtypes/fcdata.h"
-#include "gromacs/mdtypes/forcerec.h"
-#include "gromacs/mdtypes/group.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdatom.h"
-#include "gromacs/mdtypes/mdrunoptions.h"
-#include "gromacs/mdtypes/observableshistory.h"
-#include "gromacs/mdtypes/pullhistory.h"
-#include "gromacs/mdtypes/simulation_workload.h"
-#include "gromacs/mdtypes/state.h"
-#include "gromacs/mdtypes/state_propagator_data_gpu.h"
-#include "gromacs/modularsimulator/energyelement.h"
-#include "gromacs/nbnxm/gpu_data_mgmt.h"
-#include "gromacs/nbnxm/nbnxm.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/pulling/output.h"
-#include "gromacs/pulling/pull.h"
-#include "gromacs/swap/swapcoords.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/timing/walltime_accounting.h"
-#include "gromacs/topology/atoms.h"
-#include "gromacs/topology/idef.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/topology/topology.h"
-#include "gromacs/trajectory/trajectoryframe.h"
-#include "gromacs/utility/basedefinitions.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/logger.h"
-#include "gromacs/utility/real.h"
-#include "gromacs/utility/smalloc.h"
-
-#include "legacysimulator.h"
-#include "replicaexchange.h"
-#include "shellfc.h"
-
-/* PLUMED */
-#include "../../../Plumed.h"
-extern int    plumedswitch;
-extern plumed plumedmain;
-/* END PLUMED */
-
-/* PLUMED HREX */
-extern int plumed_hrex;
-/* END PLUMED HREX */
-
-using gmx::SimulationSignaller;
-
-void gmx::LegacySimulator::do_md()
-{
-    // TODO Historically, the EM and MD "integrators" used different
-    // names for the t_inputrec *parameter, but these must have the
-    // same name, now that it's a member of a struct. We use this ir
-    // alias to avoid a large ripple of nearly useless changes.
-    // t_inputrec is being replaced by IMdpOptionsProvider, so this
-    // will go away eventually.
-    t_inputrec*  ir = inputrec;
-    int64_t      step, step_rel;
-    double       t, t0 = ir->init_t, lam0[efptNR];
-    gmx_bool     bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
-    gmx_bool     bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
-    gmx_bool     bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
-    gmx_bool     do_ene, do_log, do_verbose;
-    gmx_bool     bMasterState;
-    unsigned int force_flags;
-    tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, tmp_vir = { { 0 } },
-           pres = { { 0 } };
-    int                         i, m;
-    rvec                        mu_tot;
-    matrix                      pressureCouplingMu, M;
-    gmx_repl_ex_t               repl_ex = nullptr;
-    gmx_localtop_t              top;
-    PaddedHostVector<gmx::RVec> f{};
-    gmx_global_stat_t           gstat;
-    t_graph*                    graph = nullptr;
-    gmx_shellfc_t*              shellfc;
-    gmx_bool                    bSumEkinhOld, bDoReplEx, bDoReplExPrev, bExchanged, bNeedRepartition;
-    gmx_bool                    bTemp, bPres, bTrotter;
-    real                        dvdl_constr;
-    std::vector<RVec>           cbuf;
-    matrix                      lastbox;
-    int                         lamnew = 0;
-    /* for FEP */
-    int       nstfep = 0;
-    double    cycles;
-    real      saved_conserved_quantity = 0;
-    real      last_ekin                = 0;
-    t_extmass MassQ;
-    char      sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
-
-    /* PME load balancing data for GPU kernels */
-    gmx_bool bPMETune         = FALSE;
-    gmx_bool bPMETunePrinting = FALSE;
-
-    bool bInteractiveMDstep = false;
-
-    /* PLUMED */
-    int plumedNeedsEnergy=0;
-    int plumedWantsToStop=0;
-    matrix plumed_vir;
-    real lambdaForce=0;
-    real realFepState=0;
-    /* END PLUMED */
-
-    /* Domain decomposition could incorrectly miss a bonded
-       interaction, but checking for that requires a global
-       communication stage, which does not otherwise happen in DD
-       code. So we do that alongside the first global energy reduction
-       after a new DD is made. These variables handle whether the
-       check happens, and the result it returns. */
-    bool shouldCheckNumberOfBondedInteractions = false;
-    int  totalNumberOfBondedInteractions       = -1;
-
-    SimulationSignals signals;
-    // Most global communnication stages don't propagate mdrun
-    // signals, and will use this object to achieve that.
-    SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
-
-    if (!mdrunOptions.writeConfout)
-    {
-        // This is on by default, and the main known use case for
-        // turning it off is for convenience in benchmarking, which is
-        // something that should not show up in the general user
-        // interface.
-        GMX_LOG(mdlog.info)
-                .asParagraph()
-                .appendText(
-                        "The -noconfout functionality is deprecated, and may be removed in a "
-                        "future version.");
-    }
-
-    /* md-vv uses averaged full step velocities for T-control
-       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
-       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
-    bTrotter = (EI_VV(ir->eI)
-                && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
-
-    const bool bRerunMD = false;
-
-    int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
-    bGStatEveryStep   = (nstglobalcomm == 1);
-
-    SimulationGroups* groups = &top_global->groups;
-
-    std::unique_ptr<EssentialDynamics> ed = nullptr;
-    if (opt2bSet("-ei", nfile, fnm))
-    {
-        /* Initialize essential dynamics sampling */
-        ed = init_edsam(mdlog, opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm), top_global,
-                        ir, cr, constr, state_global, observablesHistory, oenv, startingBehavior);
-    }
-    else if (observablesHistory->edsamHistory)
-    {
-        gmx_fatal(FARGS,
-                  "The checkpoint is from a run with essential dynamics sampling, "
-                  "but the current run did not specify the -ei option. "
-                  "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
-    }
-
-    initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
-    Update     upd(ir, deform);
-    const bool doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
-    const bool useReplicaExchange   = (replExParams.exchangeInterval > 0);
-
-    bool simulationsShareState = false;
-    int  nstSignalComm         = nstglobalcomm;
-    {
-        // TODO This implementation of ensemble orientation restraints is nasty because
-        // a user can't just do multi-sim with single-sim orientation restraints.
-        bool usingEnsembleRestraints =
-                (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
-        bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
-
-        // Replica exchange, ensemble restraints and AWH need all
-        // simulations to remain synchronized, so they need
-        // checkpoints and stop conditions to act on the same step, so
-        // the propagation of such signals must take place between
-        // simulations, not just within simulations.
-        // TODO: Make algorithm initializers set these flags.
-        simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim || (plumedswitch && ms); // PLUMED hack, if we have multiple sim and plumed we usually want them to be in sync 
-
-        if (simulationsShareState)
-        {
-            // Inter-simulation signal communication does not need to happen
-            // often, so we use a minimum of 200 steps to reduce overhead.
-            const int c_minimumInterSimulationSignallingInterval = 200;
-            nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm)
-                            * nstglobalcomm;
-        }
-    }
-
-    if (startingBehavior != StartingBehavior::RestartWithAppending)
-    {
-        pleaseCiteCouplingAlgorithms(fplog, *ir);
-    }
-    gmx_mdoutf* outf =
-            init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir,
-                        top_global, oenv, wcycle, startingBehavior, simulationsShareState, ms);
-    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work,
-                                   mdoutf_get_fp_dhdl(outf), false, startingBehavior, mdModulesNotifier);
-
-    gstat = global_stat_init(ir);
-
-    /* Check for polarizable models and flexible constraints */
-    shellfc = init_shell_flexcon(fplog, top_global, constr ? constr->numFlexibleConstraints() : 0,
-                                 ir->nstcalcenergy, DOMAINDECOMP(cr));
-
-    {
-        double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
-        if ((io > 2000) && MASTER(cr))
-        {
-            fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io);
-        }
-    }
-
-    // Local state only becomes valid now.
-    std::unique_ptr<t_state> stateInstance;
-    t_state*                 state;
-
-
-    auto mdatoms = mdAtoms->mdatoms();
-
-    std::unique_ptr<UpdateConstrainCuda> integrator;
-
-    if (DOMAINDECOMP(cr))
-    {
-        dd_init_local_top(*top_global, &top);
-
-        stateInstance = std::make_unique<t_state>();
-        state         = stateInstance.get();
-        dd_init_local_state(cr->dd, state_global, state);
-
-        /* Distribute the charge groups over the nodes from the master node */
-        dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir,
-                            imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr,
-                            nrnb, nullptr, FALSE);
-        shouldCheckNumberOfBondedInteractions = true;
-        upd.setNumAtoms(state->natoms);
-    }
-    else
-    {
-        state_change_natoms(state_global, state_global->natoms);
-        f.resizeWithPadding(state_global->natoms);
-        /* Copy the pointer to the global state */
-        state = state_global;
-
-        /* Generate and initialize new topology */
-        mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &graph, mdAtoms, constr, vsite, shellfc);
-
-        upd.setNumAtoms(state->natoms);
-    }
-
-    const auto& simulationWork     = runScheduleWork->simulationWork;
-    const bool  useGpuForPme       = simulationWork.useGpuPme;
-    const bool  useGpuForNonbonded = simulationWork.useGpuNonbonded;
-    const bool  useGpuForBufferOps = simulationWork.useGpuBufferOps;
-    const bool  useGpuForUpdate    = simulationWork.useGpuUpdate;
-
-    StatePropagatorDataGpu* stateGpu = fr->stateGpu;
-
-    if (useGpuForUpdate)
-    {
-        GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
-                                   || constr->numConstraintsTotal() == 0,
-                           "Constraints in domain decomposition are only supported with update "
-                           "groups if using GPU update.\n");
-        GMX_RELEASE_ASSERT(ir->eConstrAlg != econtSHAKE || constr == nullptr
-                                   || constr->numConstraintsTotal() == 0,
-                           "SHAKE is not supported with GPU update.");
-        GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
-                           "Either PME or short-ranged non-bonded interaction tasks must run on "
-                           "the GPU to use GPU update.\n");
-        GMX_RELEASE_ASSERT(ir->eI == eiMD,
-                           "Only the md integrator is supported with the GPU update.\n");
-        GMX_RELEASE_ASSERT(
-                ir->etc != etcNOSEHOOVER,
-                "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
-        GMX_RELEASE_ASSERT(ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN || ir->epc == epcBERENDSEN,
-                           "Only Parrinello-Rahman and Berendsen pressure coupling are supported "
-                           "with the GPU update.\n");
-        GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
-                           "Virtual sites are not supported with the GPU update.\n");
-        GMX_RELEASE_ASSERT(ed == nullptr,
-                           "Essential dynamics is not supported with the GPU update.\n");
-        GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(ir->pull),
-                           "Constraints pulling is not supported with the GPU update.\n");
-        GMX_RELEASE_ASSERT(fcd->orires.nr == 0,
-                           "Orientation restraints are not supported with the GPU update.\n");
-        GMX_RELEASE_ASSERT(ir->efep == efepNO,
-                           "Free energy perturbations are not supported with the GPU update.");
-        GMX_RELEASE_ASSERT(graph == nullptr, "The graph is not supported with GPU update.");
-
-        if (constr != nullptr && constr->numConstraintsTotal() > 0)
-        {
-            GMX_LOG(mdlog.info)
-                    .asParagraph()
-                    .appendText("Updating coordinates and applying constraints on the GPU.");
-        }
-        else
-        {
-            GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
-        }
-        integrator = std::make_unique<UpdateConstrainCuda>(
-                *ir, *top_global, stateGpu->getUpdateStream(), stateGpu->xUpdatedOnDevice());
-
-        t_pbc pbc;
-        set_pbc(&pbc, epbcXYZ, state->box);
-        integrator->setPbc(&pbc);
-    }
-
-    if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
-    {
-        changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
-    }
-    if ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
-    {
-        changePinningPolicy(&f, PinningPolicy::PinnedIfSupported);
-    }
-    if (useGpuForUpdate)
-    {
-        changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
-    }
-
-    // NOTE: The global state is no longer used at this point.
-    // But state_global is still used as temporary storage space for writing
-    // the global state to file and potentially for replica exchange.
-    // (Global topology should persist.)
-
-    update_mdatoms(mdatoms, state->lambda[efptMASS]);
-
-    if (ir->bExpanded)
-    {
-        /* Check nstexpanded here, because the grompp check was broken */
-        if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
-        {
-            gmx_fatal(FARGS,
-                      "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
-        }
-        init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist, mdlog);
-    }
-
-    if (MASTER(cr))
-    {
-        EnergyElement::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
-    }
-
-    preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
-                           startingBehavior != StartingBehavior::NewSimulation);
-
-    // TODO: Remove this by converting AWH into a ForceProvider
-    auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms,
-                                startingBehavior != StartingBehavior::NewSimulation,
-                                shellfc != nullptr, opt2fn("-awh", nfile, fnm), pull_work);
-
-    if (useReplicaExchange && MASTER(cr))
-    {
-        repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, replExParams);
-    }
-    /* PME tuning is only supported in the Verlet scheme, with PME for
-     * Coulomb. It is not supported with only LJ PME. */
-    bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible
-                && ir->cutoff_scheme != ecutsGROUP);
-
-    pme_load_balancing_t* pme_loadbal = nullptr;
-    if (bPMETune)
-    {
-        pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata,
-                         fr->nbv->useGpu());
-    }
-
-    if (!ir->bContinuation)
-    {
-        if (state->flags & (1U << estV))
-        {
-            auto v = makeArrayRef(state->v);
-            /* Set the velocities of vsites, shells and frozen atoms to zero */
-            for (i = 0; i < mdatoms->homenr; i++)
-            {
-                if (mdatoms->ptype[i] == eptVSite || mdatoms->ptype[i] == eptShell)
-                {
-                    clear_rvec(v[i]);
-                }
-                else if (mdatoms->cFREEZE)
-                {
-                    for (m = 0; m < DIM; m++)
-                    {
-                        if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
-                        {
-                            v[i][m] = 0;
-                        }
-                    }
-                }
-            }
-        }
-
-        if (constr)
-        {
-            /* Constrain the initial coordinates and velocities */
-            do_constrain_first(fplog, constr, ir, mdatoms, state->natoms, state->x.arrayRefWithPadding(),
-                               state->v.arrayRefWithPadding(), state->box, state->lambda[efptBONDED]);
-        }
-        if (vsite)
-        {
-            /* Construct the virtual sites for the initial configuration */
-            construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr, top.idef.iparams,
-                             top.idef.il, fr->ePBC, fr->bMolPBC, cr, state->box);
-        }
-    }
-
-    if (ir->efep != efepNO)
-    {
-        /* Set free energy calculation frequency as the greatest common
-         * denominator of nstdhdl and repl_ex_nst. */
-        nstfep = ir->fepvals->nstdhdl;
-        if (ir->bExpanded)
-        {
-            nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
-        }
-        if (useReplicaExchange)
-        {
-            nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
-        }
-    }
-
-    /* Be REALLY careful about what flags you set here. You CANNOT assume
-     * this is the first step, since we might be restarting from a checkpoint,
-     * and in that case we should not do any modifications to the state.
-     */
-    bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
-
-    // When restarting from a checkpoint, it can be appropriate to
-    // initialize ekind from quantities in the checkpoint. Otherwise,
-    // compute_globals must initialize ekind before the simulation
-    // starts/restarts. However, only the master rank knows what was
-    // found in the checkpoint file, so we have to communicate in
-    // order to coordinate the restart.
-    //
-    // TODO Consider removing this communication if/when checkpoint
-    // reading directly follows .tpr reading, because all ranks can
-    // agree on hasReadEkinState at that time.
-    bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
-    if (PAR(cr))
-    {
-        gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
-    }
-    if (hasReadEkinState)
-    {
-        restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
-    }
-
-    unsigned int cglo_flags =
-            (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
-             | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0));
-
-    bSumEkinhOld = FALSE;
-
-    t_vcm vcm(top_global->groups, *ir);
-    reportComRemovalInfo(fplog, vcm);
-
-    /* To minimize communication, compute_globals computes the COM velocity
-     * and the kinetic energy for the velocities without COM motion removed.
-     * Thus to get the kinetic energy without the COM contribution, we need
-     * to call compute_globals twice.
-     */
-    for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
-    {
-        unsigned int cglo_flags_iteration = cglo_flags;
-        if (bStopCM && cgloIteration == 0)
-        {
-            cglo_flags_iteration |= CGLO_STOPCM;
-            cglo_flags_iteration &= ~CGLO_TEMPERATURE;
-        }
-        compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
-                        state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, nullptr, enerd,
-                        force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller,
-                        state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
-                        cglo_flags_iteration
-                                | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
-                                                                         : 0));
-        if (cglo_flags_iteration & CGLO_STOPCM)
-        {
-            /* At initialization, do not pass x with acceleration-correction mode
-             * to avoid (incorrect) correction of the initial coordinates.
-             */
-            rvec* xPtr = nullptr;
-            if (vcm.mode != ecmLINEAR_ACCELERATION_CORRECTION)
-            {
-                xPtr = state->x.rvec_array();
-            }
-            process_and_stopcm_grp(fplog, &vcm, *mdatoms, xPtr, state->v.rvec_array());
-            inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
-        }
-    }
-    checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, &top,
-                                    state->x.rvec_array(), state->box,
-                                    &shouldCheckNumberOfBondedInteractions);
-    if (ir->eI == eiVVAK)
-    {
-        /* a second call to get the half step temperature initialized as well */
-        /* we do the same call as above, but turn the pressure off -- internally to
-           compute_globals, this is recognized as a velocity verlet half-step
-           kinetic energy calculation.  This minimized excess variables, but
-           perhaps loses some logic?*/
-
-        compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
-                        state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, nullptr, enerd,
-                        force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller,
-                        state->box, nullptr, &bSumEkinhOld, cglo_flags & ~CGLO_PRESSURE);
-    }
-
-    /* Calculate the initial half step temperature, and save the ekinh_old */
-    if (startingBehavior == StartingBehavior::NewSimulation)
-    {
-        for (i = 0; (i < ir->opts.ngtc); i++)
-        {
-            copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
-        }
-    }
-
-    /* need to make an initiation call to get the Trotter variables set, as well as other constants
-       for non-trotter temperature control */
-    auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
-
-    if (MASTER(cr))
-    {
-        if (!ir->bContinuation)
-        {
-            if (constr && ir->eConstrAlg == econtLINCS)
-            {
-                fprintf(fplog, "RMS relative constraint deviation after constraining: %.2e\n",
-                        constr->rmsd());
-            }
-            if (EI_STATE_VELOCITY(ir->eI))
-            {
-                real temp = enerd->term[F_TEMP];
-                if (ir->eI != eiVV)
-                {
-                    /* Result of Ekin averaged over velocities of -half
-                     * and +half step, while we only have -half step here.
-                     */
-                    temp *= 2;
-                }
-                fprintf(fplog, "Initial temperature: %g K\n", temp);
-            }
-        }
-
-        char tbuf[20];
-        fprintf(stderr, "starting mdrun '%s'\n", *(top_global->name));
-        if (ir->nsteps >= 0)
-        {
-            sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t);
-        }
-        else
-        {
-            sprintf(tbuf, "%s", "infinite");
-        }
-        if (ir->init_step > 0)
-        {
-            fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
-                    gmx_step_str(ir->init_step + ir->nsteps, sbuf), tbuf,
-                    gmx_step_str(ir->init_step, sbuf2), ir->init_step * ir->delta_t);
-        }
-        else
-        {
-            fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf);
-        }
-        fprintf(fplog, "\n");
-    }
-
-    /* PLUMED */
-    if(plumedswitch){
-      /* detect plumed API version */
-      int pversion=0;
-      plumed_cmd(plumedmain,"getApiVersion",&pversion);
-      /* setting kbT is only implemented with api>1) */
-      real kbT=ir->opts.ref_t[0]*BOLTZ;
-      if(pversion>1) plumed_cmd(plumedmain,"setKbT",&kbT);
-      if(pversion>2){
-        int res=1;
-        if( (startingBehavior != StartingBehavior::NewSimulation) ) plumed_cmd(plumedmain,"setRestart",&res);
-      }
-
-      if(ms && ms->nsim>1) {
-        if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&ms->mpi_comm_masters);
-        if(PAR(cr)){
-          if(DOMAINDECOMP(cr)) {
-            plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all);
-          }else{
-            plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim);
-          }
-        }
-        plumed_cmd(plumedmain,"GREX init",NULL);
-      }
-      if(PAR(cr)){
-        if(DOMAINDECOMP(cr)) {
-          plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all);
-        }
-      }
-      plumed_cmd(plumedmain,"setNatoms",&top_global->natoms);
-      plumed_cmd(plumedmain,"setMDEngine","gromacs");
-      plumed_cmd(plumedmain,"setLog",fplog);
-      real real_delta_t=ir->delta_t;
-      plumed_cmd(plumedmain,"setTimestep",&real_delta_t);
-      plumed_cmd(plumedmain,"init",NULL);
-
-      if(PAR(cr)){
-        if(DOMAINDECOMP(cr)) {
-          int nat_home = dd_numHomeAtoms(*cr->dd);
-          plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home);
-          plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data());
-        }
-      }
-      realFepState = state->fep_state;
-      plumed_cmd(plumedmain, "setExtraCV lambda", &realFepState);
-      plumed_cmd(plumedmain, "setExtraCVForce lambda", &lambdaForce);
-    }
-    /* END PLUMED */
-
-    walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
-    print_start(fplog, cr, walltime_accounting, "mdrun");
-
-    /***********************************************************
-     *
-     *             Loop over MD steps
-     *
-     ************************************************************/
-
-    bFirstStep = TRUE;
-    /* Skip the first Nose-Hoover integration when we get the state from tpx */
-    bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
-    bSumEkinhOld     = FALSE;
-    bExchanged       = FALSE;
-    bNeedRepartition = FALSE;
-    bDoReplEx        = FALSE;
-
-    step     = ir->init_step;
-    step_rel = 0;
-
-    auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
-            compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
-            MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
-            mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
-
-    auto checkpointHandler = std::make_unique<CheckpointHandler>(
-            compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]), simulationsShareState,
-            ir->nstlist == 0, MASTER(cr), mdrunOptions.writeConfout,
-            mdrunOptions.checkpointOptions.period);
-
-    const bool resetCountersIsLocal = true;
-    auto       resetHandler         = std::make_unique<ResetHandler>(
-            compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]),
-            !resetCountersIsLocal, ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
-            mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
-
-    const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
-
-    // TODO extract this to new multi-simulation module
-    if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
-    {
-        if (!multisim_int_all_are_equal(ms, ir->nsteps))
-        {
-            GMX_LOG(mdlog.warning)
-                    .appendText(
-                            "Note: The number of steps is not consistent across multi "
-                            "simulations,\n"
-                            "but we are proceeding anyway!");
-        }
-        if (!multisim_int_all_are_equal(ms, ir->init_step))
-        {
-            if (simulationsShareState)
-            {
-                if (MASTER(cr))
-                {
-                    gmx_fatal(FARGS,
-                              "The initial step is not consistent across multi simulations which "
-                              "share the state");
-                }
-                gmx_barrier(cr);
-            }
-            else
-            {
-                GMX_LOG(mdlog.warning)
-                        .appendText(
-                                "Note: The initial step is not consistent across multi "
-                                "simulations,\n"
-                                "but we are proceeding anyway!");
-            }
-        }
-    }
-
-    /* and stop now if we should */
-    bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
-    while (!bLastStep)
-    {
-
-        /* Determine if this is a neighbor search step */
-        bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
-
-        if (bPMETune && bNStList)
-        {
-            // This has to be here because PME load balancing is called so early.
-            // TODO: Move to after all booleans are defined.
-            if (useGpuForUpdate && !bFirstStep)
-            {
-                stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
-                stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
-            }
-            /* PME grid + cut-off optimization with GPUs or PME nodes */
-            pme_loadbal_do(pme_loadbal, cr, (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
-                           fplog, mdlog, *ir, fr, state->box, state->x, wcycle, step, step_rel,
-                           &bPMETunePrinting, simulationWork.useGpuPmePpCommunication);
-        }
-
-        wallcycle_start(wcycle, ewcSTEP);
-
-        bLastStep = (step_rel == ir->nsteps);
-        t         = t0 + step * ir->delta_t;
-
-        // TODO Refactor this, so that nstfep does not need a default value of zero
-        if (ir->efep != efepNO || ir->bSimTemp)
-        {
-            /* find and set the current lambdas */
-            setCurrentLambdasLocal(step, ir->fepvals, lam0, state->lambda, state->fep_state);
-
-            bDoDHDL     = do_per_step(step, ir->fepvals->nstdhdl);
-            bDoFEP      = ((ir->efep != efepNO) && do_per_step(step, nstfep));
-            bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
-                           && (!bFirstStep));
-        }
-
-        bDoReplExPrev = bDoReplEx;
-        bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
-                     && do_per_step(step, replExParams.exchangeInterval));
-
-        if (doSimulatedAnnealing)
-        {
-            update_annealing_target_temp(ir, t, &upd);
-        }
-
-        /* Stop Center of Mass motion */
-        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
-
-        /* Determine whether or not to do Neighbour Searching */
-        bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
-
-        /* Note that the stopHandler will cause termination at nstglobalcomm
-         * steps. Since this concides with nstcalcenergy, nsttcouple and/or
-         * nstpcouple steps, we have computed the half-step kinetic energy
-         * of the previous step and can always output energies at the last step.
-         */
-        bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
-
-        /* do_log triggers energy and virial calculation. Because this leads
-         * to different code paths, forces can be different. Thus for exact
-         * continuation we should avoid extra log output.
-         * Note that the || bLastStep can result in non-exact continuation
-         * beyond the last step. But we don't consider that to be an issue.
-         */
-        do_log     = (do_per_step(step, ir->nstlog)
-                  || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep);
-        do_verbose = mdrunOptions.verbose
-                     && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
-
-        if (useGpuForUpdate && !bFirstStep && bNS)
-        {
-            // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
-            stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
-            stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
-            // Copy coordinate from the GPU when needed at the search step.
-            // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
-            // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
-            stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
-            stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
-        }
-
-        if (bNS && !(bFirstStep && ir->bContinuation))
-        {
-            bMasterState = FALSE;
-            /* Correct the new box if it is too skewed */
-            if (inputrecDynamicBox(ir))
-            {
-                if (correct_box(fplog, step, state->box, graph))
-                {
-                    bMasterState = TRUE;
-                    // If update is offloaded, it should be informed about the box size change
-                    if (useGpuForUpdate)
-                    {
-                        t_pbc pbc;
-                        set_pbc(&pbc, epbcXYZ, state->box);
-                        integrator->setPbc(&pbc);
-                    }
-                }
-            }
-            if (DOMAINDECOMP(cr) && bMasterState)
-            {
-                dd_collect_state(cr->dd, state, state_global);
-            }
-
-            if (DOMAINDECOMP(cr))
-            {
-                /* Repartition the domain decomposition */
-                dd_partition_system(fplog, mdlog, step, cr, bMasterState, nstglobalcomm, state_global,
-                                    *top_global, ir, imdSession, pull_work, state, &f, mdAtoms, &top,
-                                    fr, vsite, constr, nrnb, wcycle, do_verbose && !bPMETunePrinting);
-                shouldCheckNumberOfBondedInteractions = true;
-                upd.setNumAtoms(state->natoms);
-
-                /* PLUMED */
-                if(plumedswitch){
-                  int nat_home = dd_numHomeAtoms(*cr->dd);
-                  plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home);
-                  plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data());
-                }
-                /* END PLUMED */
-            }
-        }
-
-        if (MASTER(cr) && do_log)
-        {
-            energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
-        }
-
-        if (ir->efep != efepNO)
-        {
-            update_mdatoms(mdatoms, state->lambda[efptMASS]);
-        }
-
-        if (bExchanged)
-        {
-
-            /* We need the kinetic energy at minus the half step for determining
-             * the full step kinetic energy and possibly for T-coupling.*/
-            /* This may not be quite working correctly yet . . . . */
-            compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
-                            state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
-                            nullptr, nullptr, nullptr, nullptr, mu_tot, constr, &nullSignaller,
-                            state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
-                            CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
-            checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global,
-                                            &top, state->x.rvec_array(), state->box,
-                                            &shouldCheckNumberOfBondedInteractions);
-        }
-        clear_mat(force_vir);
-
-        /* PLUMED HREX */
-        gmx_bool bHREX = bDoReplEx && plumed_hrex;
-
-        if (plumedswitch && bHREX) {
-          // gmx_enerdata_t *hrex_enerd;
-          int nlambda = enerd->enerpart_lambda.end() - enerd->enerpart_lambda.begin();
-          gmx_enerdata_t hrex_enerd(enerd->grpp.nener, nlambda == 0 ? 0 : nlambda - 1);
-          int repl  = -1;
-          int nrepl = -1;
-          if (MASTER(cr)){
-            repl  = replica_exchange_get_repl(repl_ex);
-            nrepl = replica_exchange_get_nrepl(repl_ex);
-          }
-
-          if (DOMAINDECOMP(cr)) {
-            dd_collect_state(cr->dd,state,state_global);
-          } else {
-            copy_state_serial(state, state_global);
-          }
-
-          if(MASTER(cr)){
-            if(repl%2==step/replExParams.exchangeInterval%2){
-              if(repl-1>=0) exchange_state(ms,repl-1,state_global);
-            }else{
-              if(repl+1<nrepl) exchange_state(ms,repl+1,state_global);
-            }
-          }
-          if (!DOMAINDECOMP(cr)) {
-            copy_state_serial(state_global, state);
-          }
-          if(PAR(cr)){
-            if (DOMAINDECOMP(cr)) {
-              dd_partition_system(fplog,mdlog,step,cr,TRUE,1,
-                                  state_global,*top_global,ir,
-                                  imdSession, pull_work,
-                                  state,&f,mdAtoms,&top,fr,vsite,constr,
-                                  nrnb,wcycle,FALSE);
-            }
-          }
-          do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession, pull_work, step,
-                   nrnb, wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist,
-                   f.arrayRefWithPadding(), force_vir, mdatoms, &hrex_enerd, fcd, state->lambda, graph,
-                   fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
-                   GMX_FORCE_STATECHANGED |
-                   GMX_FORCE_DYNAMICBOX |
-                   GMX_FORCE_ALLFORCES |
-                   GMX_FORCE_VIRIAL |
-                   GMX_FORCE_ENERGY |
-                   GMX_FORCE_DHDL |
-                   GMX_FORCE_NS,
-                   ddBalanceRegionHandler);
-
-          plumed_cmd(plumedmain,"GREX cacheLocalUSwap",&(&hrex_enerd)->term[F_EPOT]);
-
-          /* exchange back */
-          if (DOMAINDECOMP(cr)) {
-            dd_collect_state(cr->dd,state,state_global);
-          } else {
-            copy_state_serial(state, state_global);
-          }
-
-          if(MASTER(cr)){
-            if(repl%2==step/replExParams.exchangeInterval%2){
-              if(repl-1>=0) exchange_state(ms,repl-1,state_global);
-            }else{
-              if(repl+1<nrepl) exchange_state(ms,repl+1,state_global);
-            }
-          }
-
-          if (!DOMAINDECOMP(cr)) {
-            copy_state_serial(state_global, state);
-          }
-          if(PAR(cr)){
-            if (DOMAINDECOMP(cr)) {
-              dd_partition_system(fplog,mdlog,step,cr,TRUE,1,
-                                  state_global,*top_global,ir,
-                                  imdSession, pull_work,
-                                  state,&f,mdAtoms,&top,fr,vsite,constr,
-                                  nrnb,wcycle,FALSE);
-              int nat_home = dd_numHomeAtoms(*cr->dd);
-              plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home);
-              plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data());
-            }
-          }
-          bNS=true;
-        }
-        /* END PLUMED HREX */
-
-        checkpointHandler->decideIfCheckpointingThisStep(bNS||bDoReplExPrev, bFirstStep, bLastStep);
-
-        /* Determine the energy and pressure:
-         * at nstcalcenergy steps and at energy output steps (set below).
-         */
-        if (EI_VV(ir->eI) && (!bInitStep))
-        {
-            bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
-            bCalcVir      = bCalcEnerStep
-                       || (ir->epc != epcNO
-                           && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple)));
-        }
-        else
-        {
-            bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
-            bCalcVir = bCalcEnerStep || (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
-        }
-        bCalcEner = bCalcEnerStep;
-
-        do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
-
-        if (do_ene || do_log || bDoReplEx)
-        {
-            bCalcVir  = TRUE;
-            bCalcEner = TRUE;
-        }
-
-        /* Do we need global communication ? */
-        bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
-                  || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
-
-        force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
-                       | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
-                       | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
-
-        if (shellfc)
-        {
-            /* Now is the time to relax the shells */
-            relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, enforcedRotation, step, ir,
-                                imdSession, pull_work, bNS, force_flags, &top, constr, enerd, fcd,
-                                state->natoms, state->x.arrayRefWithPadding(),
-                                state->v.arrayRefWithPadding(), state->box, state->lambda, &state->hist,
-                                f.arrayRefWithPadding(), force_vir, mdatoms, nrnb, wcycle, graph,
-                                shellfc, fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler);
-        }
-        else
-        {
-            /* The AWH history need to be saved _before_ doing force calculations where the AWH bias
-               is updated (or the AWH update will be performed twice for one step when continuing).
-               It would be best to call this update function from do_md_trajectory_writing but that
-               would occur after do_force. One would have to divide the update_awh function into one
-               function applying the AWH force and one doing the AWH bias update. The update AWH
-               bias function could then be called after do_md_trajectory_writing (then containing
-               update_awh_history). The checkpointing will in the future probably moved to the start
-               of the md loop which will rid of this issue. */
-            if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
-            {
-                awh->updateHistory(state_global->awhHistory.get());
-            }
-
-            /* The coordinates (x) are shifted (to get whole molecules)
-             * in do_force.
-             * This is parallellized as well, and does communication too.
-             * Check comments in sim_util.c
-             */
-
-            /* PLUMED */
-            plumedNeedsEnergy=0;
-            if(plumedswitch){
-              int pversion=0;
-              plumed_cmd(plumedmain,"getApiVersion",&pversion);
-              long int lstep=step; plumed_cmd(plumedmain,"setStepLong",&lstep);
-              plumed_cmd(plumedmain,"setPositions",&state->x[0][0]);
-              plumed_cmd(plumedmain,"setMasses",&mdatoms->massT[0]);
-              plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[0]);
-              plumed_cmd(plumedmain,"setBox",&state->box[0][0]);
-              plumed_cmd(plumedmain,"prepareCalc",NULL);
-              plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop);
-              int checkp=0; if(checkpointHandler->isCheckpointingStep()) checkp=1;
-              if(pversion>3) plumed_cmd(plumedmain,"doCheckPoint",&checkp);
-              plumed_cmd(plumedmain,"setForces",&f[0][0]);
-              plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
-              if(plumedNeedsEnergy) force_flags |= GMX_FORCE_ENERGY | GMX_FORCE_VIRIAL;
-              clear_mat(plumed_vir);
-              plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]);
-            }
-            /* END PLUMED */
-            do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession, pull_work, step,
-                     nrnb, wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist,
-                     f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, state->lambda, graph,
-                     fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
-                     (bNS ? GMX_FORCE_NS : 0) | force_flags, ddBalanceRegionHandler);
-            /* PLUMED */
-            if(plumedswitch){
-              if(plumedNeedsEnergy){
-                msmul(force_vir,2.0,plumed_vir);
-                plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]);
-                plumed_cmd(plumedmain,"performCalc",NULL);
-                msmul(plumed_vir,0.5,force_vir);
-              } else {
-                msmul(plumed_vir,0.5,plumed_vir);
-                m_add(force_vir,plumed_vir,force_vir);
-              }
-              if(bDoReplEx) plumed_cmd(plumedmain,"GREX savePositions",NULL);
-              if(plumedWantsToStop) ir->nsteps=step_rel+1;
-              if(bHREX) plumed_cmd(plumedmain,"GREX cacheLocalUNow",&enerd->term[F_EPOT]);
-            }
-            /* END PLUMED */
-        }
-
-        // VV integrators do not need the following velocity half step
-        // if it is the first step after starting from a checkpoint.
-        // That is, the half step is needed on all other steps, and
-        // also the first step when starting from a .tpr file.
-        if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation))
-        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
-        {
-            rvec* vbuf = nullptr;
-
-            wallcycle_start(wcycle, ewcUPDATE);
-            if (ir->eI == eiVV && bInitStep)
-            {
-                /* if using velocity verlet with full time step Ekin,
-                 * take the first half step only to compute the
-                 * virial for the first step. From there,
-                 * revert back to the initial coordinates
-                 * so that the input is actually the initial step.
-                 */
-                snew(vbuf, state->natoms);
-                copy_rvecn(state->v.rvec_array(), vbuf, 0,
-                           state->natoms); /* should make this better for parallelizing? */
-            }
-            else
-            {
-                /* this is for NHC in the Ekin(t+dt/2) version of vv */
-                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ,
-                               trotter_seq, ettTSEQ1);
-            }
-
-            update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
-                          etrtVELOCITY1, cr, constr);
-
-            wallcycle_stop(wcycle, ewcUPDATE);
-            constrain_velocities(step, nullptr, state, shake_vir, constr, bCalcVir, do_log, do_ene);
-            wallcycle_start(wcycle, ewcUPDATE);
-            /* if VV, compute the pressure and constraints */
-            /* For VV2, we strictly only need this if using pressure
-             * control, but we really would like to have accurate pressures
-             * printed out.
-             * Think about ways around this in the future?
-             * For now, keep this choice in comments.
-             */
-            /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
-            /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
-            bPres = TRUE;
-            bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
-            if (bCalcEner && ir->eI == eiVVAK)
-            {
-                bSumEkinhOld = TRUE;
-            }
-            /* for vv, the first half of the integration actually corresponds to the previous step.
-               So we need information from the last step in the first half of the integration */
-            if (bGStat || do_per_step(step - 1, nstglobalcomm))
-            {
-                wallcycle_stop(wcycle, ewcUPDATE);
-                compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
-                                state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
-                                force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller,
-                                state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
-                                (bGStat ? CGLO_GSTAT : 0) | (bCalcEner ? CGLO_ENERGY : 0)
-                                        | (bTemp ? CGLO_TEMPERATURE : 0) | (bPres ? CGLO_PRESSURE : 0)
-                                        | (bPres ? CGLO_CONSTRAINT : 0) | (bStopCM ? CGLO_STOPCM : 0)
-                                        | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
-                                                                                 : 0)
-                                        | CGLO_SCALEEKIN);
-                /* explanation of above:
-                   a) We compute Ekin at the full time step
-                   if 1) we are using the AveVel Ekin, and it's not the
-                   initial step, or 2) if we are using AveEkin, but need the full
-                   time step kinetic energy for the pressure (always true now, since we want accurate statistics).
-                   b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
-                   EkinAveVel because it's needed for the pressure */
-                checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
-                                                top_global, &top, state->x.rvec_array(), state->box,
-                                                &shouldCheckNumberOfBondedInteractions);
-                if (bStopCM)
-                {
-                    process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(),
-                                           state->v.rvec_array());
-                    inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
-                }
-                wallcycle_start(wcycle, ewcUPDATE);
-            }
-            /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
-            if (!bInitStep)
-            {
-                if (bTrotter)
-                {
-                    m_add(force_vir, shake_vir,
-                          total_vir); /* we need the un-dispersion corrected total vir here */
-                    trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ,
-                                   trotter_seq, ettTSEQ2);
-
-                    /* TODO This is only needed when we're about to write
-                     * a checkpoint, because we use it after the restart
-                     * (in a kludge?). But what should we be doing if
-                     * the startingBehavior is NewSimulation or bInitStep are true? */
-                    if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
-                    {
-                        copy_mat(shake_vir, state->svir_prev);
-                        copy_mat(force_vir, state->fvir_prev);
-                    }
-                    if ((inputrecNptTrotter(ir) || inputrecNvtTrotter(ir)) && ir->eI == eiVV)
-                    {
-                        /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
-                        enerd->term[F_TEMP] =
-                                sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
-                        enerd->term[F_EKIN] = trace(ekind->ekin);
-                    }
-                }
-                else if (bExchanged)
-                {
-                    wallcycle_stop(wcycle, ewcUPDATE);
-                    /* We need the kinetic energy at minus the half step for determining
-                     * the full step kinetic energy and possibly for T-coupling.*/
-                    /* This may not be quite working correctly yet . . . . */
-                    compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(),
-                                    state->v.rvec_array(), state->box, state->lambda[efptVDW],
-                                    mdatoms, nrnb, &vcm, wcycle, enerd, nullptr, nullptr, nullptr,
-                                    nullptr, mu_tot, constr, &nullSignaller, state->box, nullptr,
-                                    &bSumEkinhOld, CGLO_GSTAT | CGLO_TEMPERATURE);
-                    wallcycle_start(wcycle, ewcUPDATE);
-                }
-            }
-            /* if it's the initial step, we performed this first step just to get the constraint virial */
-            if (ir->eI == eiVV && bInitStep)
-            {
-                copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
-                sfree(vbuf);
-            }
-            wallcycle_stop(wcycle, ewcUPDATE);
-        }
-
-        /* compute the conserved quantity */
-        if (EI_VV(ir->eI))
-        {
-            saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
-            if (ir->eI == eiVV)
-            {
-                last_ekin = enerd->term[F_EKIN];
-            }
-            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
-            {
-                saved_conserved_quantity -= enerd->term[F_DISPCORR];
-            }
-            /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
-            if (ir->efep != efepNO)
-            {
-                sum_dhdl(enerd, state->lambda, *ir->fepvals);
-            }
-        }
-
-        /* ########  END FIRST UPDATE STEP  ############## */
-        /* ########  If doing VV, we now have v(dt) ###### */
-        if (bDoExpanded)
-        {
-            /* perform extended ensemble sampling in lambda - we don't
-               actually move to the new state before outputting
-               statistics, but if performing simulated tempering, we
-               do update the velocities and the tau_t. */
-
-            lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state,
-                                              state->dfhist, step, state->v.rvec_array(), mdatoms, &realFepState);
-            /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
-            if (MASTER(cr))
-            {
-                copy_df_history(state_global->dfhist, state->dfhist);
-            }
-        }
-
-        // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and
-        // coordinates have not already been copied for i) search or ii) CPU force tasks.
-        if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork
-            && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)
-                || checkpointHandler->isCheckpointingStep()))
-        {
-            stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
-            stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
-        }
-        // Copy velocities if needed for the output/checkpointing.
-        // NOTE: Copy on the search steps is done at the beginning of the step.
-        if (useGpuForUpdate && !bNS
-            && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep()))
-        {
-            stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
-            stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
-        }
-        // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps)
-        // and update is offloaded hence forces are kept on the GPU for update and have not been
-        // already transferred in do_force().
-        // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed
-        //       when the forces are ready on the GPU -- the same synchronizer should be used as the one
-        //       prior to GPU update.
-        // TODO: When the output flags will be included in step workload, this copy can be combined with the
-        //       copy call in do_force(...).
-        // NOTE: The forces should not be copied here if the vsites are present, since they were modified
-        //       on host after the D2H copy in do_force(...).
-        if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
-            && do_per_step(step, ir->nstfout))
-        {
-            stateGpu->copyForcesFromGpu(ArrayRef<RVec>(f), AtomLocality::Local);
-            stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
-        }
-        /* Now we have the energies and forces corresponding to the
-         * coordinates at time t. We must output all of this before
-         * the update.
-         */
-        do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, state_global,
-                                 observablesHistory, top_global, fr, outf, energyOutput, ekind, f,
-                                 checkpointHandler->isCheckpointingStep(), bRerunMD, bLastStep,
-                                 mdrunOptions.writeConfout, bSumEkinhOld);
-        /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
-        bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
-
-        /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
-        if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep
-            && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
-        {
-            copy_mat(state->svir_prev, shake_vir);
-            copy_mat(state->fvir_prev, force_vir);
-        }
-
-        stopHandler->setSignal();
-        resetHandler->setSignal(walltime_accounting);
-
-        if (bGStat || !PAR(cr))
-        {
-            /* In parallel we only have to check for checkpointing in steps
-             * where we do global communication,
-             *  otherwise the other nodes don't know.
-             */
-            checkpointHandler->setSignal(walltime_accounting);
-        }
-
-        /* #########   START SECOND UPDATE STEP ################# */
-
-        /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen
-           controlled in preprocessing */
-
-        if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
-        {
-            gmx_bool bIfRandomize;
-            bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
-            /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
-            if (constr && bIfRandomize)
-            {
-                constrain_velocities(step, nullptr, state, tmp_vir, constr, bCalcVir, do_log, do_ene);
-            }
-        }
-        /* Box is changed in update() when we do pressure coupling,
-         * but we should still use the old box for energy corrections and when
-         * writing it to the energy file, so it matches the trajectory files for
-         * the same timestep above. Make a copy in a separate array.
-         */
-        copy_mat(state->box, lastbox);
-
-        dvdl_constr = 0;
-
-        wallcycle_start(wcycle, ewcUPDATE);
-        /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
-        if (bTrotter)
-        {
-            trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
-            /* We can only do Berendsen coupling after we have summed
-             * the kinetic energy or virial. Since the happens
-             * in global_state after update, we should only do it at
-             * step % nstlist = 1 with bGStatEveryStep=FALSE.
-             */
-        }
-        else
-        {
-            update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
-            update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
-        }
-
-        if (EI_VV(ir->eI))
-        {
-            /* velocity half-step update */
-            update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
-                          etrtVELOCITY2, cr, constr);
-        }
-
-        /* Above, initialize just copies ekinh into ekin,
-         * it doesn't copy position (for VV),
-         * and entire integrator for MD.
-         */
-
-        if (ir->eI == eiVVAK)
-        {
-            cbuf.resize(state->x.size());
-            std::copy(state->x.begin(), state->x.end(), cbuf.begin());
-        }
-
-        /* With leap-frog type integrators we compute the kinetic energy
-         * at a whole time step as the average of the half-time step kinetic
-         * energies of two subsequent steps. Therefore we need to compute the
-         * half step kinetic energy also if we need energies at the next step.
-         */
-        const bool needHalfStepKineticEnergy =
-                (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps));
-
-        // Parrinello-Rahman requires the pressure to be availible before the update to compute
-        // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step.
-        const bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN
-                                         && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
-
-        if (useGpuForUpdate)
-        {
-            if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
-            {
-                integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(),
-                                stateGpu->getForces(), top.idef, *mdatoms, ekind->ngtc);
-
-                // Copy data to the GPU after buffers might have being reinitialized
-                stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
-                stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
-            }
-
-            // If the buffer ops were not offloaded this step, the forces are on the host and have to be copied
-            if (!runScheduleWork->stepWork.useGpuFBufferOps)
-            {
-                stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), AtomLocality::Local);
-            }
-
-            const bool doTemperatureScaling =
-                    (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
-
-            // This applies Leap-Frog, LINCS and SETTLE in succession
-            integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent(
-                                          AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
-                                  ir->delta_t, true, bCalcVir, shake_vir, doTemperatureScaling,
-                                  ekind->tcstat, doParrinelloRahman, ir->nstpcouple * ir->delta_t, M);
-
-            // Copy velocities D2H after update if:
-            // - Globals are computed this step (includes the energy output steps).
-            // - Temperature is needed for the next step.
-            if (bGStat || needHalfStepKineticEnergy)
-            {
-                stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
-                stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
-            }
-        }
-        else
-        {
-            update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
-                          etrtPOSITION, cr, constr);
-
-            wallcycle_stop(wcycle, ewcUPDATE);
-
-            constrain_coordinates(step, &dvdl_constr, state, shake_vir, &upd, constr, bCalcVir,
-                                  do_log, do_ene);
-
-            update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state, cr, nrnb, wcycle, &upd,
-                                  constr, do_log, do_ene);
-            finish_update(ir, mdatoms, state, graph, nrnb, wcycle, &upd, constr);
-        }
-
-        if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
-        {
-            updatePrevStepPullCom(pull_work, state);
-        }
-
-        if (ir->eI == eiVVAK)
-        {
-            /* erase F_EKIN and F_TEMP here? */
-            /* just compute the kinetic energy at the half step to perform a trotter step */
-            compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
-                            state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
-                            force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller, lastbox,
-                            nullptr, &bSumEkinhOld, (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE);
-            wallcycle_start(wcycle, ewcUPDATE);
-            trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
-            /* now we know the scaling, we can compute the positions again */
-            std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
-
-            update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
-                          etrtPOSITION, cr, constr);
-            wallcycle_stop(wcycle, ewcUPDATE);
-
-            /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
-            /* are the small terms in the shake_vir here due
-             * to numerical errors, or are they important
-             * physically? I'm thinking they are just errors, but not completely sure.
-             * For now, will call without actually constraining, constr=NULL*/
-            finish_update(ir, mdatoms, state, graph, nrnb, wcycle, &upd, nullptr);
-        }
-        if (EI_VV(ir->eI))
-        {
-            /* this factor or 2 correction is necessary
-               because half of the constraint force is removed
-               in the vv step, so we have to double it.  See
-               the Redmine issue #1255.  It is not yet clear
-               if the factor of 2 is exact, or just a very
-               good approximation, and this will be
-               investigated.  The next step is to see if this
-               can be done adding a dhdl contribution from the
-               rattle step, but this is somewhat more
-               complicated with the current code. Will be
-               investigated, hopefully for 4.6.3. However,
-               this current solution is much better than
-               having it completely wrong.
-             */
-            enerd->term[F_DVDL_CONSTR] += 2 * dvdl_constr;
-        }
-        else
-        {
-            enerd->term[F_DVDL_CONSTR] += dvdl_constr;
-        }
-
-        if (vsite != nullptr)
-        {
-            wallcycle_start(wcycle, ewcVSITECONSTR);
-            if (graph != nullptr)
-            {
-                shift_self(graph, state->box, state->x.rvec_array());
-            }
-            construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
-                             top.idef.iparams, top.idef.il, fr->ePBC, fr->bMolPBC, cr, state->box);
-
-            if (graph != nullptr)
-            {
-                unshift_self(graph, state->box, state->x.rvec_array());
-            }
-            wallcycle_stop(wcycle, ewcVSITECONSTR);
-        }
-
-        /* ############## IF NOT VV, Calculate globals HERE  ############ */
-        /* With Leap-Frog we can skip compute_globals at
-         * non-communication steps, but we need to calculate
-         * the kinetic energy one step before communication.
-         */
-        {
-            // Organize to do inter-simulation signalling on steps if
-            // and when algorithms require it.
-            const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
-
-            if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
-            {
-                // Copy coordinates when needed to stop the CM motion.
-                if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
-                {
-                    stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
-                    stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
-                }
-                // Since we're already communicating at this step, we
-                // can propagate intra-simulation signals. Note that
-                // check_nstglobalcomm has the responsibility for
-                // choosing the value of nstglobalcomm that is one way
-                // bGStat becomes true, so we can't get into a
-                // situation where e.g. checkpointing can't be
-                // signalled.
-                bool                doIntraSimSignal = true;
-                SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
-
-                compute_globals(
-                        gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
-                        state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
-                        force_vir, shake_vir, total_vir, pres, mu_tot, constr, &signaller, lastbox,
-                        &totalNumberOfBondedInteractions, &bSumEkinhOld,
-                        (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
-                                | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
-                                | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
-                                | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
-                                | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
-                                                                         : 0));
-                checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
-                                                top_global, &top, state->x.rvec_array(), state->box,
-                                                &shouldCheckNumberOfBondedInteractions);
-                if (!EI_VV(ir->eI) && bStopCM)
-                {
-                    process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(),
-                                           state->v.rvec_array());
-                    inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
-
-                    // TODO: The special case of removing CM motion should be dealt more gracefully
-                    if (useGpuForUpdate)
-                    {
-                        stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
-                        // Here we block until the H2D copy completes because event sync with the
-                        // force kernels that use the coordinates on the next steps is not implemented
-                        // (not because of a race on state->x being modified on the CPU while H2D is in progress).
-                        stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
-                        // If the COM removal changed the velocities on the CPU, this has to be accounted for.
-                        if (vcm.mode != ecmNO)
-                        {
-                            stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
-                        }
-                    }
-                }
-            }
-        }
-
-        /* #############  END CALC EKIN AND PRESSURE ################# */
-
-        /* Note: this is OK, but there are some numerical precision issues with using the convergence of
-           the virial that should probably be addressed eventually. state->veta has better properies,
-           but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
-           generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
-
-        if (ir->efep != efepNO && !EI_VV(ir->eI))
-        {
-            /* Sum up the foreign energy and dhdl terms for md and sd.
-               Currently done every step so that dhdl is correct in the .edr */
-            sum_dhdl(enerd, state->lambda, *ir->fepvals);
-        }
-
-        update_pcouple_after_coordinates(fplog, step, ir, mdatoms, pres, force_vir, shake_vir,
-                                         pressureCouplingMu, state, nrnb, &upd, !useGpuForUpdate);
-
-        const bool doBerendsenPressureCoupling =
-                (inputrec->epc == epcBERENDSEN && do_per_step(step, inputrec->nstpcouple));
-        if (useGpuForUpdate && (doBerendsenPressureCoupling || doParrinelloRahman))
-        {
-            integrator->scaleCoordinates(pressureCouplingMu);
-            t_pbc pbc;
-            set_pbc(&pbc, epbcXYZ, state->box);
-            integrator->setPbc(&pbc);
-        }
-
-        /* ################# END UPDATE STEP 2 ################# */
-        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
-
-        /* The coordinates (x) were unshifted in update */
-        if (!bGStat)
-        {
-            /* We will not sum ekinh_old,
-             * so signal that we still have to do it.
-             */
-            bSumEkinhOld = TRUE;
-        }
-
-        if (bCalcEner)
-        {
-            /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
-
-            /* use the directly determined last velocity, not actually the averaged half steps */
-            if (bTrotter && ir->eI == eiVV)
-            {
-                enerd->term[F_EKIN] = last_ekin;
-            }
-            enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
-
-            if (integratorHasConservedEnergyQuantity(ir))
-            {
-                if (EI_VV(ir->eI))
-                {
-                    enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
-                }
-                else
-                {
-                    enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
-                }
-            }
-            /* #########  END PREPARING EDR OUTPUT  ###########  */
-        }
-
-        /* Output stuff */
-        if (MASTER(cr))
-        {
-            if (fplog && do_log && bDoExpanded)
-            {
-                /* only needed if doing expanded ensemble */
-                PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals,
-                                          ir->bSimTemp ? ir->simtempvals : nullptr,
-                                          state_global->dfhist, state->fep_state, ir->nstlog, step);
-            }
-            if (bCalcEner)
-            {
-                energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep, t, mdatoms->tmass, enerd, state,
-                                                 ir->fepvals, ir->expandedvals, lastbox, shake_vir,
-                                                 force_vir, total_vir, pres, ekind, mu_tot, constr);
-            }
-            else
-            {
-                energyOutput.recordNonEnergyStep();
-            }
-
-            gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
-            gmx_bool do_or = do_per_step(step, ir->nstorireout);
-
-            if (doSimulatedAnnealing)
-            {
-                energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
-            }
-            if (do_log || do_ene || do_dr || do_or)
-            {
-                energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
-                                                   do_log ? fplog : nullptr, step, t, fcd, awh.get());
-            }
-
-            if (ir->bPull)
-            {
-                pull_print_output(pull_work, step, t);
-            }
-
-            if (do_per_step(step, ir->nstlog))
-            {
-                if (fflush(fplog) != 0)
-                {
-                    gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
-                }
-            }
-        }
-        if (bDoExpanded)
-        {
-            /* Have to do this part _after_ outputting the logfile and the edr file */
-            /* Gets written into the state at the beginning of next loop*/
-            state->fep_state = lamnew;
-            if(plumedswitch)
-            {
-                realFepState = state->fep_state;
-            }
-        }
-        /* Print the remaining wall clock time for the run */
-        if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
-        {
-            if (shellfc)
-            {
-                fprintf(stderr, "\n");
-            }
-            print_time(stderr, walltime_accounting, step, ir, cr);
-        }
-
-        /* Ion/water position swapping.
-         * Not done in last step since trajectory writing happens before this call
-         * in the MD loop and exchanges would be lost anyway. */
-        bNeedRepartition = FALSE;
-        if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && do_per_step(step, ir->swap->nstswap))
-        {
-            bNeedRepartition =
-                    do_swapcoords(cr, step, t, ir, swap, wcycle, as_rvec_array(state->x.data()),
-                                  state->box, MASTER(cr) && mdrunOptions.verbose, bRerunMD);
-
-            if (bNeedRepartition && DOMAINDECOMP(cr))
-            {
-                dd_collect_state(cr->dd, state, state_global);
-            }
-        }
-
-        /* Replica exchange */
-        bExchanged = FALSE;
-        if (bDoReplEx)
-        {
-            bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
-        }
-
-        if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
-        {
-            dd_partition_system(fplog, mdlog, step, cr, TRUE, 1, state_global, *top_global, ir,
-                                imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr,
-                                nrnb, wcycle, FALSE);
-            shouldCheckNumberOfBondedInteractions = true;
-            upd.setNumAtoms(state->natoms);
-        }
-
-        bFirstStep = FALSE;
-        bInitStep  = FALSE;
-
-        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
-        /* With all integrators, except VV, we need to retain the pressure
-         * at the current step for coupling at the next step.
-         */
-        if ((state->flags & (1U << estPRES_PREV))
-            && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
-        {
-            /* Store the pressure in t_state for pressure coupling
-             * at the next MD step.
-             */
-            copy_mat(pres, state->pres_prev);
-        }
-
-        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
-
-        if ((membed != nullptr) && (!bLastStep))
-        {
-            rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
-        }
-
-        cycles = wallcycle_stop(wcycle, ewcSTEP);
-        if (DOMAINDECOMP(cr) && wcycle)
-        {
-            dd_cycles_add(cr->dd, cycles, ddCyclStep);
-        }
-
-        /* increase the MD step number */
-        step++;
-        step_rel++;
-
-#if GMX_FAHCORE
-        if (MASTER(cr))
-        {
-            fcReportProgress(ir->nsteps + ir->init_step, step);
-        }
-#endif
-
-        resetHandler->resetCounters(step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb,
-                                    fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
-
-        /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
-        imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
-    }
-    /* End of main MD loop */
-
-    /* Closing TNG files can include compressing data. Therefore it is good to do that
-     * before stopping the time measurements. */
-    mdoutf_tng_close(outf);
-
-    /* Stop measuring walltime */
-    walltime_accounting_end_time(walltime_accounting);
-
-    if (!thisRankHasDuty(cr, DUTY_PME))
-    {
-        /* Tell the PME only node to finish */
-        gmx_pme_send_finish(cr);
-    }
-
-    if (MASTER(cr))
-    {
-        if (ir->nstcalcenergy > 0)
-        {
-            energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
-            energyOutput.printAverages(fplog, groups);
-        }
-    }
-    done_mdoutf(outf);
-
-    if (bPMETune)
-    {
-        pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
-    }
-
-    done_shellfc(fplog, shellfc, step_rel);
-
-    if (useReplicaExchange && MASTER(cr))
-    {
-        print_replica_exchange_statistics(fplog, repl_ex);
-    }
-
-    walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
-
-    global_stat_destroy(gstat);
-}
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/md.cpp.preplumed b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/md.cpp.preplumed
deleted file mode 100644
index 7df4a68b21..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/md.cpp.preplumed
+++ /dev/null
@@ -1,1689 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011-2019,2020, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \internal \file
- *
- * \brief Implements the integrator for normal molecular dynamics simulations
- *
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \ingroup module_mdrun
- */
-#include "gmxpre.h"
-
-#include <cinttypes>
-#include <cmath>
-#include <cstdio>
-#include <cstdlib>
-
-#include <algorithm>
-#include <memory>
-
-#include "gromacs/awh/awh.h"
-#include "gromacs/commandline/filenm.h"
-#include "gromacs/domdec/collect.h"
-#include "gromacs/domdec/dlbtiming.h"
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/domdec/domdec_network.h"
-#include "gromacs/domdec/domdec_struct.h"
-#include "gromacs/domdec/mdsetup.h"
-#include "gromacs/domdec/partition.h"
-#include "gromacs/essentialdynamics/edsam.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/ewald/pme_load_balancing.h"
-#include "gromacs/fileio/trxio.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/gpu_utils/gpu_utils.h"
-#include "gromacs/imd/imd.h"
-#include "gromacs/listed_forces/manage_threading.h"
-#include "gromacs/math/functions.h"
-#include "gromacs/math/utilities.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/checkpointhandler.h"
-#include "gromacs/mdlib/compute_io.h"
-#include "gromacs/mdlib/constr.h"
-#include "gromacs/mdlib/ebin.h"
-#include "gromacs/mdlib/enerdata_utils.h"
-#include "gromacs/mdlib/energyoutput.h"
-#include "gromacs/mdlib/expanded.h"
-#include "gromacs/mdlib/force.h"
-#include "gromacs/mdlib/force_flags.h"
-#include "gromacs/mdlib/forcerec.h"
-#include "gromacs/mdlib/md_support.h"
-#include "gromacs/mdlib/mdatoms.h"
-#include "gromacs/mdlib/mdoutf.h"
-#include "gromacs/mdlib/membed.h"
-#include "gromacs/mdlib/resethandler.h"
-#include "gromacs/mdlib/sighandler.h"
-#include "gromacs/mdlib/simulationsignal.h"
-#include "gromacs/mdlib/stat.h"
-#include "gromacs/mdlib/stophandler.h"
-#include "gromacs/mdlib/tgroup.h"
-#include "gromacs/mdlib/trajectory_writing.h"
-#include "gromacs/mdlib/update.h"
-#include "gromacs/mdlib/update_constrain_cuda.h"
-#include "gromacs/mdlib/vcm.h"
-#include "gromacs/mdlib/vsite.h"
-#include "gromacs/mdrunutility/handlerestart.h"
-#include "gromacs/mdrunutility/multisim.h"
-#include "gromacs/mdrunutility/printtime.h"
-#include "gromacs/mdtypes/awh_history.h"
-#include "gromacs/mdtypes/awh_params.h"
-#include "gromacs/mdtypes/commrec.h"
-#include "gromacs/mdtypes/df_history.h"
-#include "gromacs/mdtypes/energyhistory.h"
-#include "gromacs/mdtypes/fcdata.h"
-#include "gromacs/mdtypes/forcerec.h"
-#include "gromacs/mdtypes/group.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/interaction_const.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdatom.h"
-#include "gromacs/mdtypes/mdrunoptions.h"
-#include "gromacs/mdtypes/observableshistory.h"
-#include "gromacs/mdtypes/pullhistory.h"
-#include "gromacs/mdtypes/simulation_workload.h"
-#include "gromacs/mdtypes/state.h"
-#include "gromacs/mdtypes/state_propagator_data_gpu.h"
-#include "gromacs/modularsimulator/energyelement.h"
-#include "gromacs/nbnxm/gpu_data_mgmt.h"
-#include "gromacs/nbnxm/nbnxm.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/pulling/output.h"
-#include "gromacs/pulling/pull.h"
-#include "gromacs/swap/swapcoords.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/timing/walltime_accounting.h"
-#include "gromacs/topology/atoms.h"
-#include "gromacs/topology/idef.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/topology/topology.h"
-#include "gromacs/trajectory/trajectoryframe.h"
-#include "gromacs/utility/basedefinitions.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/logger.h"
-#include "gromacs/utility/real.h"
-#include "gromacs/utility/smalloc.h"
-
-#include "legacysimulator.h"
-#include "replicaexchange.h"
-#include "shellfc.h"
-
-using gmx::SimulationSignaller;
-
-void gmx::LegacySimulator::do_md()
-{
-    // TODO Historically, the EM and MD "integrators" used different
-    // names for the t_inputrec *parameter, but these must have the
-    // same name, now that it's a member of a struct. We use this ir
-    // alias to avoid a large ripple of nearly useless changes.
-    // t_inputrec is being replaced by IMdpOptionsProvider, so this
-    // will go away eventually.
-    t_inputrec*  ir = inputrec;
-    int64_t      step, step_rel;
-    double       t, t0 = ir->init_t, lam0[efptNR];
-    gmx_bool     bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
-    gmx_bool     bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
-    gmx_bool     bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
-    gmx_bool     do_ene, do_log, do_verbose;
-    gmx_bool     bMasterState;
-    unsigned int force_flags;
-    tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, tmp_vir = { { 0 } },
-           pres = { { 0 } };
-    int                         i, m;
-    rvec                        mu_tot;
-    matrix                      pressureCouplingMu, M;
-    gmx_repl_ex_t               repl_ex = nullptr;
-    gmx_localtop_t              top;
-    PaddedHostVector<gmx::RVec> f{};
-    gmx_global_stat_t           gstat;
-    t_graph*                    graph = nullptr;
-    gmx_shellfc_t*              shellfc;
-    gmx_bool                    bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
-    gmx_bool                    bTemp, bPres, bTrotter;
-    real                        dvdl_constr;
-    std::vector<RVec>           cbuf;
-    matrix                      lastbox;
-    int                         lamnew = 0;
-    /* for FEP */
-    int       nstfep = 0;
-    double    cycles;
-    real      saved_conserved_quantity = 0;
-    real      last_ekin                = 0;
-    t_extmass MassQ;
-    char      sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
-
-    /* PME load balancing data for GPU kernels */
-    gmx_bool bPMETune         = FALSE;
-    gmx_bool bPMETunePrinting = FALSE;
-
-    bool bInteractiveMDstep = false;
-
-    /* Domain decomposition could incorrectly miss a bonded
-       interaction, but checking for that requires a global
-       communication stage, which does not otherwise happen in DD
-       code. So we do that alongside the first global energy reduction
-       after a new DD is made. These variables handle whether the
-       check happens, and the result it returns. */
-    bool shouldCheckNumberOfBondedInteractions = false;
-    int  totalNumberOfBondedInteractions       = -1;
-
-    SimulationSignals signals;
-    // Most global communnication stages don't propagate mdrun
-    // signals, and will use this object to achieve that.
-    SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
-
-    if (!mdrunOptions.writeConfout)
-    {
-        // This is on by default, and the main known use case for
-        // turning it off is for convenience in benchmarking, which is
-        // something that should not show up in the general user
-        // interface.
-        GMX_LOG(mdlog.info)
-                .asParagraph()
-                .appendText(
-                        "The -noconfout functionality is deprecated, and may be removed in a "
-                        "future version.");
-    }
-
-    /* md-vv uses averaged full step velocities for T-control
-       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
-       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
-    bTrotter = (EI_VV(ir->eI)
-                && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
-
-    const bool bRerunMD = false;
-
-    int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
-    bGStatEveryStep   = (nstglobalcomm == 1);
-
-    SimulationGroups* groups = &top_global->groups;
-
-    std::unique_ptr<EssentialDynamics> ed = nullptr;
-    if (opt2bSet("-ei", nfile, fnm))
-    {
-        /* Initialize essential dynamics sampling */
-        ed = init_edsam(mdlog, opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm), top_global,
-                        ir, cr, constr, state_global, observablesHistory, oenv, startingBehavior);
-    }
-    else if (observablesHistory->edsamHistory)
-    {
-        gmx_fatal(FARGS,
-                  "The checkpoint is from a run with essential dynamics sampling, "
-                  "but the current run did not specify the -ei option. "
-                  "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
-    }
-
-    initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
-    Update     upd(ir, deform);
-    const bool doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
-    const bool useReplicaExchange   = (replExParams.exchangeInterval > 0);
-
-    bool simulationsShareState = false;
-    int  nstSignalComm         = nstglobalcomm;
-    {
-        // TODO This implementation of ensemble orientation restraints is nasty because
-        // a user can't just do multi-sim with single-sim orientation restraints.
-        bool usingEnsembleRestraints =
-                (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
-        bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
-
-        // Replica exchange, ensemble restraints and AWH need all
-        // simulations to remain synchronized, so they need
-        // checkpoints and stop conditions to act on the same step, so
-        // the propagation of such signals must take place between
-        // simulations, not just within simulations.
-        // TODO: Make algorithm initializers set these flags.
-        simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim;
-
-        if (simulationsShareState)
-        {
-            // Inter-simulation signal communication does not need to happen
-            // often, so we use a minimum of 200 steps to reduce overhead.
-            const int c_minimumInterSimulationSignallingInterval = 200;
-            nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm)
-                            * nstglobalcomm;
-        }
-    }
-
-    if (startingBehavior != StartingBehavior::RestartWithAppending)
-    {
-        pleaseCiteCouplingAlgorithms(fplog, *ir);
-    }
-    gmx_mdoutf* outf =
-            init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir,
-                        top_global, oenv, wcycle, startingBehavior, simulationsShareState, ms);
-    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work,
-                                   mdoutf_get_fp_dhdl(outf), false, startingBehavior, mdModulesNotifier);
-
-    gstat = global_stat_init(ir);
-
-    /* Check for polarizable models and flexible constraints */
-    shellfc = init_shell_flexcon(fplog, top_global, constr ? constr->numFlexibleConstraints() : 0,
-                                 ir->nstcalcenergy, DOMAINDECOMP(cr));
-
-    {
-        double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
-        if ((io > 2000) && MASTER(cr))
-        {
-            fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io);
-        }
-    }
-
-    // Local state only becomes valid now.
-    std::unique_ptr<t_state> stateInstance;
-    t_state*                 state;
-
-
-    auto mdatoms = mdAtoms->mdatoms();
-
-    std::unique_ptr<UpdateConstrainCuda> integrator;
-
-    if (DOMAINDECOMP(cr))
-    {
-        dd_init_local_top(*top_global, &top);
-
-        stateInstance = std::make_unique<t_state>();
-        state         = stateInstance.get();
-        dd_init_local_state(cr->dd, state_global, state);
-
-        /* Distribute the charge groups over the nodes from the master node */
-        dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir,
-                            imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr,
-                            nrnb, nullptr, FALSE);
-        shouldCheckNumberOfBondedInteractions = true;
-        upd.setNumAtoms(state->natoms);
-    }
-    else
-    {
-        state_change_natoms(state_global, state_global->natoms);
-        f.resizeWithPadding(state_global->natoms);
-        /* Copy the pointer to the global state */
-        state = state_global;
-
-        /* Generate and initialize new topology */
-        mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &graph, mdAtoms, constr, vsite, shellfc);
-
-        upd.setNumAtoms(state->natoms);
-    }
-
-    const auto& simulationWork     = runScheduleWork->simulationWork;
-    const bool  useGpuForPme       = simulationWork.useGpuPme;
-    const bool  useGpuForNonbonded = simulationWork.useGpuNonbonded;
-    const bool  useGpuForBufferOps = simulationWork.useGpuBufferOps;
-    const bool  useGpuForUpdate    = simulationWork.useGpuUpdate;
-
-    StatePropagatorDataGpu* stateGpu = fr->stateGpu;
-
-    if (useGpuForUpdate)
-    {
-        GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
-                                   || constr->numConstraintsTotal() == 0,
-                           "Constraints in domain decomposition are only supported with update "
-                           "groups if using GPU update.\n");
-        GMX_RELEASE_ASSERT(ir->eConstrAlg != econtSHAKE || constr == nullptr
-                                   || constr->numConstraintsTotal() == 0,
-                           "SHAKE is not supported with GPU update.");
-        GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
-                           "Either PME or short-ranged non-bonded interaction tasks must run on "
-                           "the GPU to use GPU update.\n");
-        GMX_RELEASE_ASSERT(ir->eI == eiMD,
-                           "Only the md integrator is supported with the GPU update.\n");
-        GMX_RELEASE_ASSERT(
-                ir->etc != etcNOSEHOOVER,
-                "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
-        GMX_RELEASE_ASSERT(ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN || ir->epc == epcBERENDSEN,
-                           "Only Parrinello-Rahman and Berendsen pressure coupling are supported "
-                           "with the GPU update.\n");
-        GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
-                           "Virtual sites are not supported with the GPU update.\n");
-        GMX_RELEASE_ASSERT(ed == nullptr,
-                           "Essential dynamics is not supported with the GPU update.\n");
-        GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(ir->pull),
-                           "Constraints pulling is not supported with the GPU update.\n");
-        GMX_RELEASE_ASSERT(fcd->orires.nr == 0,
-                           "Orientation restraints are not supported with the GPU update.\n");
-        GMX_RELEASE_ASSERT(ir->efep == efepNO,
-                           "Free energy perturbations are not supported with the GPU update.");
-        GMX_RELEASE_ASSERT(graph == nullptr, "The graph is not supported with GPU update.");
-
-        if (constr != nullptr && constr->numConstraintsTotal() > 0)
-        {
-            GMX_LOG(mdlog.info)
-                    .asParagraph()
-                    .appendText("Updating coordinates and applying constraints on the GPU.");
-        }
-        else
-        {
-            GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
-        }
-        integrator = std::make_unique<UpdateConstrainCuda>(
-                *ir, *top_global, stateGpu->getUpdateStream(), stateGpu->xUpdatedOnDevice());
-
-        t_pbc pbc;
-        set_pbc(&pbc, epbcXYZ, state->box);
-        integrator->setPbc(&pbc);
-    }
-
-    if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
-    {
-        changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
-    }
-    if ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
-    {
-        changePinningPolicy(&f, PinningPolicy::PinnedIfSupported);
-    }
-    if (useGpuForUpdate)
-    {
-        changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
-    }
-
-    // NOTE: The global state is no longer used at this point.
-    // But state_global is still used as temporary storage space for writing
-    // the global state to file and potentially for replica exchange.
-    // (Global topology should persist.)
-
-    update_mdatoms(mdatoms, state->lambda[efptMASS]);
-
-    if (ir->bExpanded)
-    {
-        /* Check nstexpanded here, because the grompp check was broken */
-        if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
-        {
-            gmx_fatal(FARGS,
-                      "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
-        }
-        init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist);
-    }
-
-    if (MASTER(cr))
-    {
-        EnergyElement::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
-    }
-
-    preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
-                           startingBehavior != StartingBehavior::NewSimulation);
-
-    // TODO: Remove this by converting AWH into a ForceProvider
-    auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms,
-                                startingBehavior != StartingBehavior::NewSimulation,
-                                shellfc != nullptr, opt2fn("-awh", nfile, fnm), pull_work);
-
-    if (useReplicaExchange && MASTER(cr))
-    {
-        repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, replExParams);
-    }
-    /* PME tuning is only supported in the Verlet scheme, with PME for
-     * Coulomb. It is not supported with only LJ PME. */
-    bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible
-                && ir->cutoff_scheme != ecutsGROUP);
-
-    pme_load_balancing_t* pme_loadbal = nullptr;
-    if (bPMETune)
-    {
-        pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata,
-                         fr->nbv->useGpu());
-    }
-
-    if (!ir->bContinuation)
-    {
-        if (state->flags & (1U << estV))
-        {
-            auto v = makeArrayRef(state->v);
-            /* Set the velocities of vsites, shells and frozen atoms to zero */
-            for (i = 0; i < mdatoms->homenr; i++)
-            {
-                if (mdatoms->ptype[i] == eptVSite || mdatoms->ptype[i] == eptShell)
-                {
-                    clear_rvec(v[i]);
-                }
-                else if (mdatoms->cFREEZE)
-                {
-                    for (m = 0; m < DIM; m++)
-                    {
-                        if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
-                        {
-                            v[i][m] = 0;
-                        }
-                    }
-                }
-            }
-        }
-
-        if (constr)
-        {
-            /* Constrain the initial coordinates and velocities */
-            do_constrain_first(fplog, constr, ir, mdatoms, state->natoms, state->x.arrayRefWithPadding(),
-                               state->v.arrayRefWithPadding(), state->box, state->lambda[efptBONDED]);
-        }
-        if (vsite)
-        {
-            /* Construct the virtual sites for the initial configuration */
-            construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr, top.idef.iparams,
-                             top.idef.il, fr->ePBC, fr->bMolPBC, cr, state->box);
-        }
-    }
-
-    if (ir->efep != efepNO)
-    {
-        /* Set free energy calculation frequency as the greatest common
-         * denominator of nstdhdl and repl_ex_nst. */
-        nstfep = ir->fepvals->nstdhdl;
-        if (ir->bExpanded)
-        {
-            nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
-        }
-        if (useReplicaExchange)
-        {
-            nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
-        }
-    }
-
-    /* Be REALLY careful about what flags you set here. You CANNOT assume
-     * this is the first step, since we might be restarting from a checkpoint,
-     * and in that case we should not do any modifications to the state.
-     */
-    bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
-
-    // When restarting from a checkpoint, it can be appropriate to
-    // initialize ekind from quantities in the checkpoint. Otherwise,
-    // compute_globals must initialize ekind before the simulation
-    // starts/restarts. However, only the master rank knows what was
-    // found in the checkpoint file, so we have to communicate in
-    // order to coordinate the restart.
-    //
-    // TODO Consider removing this communication if/when checkpoint
-    // reading directly follows .tpr reading, because all ranks can
-    // agree on hasReadEkinState at that time.
-    bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
-    if (PAR(cr))
-    {
-        gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
-    }
-    if (hasReadEkinState)
-    {
-        restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
-    }
-
-    unsigned int cglo_flags =
-            (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
-             | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0));
-
-    bSumEkinhOld = FALSE;
-
-    t_vcm vcm(top_global->groups, *ir);
-    reportComRemovalInfo(fplog, vcm);
-
-    /* To minimize communication, compute_globals computes the COM velocity
-     * and the kinetic energy for the velocities without COM motion removed.
-     * Thus to get the kinetic energy without the COM contribution, we need
-     * to call compute_globals twice.
-     */
-    for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
-    {
-        unsigned int cglo_flags_iteration = cglo_flags;
-        if (bStopCM && cgloIteration == 0)
-        {
-            cglo_flags_iteration |= CGLO_STOPCM;
-            cglo_flags_iteration &= ~CGLO_TEMPERATURE;
-        }
-        compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
-                        state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, nullptr, enerd,
-                        force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller,
-                        state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
-                        cglo_flags_iteration
-                                | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
-                                                                         : 0));
-        if (cglo_flags_iteration & CGLO_STOPCM)
-        {
-            /* At initialization, do not pass x with acceleration-correction mode
-             * to avoid (incorrect) correction of the initial coordinates.
-             */
-            rvec* xPtr = nullptr;
-            if (vcm.mode != ecmLINEAR_ACCELERATION_CORRECTION)
-            {
-                xPtr = state->x.rvec_array();
-            }
-            process_and_stopcm_grp(fplog, &vcm, *mdatoms, xPtr, state->v.rvec_array());
-            inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
-        }
-    }
-    checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, &top,
-                                    state->x.rvec_array(), state->box,
-                                    &shouldCheckNumberOfBondedInteractions);
-    if (ir->eI == eiVVAK)
-    {
-        /* a second call to get the half step temperature initialized as well */
-        /* we do the same call as above, but turn the pressure off -- internally to
-           compute_globals, this is recognized as a velocity verlet half-step
-           kinetic energy calculation.  This minimized excess variables, but
-           perhaps loses some logic?*/
-
-        compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
-                        state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, nullptr, enerd,
-                        force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller,
-                        state->box, nullptr, &bSumEkinhOld, cglo_flags & ~CGLO_PRESSURE);
-    }
-
-    /* Calculate the initial half step temperature, and save the ekinh_old */
-    if (startingBehavior == StartingBehavior::NewSimulation)
-    {
-        for (i = 0; (i < ir->opts.ngtc); i++)
-        {
-            copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
-        }
-    }
-
-    /* need to make an initiation call to get the Trotter variables set, as well as other constants
-       for non-trotter temperature control */
-    auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
-
-    if (MASTER(cr))
-    {
-        if (!ir->bContinuation)
-        {
-            if (constr && ir->eConstrAlg == econtLINCS)
-            {
-                fprintf(fplog, "RMS relative constraint deviation after constraining: %.2e\n",
-                        constr->rmsd());
-            }
-            if (EI_STATE_VELOCITY(ir->eI))
-            {
-                real temp = enerd->term[F_TEMP];
-                if (ir->eI != eiVV)
-                {
-                    /* Result of Ekin averaged over velocities of -half
-                     * and +half step, while we only have -half step here.
-                     */
-                    temp *= 2;
-                }
-                fprintf(fplog, "Initial temperature: %g K\n", temp);
-            }
-        }
-
-        char tbuf[20];
-        fprintf(stderr, "starting mdrun '%s'\n", *(top_global->name));
-        if (ir->nsteps >= 0)
-        {
-            sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t);
-        }
-        else
-        {
-            sprintf(tbuf, "%s", "infinite");
-        }
-        if (ir->init_step > 0)
-        {
-            fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
-                    gmx_step_str(ir->init_step + ir->nsteps, sbuf), tbuf,
-                    gmx_step_str(ir->init_step, sbuf2), ir->init_step * ir->delta_t);
-        }
-        else
-        {
-            fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf);
-        }
-        fprintf(fplog, "\n");
-    }
-
-    walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
-    print_start(fplog, cr, walltime_accounting, "mdrun");
-
-    /***********************************************************
-     *
-     *             Loop over MD steps
-     *
-     ************************************************************/
-
-    bFirstStep = TRUE;
-    /* Skip the first Nose-Hoover integration when we get the state from tpx */
-    bInitStep        = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
-    bSumEkinhOld     = FALSE;
-    bExchanged       = FALSE;
-    bNeedRepartition = FALSE;
-
-    step     = ir->init_step;
-    step_rel = 0;
-
-    auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
-            compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
-            MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
-            mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
-
-    auto checkpointHandler = std::make_unique<CheckpointHandler>(
-            compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]), simulationsShareState,
-            ir->nstlist == 0, MASTER(cr), mdrunOptions.writeConfout,
-            mdrunOptions.checkpointOptions.period);
-
-    const bool resetCountersIsLocal = true;
-    auto       resetHandler         = std::make_unique<ResetHandler>(
-            compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]),
-            !resetCountersIsLocal, ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
-            mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
-
-    const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
-
-    // TODO extract this to new multi-simulation module
-    if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
-    {
-        if (!multisim_int_all_are_equal(ms, ir->nsteps))
-        {
-            GMX_LOG(mdlog.warning)
-                    .appendText(
-                            "Note: The number of steps is not consistent across multi "
-                            "simulations,\n"
-                            "but we are proceeding anyway!");
-        }
-        if (!multisim_int_all_are_equal(ms, ir->init_step))
-        {
-            if (simulationsShareState)
-            {
-                if (MASTER(cr))
-                {
-                    gmx_fatal(FARGS,
-                              "The initial step is not consistent across multi simulations which "
-                              "share the state");
-                }
-                gmx_barrier(cr);
-            }
-            else
-            {
-                GMX_LOG(mdlog.warning)
-                        .appendText(
-                                "Note: The initial step is not consistent across multi "
-                                "simulations,\n"
-                                "but we are proceeding anyway!");
-            }
-        }
-    }
-
-    /* and stop now if we should */
-    bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
-    while (!bLastStep)
-    {
-
-        /* Determine if this is a neighbor search step */
-        bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
-
-        if (bPMETune && bNStList)
-        {
-            // This has to be here because PME load balancing is called so early.
-            // TODO: Move to after all booleans are defined.
-            if (useGpuForUpdate && !bFirstStep)
-            {
-                stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
-                stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
-            }
-            /* PME grid + cut-off optimization with GPUs or PME nodes */
-            pme_loadbal_do(pme_loadbal, cr, (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
-                           fplog, mdlog, *ir, fr, state->box, state->x, wcycle, step, step_rel,
-                           &bPMETunePrinting, simulationWork.useGpuPmePpCommunication);
-        }
-
-        wallcycle_start(wcycle, ewcSTEP);
-
-        bLastStep = (step_rel == ir->nsteps);
-        t         = t0 + step * ir->delta_t;
-
-        // TODO Refactor this, so that nstfep does not need a default value of zero
-        if (ir->efep != efepNO || ir->bSimTemp)
-        {
-            /* find and set the current lambdas */
-            setCurrentLambdasLocal(step, ir->fepvals, lam0, state->lambda, state->fep_state);
-
-            bDoDHDL     = do_per_step(step, ir->fepvals->nstdhdl);
-            bDoFEP      = ((ir->efep != efepNO) && do_per_step(step, nstfep));
-            bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
-                           && (!bFirstStep));
-        }
-
-        bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
-                     && do_per_step(step, replExParams.exchangeInterval));
-
-        if (doSimulatedAnnealing)
-        {
-            update_annealing_target_temp(ir, t, &upd);
-        }
-
-        /* Stop Center of Mass motion */
-        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
-
-        /* Determine whether or not to do Neighbour Searching */
-        bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
-
-        /* Note that the stopHandler will cause termination at nstglobalcomm
-         * steps. Since this concides with nstcalcenergy, nsttcouple and/or
-         * nstpcouple steps, we have computed the half-step kinetic energy
-         * of the previous step and can always output energies at the last step.
-         */
-        bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
-
-        /* do_log triggers energy and virial calculation. Because this leads
-         * to different code paths, forces can be different. Thus for exact
-         * continuation we should avoid extra log output.
-         * Note that the || bLastStep can result in non-exact continuation
-         * beyond the last step. But we don't consider that to be an issue.
-         */
-        do_log     = (do_per_step(step, ir->nstlog)
-                  || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep);
-        do_verbose = mdrunOptions.verbose
-                     && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
-
-        if (useGpuForUpdate && !bFirstStep && bNS)
-        {
-            // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
-            stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
-            stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
-            // Copy coordinate from the GPU when needed at the search step.
-            // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
-            // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
-            stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
-            stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
-        }
-
-        if (bNS && !(bFirstStep && ir->bContinuation))
-        {
-            bMasterState = FALSE;
-            /* Correct the new box if it is too skewed */
-            if (inputrecDynamicBox(ir))
-            {
-                if (correct_box(fplog, step, state->box, graph))
-                {
-                    bMasterState = TRUE;
-                    // If update is offloaded, it should be informed about the box size change
-                    if (useGpuForUpdate)
-                    {
-                        t_pbc pbc;
-                        set_pbc(&pbc, epbcXYZ, state->box);
-                        integrator->setPbc(&pbc);
-                    }
-                }
-            }
-            if (DOMAINDECOMP(cr) && bMasterState)
-            {
-                dd_collect_state(cr->dd, state, state_global);
-            }
-
-            if (DOMAINDECOMP(cr))
-            {
-                /* Repartition the domain decomposition */
-                dd_partition_system(fplog, mdlog, step, cr, bMasterState, nstglobalcomm, state_global,
-                                    *top_global, ir, imdSession, pull_work, state, &f, mdAtoms, &top,
-                                    fr, vsite, constr, nrnb, wcycle, do_verbose && !bPMETunePrinting);
-                shouldCheckNumberOfBondedInteractions = true;
-                upd.setNumAtoms(state->natoms);
-            }
-        }
-
-        if (MASTER(cr) && do_log)
-        {
-            energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
-        }
-
-        if (ir->efep != efepNO)
-        {
-            update_mdatoms(mdatoms, state->lambda[efptMASS]);
-        }
-
-        if (bExchanged)
-        {
-
-            /* We need the kinetic energy at minus the half step for determining
-             * the full step kinetic energy and possibly for T-coupling.*/
-            /* This may not be quite working correctly yet . . . . */
-            compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
-                            state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
-                            nullptr, nullptr, nullptr, nullptr, mu_tot, constr, &nullSignaller,
-                            state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
-                            CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
-            checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global,
-                                            &top, state->x.rvec_array(), state->box,
-                                            &shouldCheckNumberOfBondedInteractions);
-        }
-        clear_mat(force_vir);
-
-        checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
-
-        /* Determine the energy and pressure:
-         * at nstcalcenergy steps and at energy output steps (set below).
-         */
-        if (EI_VV(ir->eI) && (!bInitStep))
-        {
-            bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
-            bCalcVir      = bCalcEnerStep
-                       || (ir->epc != epcNO
-                           && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple)));
-        }
-        else
-        {
-            bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
-            bCalcVir = bCalcEnerStep || (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
-        }
-        bCalcEner = bCalcEnerStep;
-
-        do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
-
-        if (do_ene || do_log || bDoReplEx)
-        {
-            bCalcVir  = TRUE;
-            bCalcEner = TRUE;
-        }
-
-        /* Do we need global communication ? */
-        bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
-                  || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
-
-        force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
-                       | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
-                       | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
-
-        if (shellfc)
-        {
-            /* Now is the time to relax the shells */
-            relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, enforcedRotation, step, ir,
-                                imdSession, pull_work, bNS, force_flags, &top, constr, enerd, fcd,
-                                state->natoms, state->x.arrayRefWithPadding(),
-                                state->v.arrayRefWithPadding(), state->box, state->lambda, &state->hist,
-                                f.arrayRefWithPadding(), force_vir, mdatoms, nrnb, wcycle, graph,
-                                shellfc, fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler);
-        }
-        else
-        {
-            /* The AWH history need to be saved _before_ doing force calculations where the AWH bias
-               is updated (or the AWH update will be performed twice for one step when continuing).
-               It would be best to call this update function from do_md_trajectory_writing but that
-               would occur after do_force. One would have to divide the update_awh function into one
-               function applying the AWH force and one doing the AWH bias update. The update AWH
-               bias function could then be called after do_md_trajectory_writing (then containing
-               update_awh_history). The checkpointing will in the future probably moved to the start
-               of the md loop which will rid of this issue. */
-            if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
-            {
-                awh->updateHistory(state_global->awhHistory.get());
-            }
-
-            /* The coordinates (x) are shifted (to get whole molecules)
-             * in do_force.
-             * This is parallellized as well, and does communication too.
-             * Check comments in sim_util.c
-             */
-            do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession, pull_work, step,
-                     nrnb, wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist,
-                     f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, state->lambda, graph,
-                     fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
-                     (bNS ? GMX_FORCE_NS : 0) | force_flags, ddBalanceRegionHandler);
-        }
-
-        // VV integrators do not need the following velocity half step
-        // if it is the first step after starting from a checkpoint.
-        // That is, the half step is needed on all other steps, and
-        // also the first step when starting from a .tpr file.
-        if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation))
-        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
-        {
-            rvec* vbuf = nullptr;
-
-            wallcycle_start(wcycle, ewcUPDATE);
-            if (ir->eI == eiVV && bInitStep)
-            {
-                /* if using velocity verlet with full time step Ekin,
-                 * take the first half step only to compute the
-                 * virial for the first step. From there,
-                 * revert back to the initial coordinates
-                 * so that the input is actually the initial step.
-                 */
-                snew(vbuf, state->natoms);
-                copy_rvecn(state->v.rvec_array(), vbuf, 0,
-                           state->natoms); /* should make this better for parallelizing? */
-            }
-            else
-            {
-                /* this is for NHC in the Ekin(t+dt/2) version of vv */
-                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ,
-                               trotter_seq, ettTSEQ1);
-            }
-
-            update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
-                          etrtVELOCITY1, cr, constr);
-
-            wallcycle_stop(wcycle, ewcUPDATE);
-            constrain_velocities(step, nullptr, state, shake_vir, constr, bCalcVir, do_log, do_ene);
-            wallcycle_start(wcycle, ewcUPDATE);
-            /* if VV, compute the pressure and constraints */
-            /* For VV2, we strictly only need this if using pressure
-             * control, but we really would like to have accurate pressures
-             * printed out.
-             * Think about ways around this in the future?
-             * For now, keep this choice in comments.
-             */
-            /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
-            /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
-            bPres = TRUE;
-            bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
-            if (bCalcEner && ir->eI == eiVVAK)
-            {
-                bSumEkinhOld = TRUE;
-            }
-            /* for vv, the first half of the integration actually corresponds to the previous step.
-               So we need information from the last step in the first half of the integration */
-            if (bGStat || do_per_step(step - 1, nstglobalcomm))
-            {
-                wallcycle_stop(wcycle, ewcUPDATE);
-                compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
-                                state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
-                                force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller,
-                                state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
-                                (bGStat ? CGLO_GSTAT : 0) | (bCalcEner ? CGLO_ENERGY : 0)
-                                        | (bTemp ? CGLO_TEMPERATURE : 0) | (bPres ? CGLO_PRESSURE : 0)
-                                        | (bPres ? CGLO_CONSTRAINT : 0) | (bStopCM ? CGLO_STOPCM : 0)
-                                        | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
-                                                                                 : 0)
-                                        | CGLO_SCALEEKIN);
-                /* explanation of above:
-                   a) We compute Ekin at the full time step
-                   if 1) we are using the AveVel Ekin, and it's not the
-                   initial step, or 2) if we are using AveEkin, but need the full
-                   time step kinetic energy for the pressure (always true now, since we want accurate statistics).
-                   b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
-                   EkinAveVel because it's needed for the pressure */
-                checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
-                                                top_global, &top, state->x.rvec_array(), state->box,
-                                                &shouldCheckNumberOfBondedInteractions);
-                if (bStopCM)
-                {
-                    process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(),
-                                           state->v.rvec_array());
-                    inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
-                }
-                wallcycle_start(wcycle, ewcUPDATE);
-            }
-            /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
-            if (!bInitStep)
-            {
-                if (bTrotter)
-                {
-                    m_add(force_vir, shake_vir,
-                          total_vir); /* we need the un-dispersion corrected total vir here */
-                    trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ,
-                                   trotter_seq, ettTSEQ2);
-
-                    /* TODO This is only needed when we're about to write
-                     * a checkpoint, because we use it after the restart
-                     * (in a kludge?). But what should we be doing if
-                     * the startingBehavior is NewSimulation or bInitStep are true? */
-                    if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
-                    {
-                        copy_mat(shake_vir, state->svir_prev);
-                        copy_mat(force_vir, state->fvir_prev);
-                    }
-                    if ((inputrecNptTrotter(ir) || inputrecNvtTrotter(ir)) && ir->eI == eiVV)
-                    {
-                        /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
-                        enerd->term[F_TEMP] =
-                                sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
-                        enerd->term[F_EKIN] = trace(ekind->ekin);
-                    }
-                }
-                else if (bExchanged)
-                {
-                    wallcycle_stop(wcycle, ewcUPDATE);
-                    /* We need the kinetic energy at minus the half step for determining
-                     * the full step kinetic energy and possibly for T-coupling.*/
-                    /* This may not be quite working correctly yet . . . . */
-                    compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(),
-                                    state->v.rvec_array(), state->box, state->lambda[efptVDW],
-                                    mdatoms, nrnb, &vcm, wcycle, enerd, nullptr, nullptr, nullptr,
-                                    nullptr, mu_tot, constr, &nullSignaller, state->box, nullptr,
-                                    &bSumEkinhOld, CGLO_GSTAT | CGLO_TEMPERATURE);
-                    wallcycle_start(wcycle, ewcUPDATE);
-                }
-            }
-            /* if it's the initial step, we performed this first step just to get the constraint virial */
-            if (ir->eI == eiVV && bInitStep)
-            {
-                copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
-                sfree(vbuf);
-            }
-            wallcycle_stop(wcycle, ewcUPDATE);
-        }
-
-        /* compute the conserved quantity */
-        if (EI_VV(ir->eI))
-        {
-            saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
-            if (ir->eI == eiVV)
-            {
-                last_ekin = enerd->term[F_EKIN];
-            }
-            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
-            {
-                saved_conserved_quantity -= enerd->term[F_DISPCORR];
-            }
-            /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
-            if (ir->efep != efepNO)
-            {
-                sum_dhdl(enerd, state->lambda, *ir->fepvals);
-            }
-        }
-
-        /* ########  END FIRST UPDATE STEP  ############## */
-        /* ########  If doing VV, we now have v(dt) ###### */
-        if (bDoExpanded)
-        {
-            /* perform extended ensemble sampling in lambda - we don't
-               actually move to the new state before outputting
-               statistics, but if performing simulated tempering, we
-               do update the velocities and the tau_t. */
-
-            lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state,
-                                              state->dfhist, step, state->v.rvec_array(), mdatoms);
-            /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
-            if (MASTER(cr))
-            {
-                copy_df_history(state_global->dfhist, state->dfhist);
-            }
-        }
-
-        // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and
-        // coordinates have not already been copied for i) search or ii) CPU force tasks.
-        if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork
-            && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)
-                || checkpointHandler->isCheckpointingStep()))
-        {
-            stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
-            stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
-        }
-        // Copy velocities if needed for the output/checkpointing.
-        // NOTE: Copy on the search steps is done at the beginning of the step.
-        if (useGpuForUpdate && !bNS
-            && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep()))
-        {
-            stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
-            stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
-        }
-        // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps)
-        // and update is offloaded hence forces are kept on the GPU for update and have not been
-        // already transferred in do_force().
-        // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed
-        //       when the forces are ready on the GPU -- the same synchronizer should be used as the one
-        //       prior to GPU update.
-        // TODO: When the output flags will be included in step workload, this copy can be combined with the
-        //       copy call in do_force(...).
-        // NOTE: The forces should not be copied here if the vsites are present, since they were modified
-        //       on host after the D2H copy in do_force(...).
-        if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
-            && do_per_step(step, ir->nstfout))
-        {
-            stateGpu->copyForcesFromGpu(ArrayRef<RVec>(f), AtomLocality::Local);
-            stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
-        }
-        /* Now we have the energies and forces corresponding to the
-         * coordinates at time t. We must output all of this before
-         * the update.
-         */
-        do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, state_global,
-                                 observablesHistory, top_global, fr, outf, energyOutput, ekind, f,
-                                 checkpointHandler->isCheckpointingStep(), bRerunMD, bLastStep,
-                                 mdrunOptions.writeConfout, bSumEkinhOld);
-        /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
-        bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
-
-        /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
-        if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep
-            && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
-        {
-            copy_mat(state->svir_prev, shake_vir);
-            copy_mat(state->fvir_prev, force_vir);
-        }
-
-        stopHandler->setSignal();
-        resetHandler->setSignal(walltime_accounting);
-
-        if (bGStat || !PAR(cr))
-        {
-            /* In parallel we only have to check for checkpointing in steps
-             * where we do global communication,
-             *  otherwise the other nodes don't know.
-             */
-            checkpointHandler->setSignal(walltime_accounting);
-        }
-
-        /* #########   START SECOND UPDATE STEP ################# */
-
-        /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen
-           controlled in preprocessing */
-
-        if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
-        {
-            gmx_bool bIfRandomize;
-            bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
-            /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
-            if (constr && bIfRandomize)
-            {
-                constrain_velocities(step, nullptr, state, tmp_vir, constr, bCalcVir, do_log, do_ene);
-            }
-        }
-        /* Box is changed in update() when we do pressure coupling,
-         * but we should still use the old box for energy corrections and when
-         * writing it to the energy file, so it matches the trajectory files for
-         * the same timestep above. Make a copy in a separate array.
-         */
-        copy_mat(state->box, lastbox);
-
-        dvdl_constr = 0;
-
-        wallcycle_start(wcycle, ewcUPDATE);
-        /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
-        if (bTrotter)
-        {
-            trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
-            /* We can only do Berendsen coupling after we have summed
-             * the kinetic energy or virial. Since the happens
-             * in global_state after update, we should only do it at
-             * step % nstlist = 1 with bGStatEveryStep=FALSE.
-             */
-        }
-        else
-        {
-            update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
-            update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
-        }
-
-        if (EI_VV(ir->eI))
-        {
-            /* velocity half-step update */
-            update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
-                          etrtVELOCITY2, cr, constr);
-        }
-
-        /* Above, initialize just copies ekinh into ekin,
-         * it doesn't copy position (for VV),
-         * and entire integrator for MD.
-         */
-
-        if (ir->eI == eiVVAK)
-        {
-            cbuf.resize(state->x.size());
-            std::copy(state->x.begin(), state->x.end(), cbuf.begin());
-        }
-
-        /* With leap-frog type integrators we compute the kinetic energy
-         * at a whole time step as the average of the half-time step kinetic
-         * energies of two subsequent steps. Therefore we need to compute the
-         * half step kinetic energy also if we need energies at the next step.
-         */
-        const bool needHalfStepKineticEnergy =
-                (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps));
-
-        // Parrinello-Rahman requires the pressure to be availible before the update to compute
-        // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step.
-        const bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN
-                                         && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
-
-        if (useGpuForUpdate)
-        {
-            if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
-            {
-                integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(),
-                                stateGpu->getForces(), top.idef, *mdatoms, ekind->ngtc);
-
-                // Copy data to the GPU after buffers might have being reinitialized
-                stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
-                stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
-            }
-
-            // If the buffer ops were not offloaded this step, the forces are on the host and have to be copied
-            if (!runScheduleWork->stepWork.useGpuFBufferOps)
-            {
-                stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), AtomLocality::Local);
-            }
-
-            const bool doTemperatureScaling =
-                    (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
-
-            // This applies Leap-Frog, LINCS and SETTLE in succession
-            integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent(
-                                          AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
-                                  ir->delta_t, true, bCalcVir, shake_vir, doTemperatureScaling,
-                                  ekind->tcstat, doParrinelloRahman, ir->nstpcouple * ir->delta_t, M);
-
-            // Copy velocities D2H after update if:
-            // - Globals are computed this step (includes the energy output steps).
-            // - Temperature is needed for the next step.
-            if (bGStat || needHalfStepKineticEnergy)
-            {
-                stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
-                stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
-            }
-        }
-        else
-        {
-            update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
-                          etrtPOSITION, cr, constr);
-
-            wallcycle_stop(wcycle, ewcUPDATE);
-
-            constrain_coordinates(step, &dvdl_constr, state, shake_vir, &upd, constr, bCalcVir,
-                                  do_log, do_ene);
-
-            update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state, cr, nrnb, wcycle, &upd,
-                                  constr, do_log, do_ene);
-            finish_update(ir, mdatoms, state, graph, nrnb, wcycle, &upd, constr);
-        }
-
-        if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
-        {
-            updatePrevStepPullCom(pull_work, state);
-        }
-
-        if (ir->eI == eiVVAK)
-        {
-            /* erase F_EKIN and F_TEMP here? */
-            /* just compute the kinetic energy at the half step to perform a trotter step */
-            compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
-                            state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
-                            force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller, lastbox,
-                            nullptr, &bSumEkinhOld, (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE);
-            wallcycle_start(wcycle, ewcUPDATE);
-            trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
-            /* now we know the scaling, we can compute the positions again */
-            std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
-
-            update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
-                          etrtPOSITION, cr, constr);
-            wallcycle_stop(wcycle, ewcUPDATE);
-
-            /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
-            /* are the small terms in the shake_vir here due
-             * to numerical errors, or are they important
-             * physically? I'm thinking they are just errors, but not completely sure.
-             * For now, will call without actually constraining, constr=NULL*/
-            finish_update(ir, mdatoms, state, graph, nrnb, wcycle, &upd, nullptr);
-        }
-        if (EI_VV(ir->eI))
-        {
-            /* this factor or 2 correction is necessary
-               because half of the constraint force is removed
-               in the vv step, so we have to double it.  See
-               the Redmine issue #1255.  It is not yet clear
-               if the factor of 2 is exact, or just a very
-               good approximation, and this will be
-               investigated.  The next step is to see if this
-               can be done adding a dhdl contribution from the
-               rattle step, but this is somewhat more
-               complicated with the current code. Will be
-               investigated, hopefully for 4.6.3. However,
-               this current solution is much better than
-               having it completely wrong.
-             */
-            enerd->term[F_DVDL_CONSTR] += 2 * dvdl_constr;
-        }
-        else
-        {
-            enerd->term[F_DVDL_CONSTR] += dvdl_constr;
-        }
-
-        if (vsite != nullptr)
-        {
-            wallcycle_start(wcycle, ewcVSITECONSTR);
-            if (graph != nullptr)
-            {
-                shift_self(graph, state->box, state->x.rvec_array());
-            }
-            construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
-                             top.idef.iparams, top.idef.il, fr->ePBC, fr->bMolPBC, cr, state->box);
-
-            if (graph != nullptr)
-            {
-                unshift_self(graph, state->box, state->x.rvec_array());
-            }
-            wallcycle_stop(wcycle, ewcVSITECONSTR);
-        }
-
-        /* ############## IF NOT VV, Calculate globals HERE  ############ */
-        /* With Leap-Frog we can skip compute_globals at
-         * non-communication steps, but we need to calculate
-         * the kinetic energy one step before communication.
-         */
-        {
-            // Organize to do inter-simulation signalling on steps if
-            // and when algorithms require it.
-            const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
-
-            if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
-            {
-                // Copy coordinates when needed to stop the CM motion.
-                if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
-                {
-                    stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
-                    stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
-                }
-                // Since we're already communicating at this step, we
-                // can propagate intra-simulation signals. Note that
-                // check_nstglobalcomm has the responsibility for
-                // choosing the value of nstglobalcomm that is one way
-                // bGStat becomes true, so we can't get into a
-                // situation where e.g. checkpointing can't be
-                // signalled.
-                bool                doIntraSimSignal = true;
-                SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
-
-                compute_globals(
-                        gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
-                        state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
-                        force_vir, shake_vir, total_vir, pres, mu_tot, constr, &signaller, lastbox,
-                        &totalNumberOfBondedInteractions, &bSumEkinhOld,
-                        (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
-                                | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
-                                | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
-                                | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
-                                | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
-                                                                         : 0));
-                checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
-                                                top_global, &top, state->x.rvec_array(), state->box,
-                                                &shouldCheckNumberOfBondedInteractions);
-                if (!EI_VV(ir->eI) && bStopCM)
-                {
-                    process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(),
-                                           state->v.rvec_array());
-                    inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
-
-                    // TODO: The special case of removing CM motion should be dealt more gracefully
-                    if (useGpuForUpdate)
-                    {
-                        stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
-                        // Here we block until the H2D copy completes because event sync with the
-                        // force kernels that use the coordinates on the next steps is not implemented
-                        // (not because of a race on state->x being modified on the CPU while H2D is in progress).
-                        stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
-                        // If the COM removal changed the velocities on the CPU, this has to be accounted for.
-                        if (vcm.mode != ecmNO)
-                        {
-                            stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
-                        }
-                    }
-                }
-            }
-        }
-
-        /* #############  END CALC EKIN AND PRESSURE ################# */
-
-        /* Note: this is OK, but there are some numerical precision issues with using the convergence of
-           the virial that should probably be addressed eventually. state->veta has better properies,
-           but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
-           generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
-
-        if (ir->efep != efepNO && !EI_VV(ir->eI))
-        {
-            /* Sum up the foreign energy and dhdl terms for md and sd.
-               Currently done every step so that dhdl is correct in the .edr */
-            sum_dhdl(enerd, state->lambda, *ir->fepvals);
-        }
-
-        update_pcouple_after_coordinates(fplog, step, ir, mdatoms, pres, force_vir, shake_vir,
-                                         pressureCouplingMu, state, nrnb, &upd, !useGpuForUpdate);
-
-        const bool doBerendsenPressureCoupling =
-                (inputrec->epc == epcBERENDSEN && do_per_step(step, inputrec->nstpcouple));
-        if (useGpuForUpdate && (doBerendsenPressureCoupling || doParrinelloRahman))
-        {
-            integrator->scaleCoordinates(pressureCouplingMu);
-            t_pbc pbc;
-            set_pbc(&pbc, epbcXYZ, state->box);
-            integrator->setPbc(&pbc);
-        }
-
-        /* ################# END UPDATE STEP 2 ################# */
-        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
-
-        /* The coordinates (x) were unshifted in update */
-        if (!bGStat)
-        {
-            /* We will not sum ekinh_old,
-             * so signal that we still have to do it.
-             */
-            bSumEkinhOld = TRUE;
-        }
-
-        if (bCalcEner)
-        {
-            /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
-
-            /* use the directly determined last velocity, not actually the averaged half steps */
-            if (bTrotter && ir->eI == eiVV)
-            {
-                enerd->term[F_EKIN] = last_ekin;
-            }
-            enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
-
-            if (integratorHasConservedEnergyQuantity(ir))
-            {
-                if (EI_VV(ir->eI))
-                {
-                    enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
-                }
-                else
-                {
-                    enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
-                }
-            }
-            /* #########  END PREPARING EDR OUTPUT  ###########  */
-        }
-
-        /* Output stuff */
-        if (MASTER(cr))
-        {
-            if (fplog && do_log && bDoExpanded)
-            {
-                /* only needed if doing expanded ensemble */
-                PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals,
-                                          ir->bSimTemp ? ir->simtempvals : nullptr,
-                                          state_global->dfhist, state->fep_state, ir->nstlog, step);
-            }
-            if (bCalcEner)
-            {
-                energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep, t, mdatoms->tmass, enerd, state,
-                                                 ir->fepvals, ir->expandedvals, lastbox, shake_vir,
-                                                 force_vir, total_vir, pres, ekind, mu_tot, constr);
-            }
-            else
-            {
-                energyOutput.recordNonEnergyStep();
-            }
-
-            gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
-            gmx_bool do_or = do_per_step(step, ir->nstorireout);
-
-            if (doSimulatedAnnealing)
-            {
-                energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
-            }
-            if (do_log || do_ene || do_dr || do_or)
-            {
-                energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
-                                                   do_log ? fplog : nullptr, step, t, fcd, awh.get());
-            }
-
-            if (ir->bPull)
-            {
-                pull_print_output(pull_work, step, t);
-            }
-
-            if (do_per_step(step, ir->nstlog))
-            {
-                if (fflush(fplog) != 0)
-                {
-                    gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
-                }
-            }
-        }
-        if (bDoExpanded)
-        {
-            /* Have to do this part _after_ outputting the logfile and the edr file */
-            /* Gets written into the state at the beginning of next loop*/
-            state->fep_state = lamnew;
-        }
-        /* Print the remaining wall clock time for the run */
-        if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
-        {
-            if (shellfc)
-            {
-                fprintf(stderr, "\n");
-            }
-            print_time(stderr, walltime_accounting, step, ir, cr);
-        }
-
-        /* Ion/water position swapping.
-         * Not done in last step since trajectory writing happens before this call
-         * in the MD loop and exchanges would be lost anyway. */
-        bNeedRepartition = FALSE;
-        if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && do_per_step(step, ir->swap->nstswap))
-        {
-            bNeedRepartition =
-                    do_swapcoords(cr, step, t, ir, swap, wcycle, as_rvec_array(state->x.data()),
-                                  state->box, MASTER(cr) && mdrunOptions.verbose, bRerunMD);
-
-            if (bNeedRepartition && DOMAINDECOMP(cr))
-            {
-                dd_collect_state(cr->dd, state, state_global);
-            }
-        }
-
-        /* Replica exchange */
-        bExchanged = FALSE;
-        if (bDoReplEx)
-        {
-            bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
-        }
-
-        if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
-        {
-            dd_partition_system(fplog, mdlog, step, cr, TRUE, 1, state_global, *top_global, ir,
-                                imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr,
-                                nrnb, wcycle, FALSE);
-            shouldCheckNumberOfBondedInteractions = true;
-            upd.setNumAtoms(state->natoms);
-        }
-
-        bFirstStep = FALSE;
-        bInitStep  = FALSE;
-
-        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
-        /* With all integrators, except VV, we need to retain the pressure
-         * at the current step for coupling at the next step.
-         */
-        if ((state->flags & (1U << estPRES_PREV))
-            && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
-        {
-            /* Store the pressure in t_state for pressure coupling
-             * at the next MD step.
-             */
-            copy_mat(pres, state->pres_prev);
-        }
-
-        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
-
-        if ((membed != nullptr) && (!bLastStep))
-        {
-            rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
-        }
-
-        cycles = wallcycle_stop(wcycle, ewcSTEP);
-        if (DOMAINDECOMP(cr) && wcycle)
-        {
-            dd_cycles_add(cr->dd, cycles, ddCyclStep);
-        }
-
-        /* increase the MD step number */
-        step++;
-        step_rel++;
-
-#if GMX_FAHCORE
-        if (MASTER(cr))
-        {
-            fcReportProgress(ir->nsteps + ir->init_step, step);
-        }
-#endif
-
-        resetHandler->resetCounters(step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb,
-                                    fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
-
-        /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
-        imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
-    }
-    /* End of main MD loop */
-
-    /* Closing TNG files can include compressing data. Therefore it is good to do that
-     * before stopping the time measurements. */
-    mdoutf_tng_close(outf);
-
-    /* Stop measuring walltime */
-    walltime_accounting_end_time(walltime_accounting);
-
-    if (!thisRankHasDuty(cr, DUTY_PME))
-    {
-        /* Tell the PME only node to finish */
-        gmx_pme_send_finish(cr);
-    }
-
-    if (MASTER(cr))
-    {
-        if (ir->nstcalcenergy > 0)
-        {
-            energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
-            energyOutput.printAverages(fplog, groups);
-        }
-    }
-    done_mdoutf(outf);
-
-    if (bPMETune)
-    {
-        pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
-    }
-
-    done_shellfc(fplog, shellfc, step_rel);
-
-    if (useReplicaExchange && MASTER(cr))
-    {
-        print_replica_exchange_statistics(fplog, repl_ex);
-    }
-
-    walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
-
-    global_stat_destroy(gstat);
-}
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/minimize.cpp b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/minimize.cpp
deleted file mode 100644
index 33a8a544ce..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/minimize.cpp
+++ /dev/null
@@ -1,2951 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \internal \file
- *
- * \brief This file defines integrators for energy minimization
- *
- * \author Berk Hess <hess@kth.se>
- * \author Erik Lindahl <erik@kth.se>
- * \ingroup module_mdrun
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <cmath>
-#include <cstring>
-#include <ctime>
-
-#include <algorithm>
-#include <vector>
-
-#include "gromacs/commandline/filenm.h"
-#include "gromacs/domdec/collect.h"
-#include "gromacs/domdec/dlbtiming.h"
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/domdec/domdec_struct.h"
-#include "gromacs/domdec/mdsetup.h"
-#include "gromacs/domdec/partition.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/fileio/confio.h"
-#include "gromacs/fileio/mtxio.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/imd/imd.h"
-#include "gromacs/linearalgebra/sparsematrix.h"
-#include "gromacs/listed_forces/manage_threading.h"
-#include "gromacs/math/functions.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/constr.h"
-#include "gromacs/mdlib/dispersioncorrection.h"
-#include "gromacs/mdlib/ebin.h"
-#include "gromacs/mdlib/enerdata_utils.h"
-#include "gromacs/mdlib/energyoutput.h"
-#include "gromacs/mdlib/force.h"
-#include "gromacs/mdlib/forcerec.h"
-#include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/md_support.h"
-#include "gromacs/mdlib/mdatoms.h"
-#include "gromacs/mdlib/stat.h"
-#include "gromacs/mdlib/tgroup.h"
-#include "gromacs/mdlib/trajectory_writing.h"
-#include "gromacs/mdlib/update.h"
-#include "gromacs/mdlib/vsite.h"
-#include "gromacs/mdrunutility/handlerestart.h"
-#include "gromacs/mdrunutility/multisim.h" /*PLUMED*/
-#include "gromacs/mdrunutility/printtime.h"
-#include "gromacs/mdtypes/commrec.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdrunoptions.h"
-#include "gromacs/mdtypes/state.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/timing/walltime_accounting.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/topology/topology.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/logger.h"
-#include "gromacs/utility/smalloc.h"
-
-#include "legacysimulator.h"
-#include "shellfc.h"
-
-using gmx::MdrunScheduleWorkload;
-
-/* PLUMED */
-#include "../../../Plumed.h"
-extern int    plumedswitch;
-extern plumed plumedmain;
-/* END PLUMED */
-
-//! Utility structure for manipulating states during EM
-typedef struct
-{
-    //! Copy of the global state
-    t_state s;
-    //! Force array
-    PaddedHostVector<gmx::RVec> f;
-    //! Potential energy
-    real epot;
-    //! Norm of the force
-    real fnorm;
-    //! Maximum force
-    real fmax;
-    //! Direction
-    int a_fmax;
-} em_state_t;
-
-//! Print the EM starting conditions
-static void print_em_start(FILE*                     fplog,
-                           const t_commrec*          cr,
-                           gmx_walltime_accounting_t walltime_accounting,
-                           gmx_wallcycle_t           wcycle,
-                           const char*               name)
-{
-    walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
-    print_start(fplog, cr, walltime_accounting, name);
-}
-
-//! Stop counting time for EM
-static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle_t wcycle)
-{
-    wallcycle_stop(wcycle, ewcRUN);
-
-    walltime_accounting_end_time(walltime_accounting);
-}
-
-//! Printing a log file and console header
-static void sp_header(FILE* out, const char* minimizer, real ftol, int nsteps)
-{
-    fprintf(out, "\n");
-    fprintf(out, "%s:\n", minimizer);
-    fprintf(out, "   Tolerance (Fmax)   = %12.5e\n", ftol);
-    fprintf(out, "   Number of steps    = %12d\n", nsteps);
-}
-
-//! Print warning message
-static void warn_step(FILE* fp, real ftol, real fmax, gmx_bool bLastStep, gmx_bool bConstrain)
-{
-    constexpr bool realIsDouble = GMX_DOUBLE;
-    char           buffer[2048];
-
-    if (!std::isfinite(fmax))
-    {
-        sprintf(buffer,
-                "\nEnergy minimization has stopped because the force "
-                "on at least one atom is not finite. This usually means "
-                "atoms are overlapping. Modify the input coordinates to "
-                "remove atom overlap or use soft-core potentials with "
-                "the free energy code to avoid infinite forces.\n%s",
-                !realIsDouble ? "You could also be lucky that switching to double precision "
-                                "is sufficient to obtain finite forces.\n"
-                              : "");
-    }
-    else if (bLastStep)
-    {
-        sprintf(buffer,
-                "\nEnergy minimization reached the maximum number "
-                "of steps before the forces reached the requested "
-                "precision Fmax < %g.\n",
-                ftol);
-    }
-    else
-    {
-        sprintf(buffer,
-                "\nEnergy minimization has stopped, but the forces have "
-                "not converged to the requested precision Fmax < %g (which "
-                "may not be possible for your system). It stopped "
-                "because the algorithm tried to make a new step whose size "
-                "was too small, or there was no change in the energy since "
-                "last step. Either way, we regard the minimization as "
-                "converged to within the available machine precision, "
-                "given your starting configuration and EM parameters.\n%s%s",
-                ftol,
-                !realIsDouble ? "\nDouble precision normally gives you higher accuracy, but "
-                                "this is often not needed for preparing to run molecular "
-                                "dynamics.\n"
-                              : "",
-                bConstrain ? "You might need to increase your constraint accuracy, or turn\n"
-                             "off constraints altogether (set constraints = none in mdp file)\n"
-                           : "");
-    }
-
-    fputs(wrap_lines(buffer, 78, 0, FALSE), stderr);
-    fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
-}
-
-//! Print message about convergence of the EM
-static void print_converged(FILE*             fp,
-                            const char*       alg,
-                            real              ftol,
-                            int64_t           count,
-                            gmx_bool          bDone,
-                            int64_t           nsteps,
-                            const em_state_t* ems,
-                            double            sqrtNumAtoms)
-{
-    char buf[STEPSTRSIZE];
-
-    if (bDone)
-    {
-        fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", alg, ftol, gmx_step_str(count, buf));
-    }
-    else if (count < nsteps)
-    {
-        fprintf(fp,
-                "\n%s converged to machine precision in %s steps,\n"
-                "but did not reach the requested Fmax < %g.\n",
-                alg, gmx_step_str(count, buf), ftol);
-    }
-    else
-    {
-        fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", alg, ftol,
-                gmx_step_str(count, buf));
-    }
-
-#if GMX_DOUBLE
-    fprintf(fp, "Potential Energy  = %21.14e\n", ems->epot);
-    fprintf(fp, "Maximum force     = %21.14e on atom %d\n", ems->fmax, ems->a_fmax + 1);
-    fprintf(fp, "Norm of force     = %21.14e\n", ems->fnorm / sqrtNumAtoms);
-#else
-    fprintf(fp, "Potential Energy  = %14.7e\n", ems->epot);
-    fprintf(fp, "Maximum force     = %14.7e on atom %d\n", ems->fmax, ems->a_fmax + 1);
-    fprintf(fp, "Norm of force     = %14.7e\n", ems->fnorm / sqrtNumAtoms);
-#endif
-}
-
-//! Compute the norm and max of the force array in parallel
-static void get_f_norm_max(const t_commrec* cr,
-                           t_grpopts*       opts,
-                           t_mdatoms*       mdatoms,
-                           const rvec*      f,
-                           real*            fnorm,
-                           real*            fmax,
-                           int*             a_fmax)
-{
-    double fnorm2, *sum;
-    real   fmax2, fam;
-    int    la_max, a_max, start, end, i, m, gf;
-
-    /* This routine finds the largest force and returns it.
-     * On parallel machines the global max is taken.
-     */
-    fnorm2 = 0;
-    fmax2  = 0;
-    la_max = -1;
-    start  = 0;
-    end    = mdatoms->homenr;
-    if (mdatoms->cFREEZE)
-    {
-        for (i = start; i < end; i++)
-        {
-            gf  = mdatoms->cFREEZE[i];
-            fam = 0;
-            for (m = 0; m < DIM; m++)
-            {
-                if (!opts->nFreeze[gf][m])
-                {
-                    fam += gmx::square(f[i][m]);
-                }
-            }
-            fnorm2 += fam;
-            if (fam > fmax2)
-            {
-                fmax2  = fam;
-                la_max = i;
-            }
-        }
-    }
-    else
-    {
-        for (i = start; i < end; i++)
-        {
-            fam = norm2(f[i]);
-            fnorm2 += fam;
-            if (fam > fmax2)
-            {
-                fmax2  = fam;
-                la_max = i;
-            }
-        }
-    }
-
-    if (la_max >= 0 && DOMAINDECOMP(cr))
-    {
-        a_max = cr->dd->globalAtomIndices[la_max];
-    }
-    else
-    {
-        a_max = la_max;
-    }
-    if (PAR(cr))
-    {
-        snew(sum, 2 * cr->nnodes + 1);
-        sum[2 * cr->nodeid]     = fmax2;
-        sum[2 * cr->nodeid + 1] = a_max;
-        sum[2 * cr->nnodes]     = fnorm2;
-        gmx_sumd(2 * cr->nnodes + 1, sum, cr);
-        fnorm2 = sum[2 * cr->nnodes];
-        /* Determine the global maximum */
-        for (i = 0; i < cr->nnodes; i++)
-        {
-            if (sum[2 * i] > fmax2)
-            {
-                fmax2 = sum[2 * i];
-                a_max = gmx::roundToInt(sum[2 * i + 1]);
-            }
-        }
-        sfree(sum);
-    }
-
-    if (fnorm)
-    {
-        *fnorm = sqrt(fnorm2);
-    }
-    if (fmax)
-    {
-        *fmax = sqrt(fmax2);
-    }
-    if (a_fmax)
-    {
-        *a_fmax = a_max;
-    }
-}
-
-//! Compute the norm of the force
-static void get_state_f_norm_max(const t_commrec* cr, t_grpopts* opts, t_mdatoms* mdatoms, em_state_t* ems)
-{
-    get_f_norm_max(cr, opts, mdatoms, ems->f.rvec_array(), &ems->fnorm, &ems->fmax, &ems->a_fmax);
-}
-
-//! Initialize the energy minimization
-static void init_em(FILE*                fplog,
-                    const gmx::MDLogger& mdlog,
-                    const char*          title,
-                    const t_commrec*     cr,
-                    const gmx_multisim_t *ms, /* PLUMED */
-                    t_inputrec*          ir,
-                    gmx::ImdSession*     imdSession,
-                    pull_t*              pull_work,
-                    t_state*             state_global,
-                    gmx_mtop_t*          top_global,
-                    em_state_t*          ems,
-                    gmx_localtop_t*      top,
-                    t_nrnb*              nrnb,
-                    t_forcerec*          fr,
-                    t_graph**            graph,
-                    gmx::MDAtoms*        mdAtoms,
-                    gmx_global_stat_t*   gstat,
-                    gmx_vsite_t*         vsite,
-                    gmx::Constraints*    constr,
-                    gmx_shellfc_t**      shellfc)
-{
-    real dvdl_constr;
-
-    if (fplog)
-    {
-        fprintf(fplog, "Initiating %s\n", title);
-    }
-
-    if (MASTER(cr))
-    {
-        state_global->ngtc = 0;
-    }
-    initialize_lambdas(fplog, *ir, MASTER(cr), &(state_global->fep_state), state_global->lambda, nullptr);
-
-    if (ir->eI == eiNM)
-    {
-        GMX_ASSERT(shellfc != nullptr, "With NM we always support shells");
-
-        *shellfc = init_shell_flexcon(stdout, top_global, constr ? constr->numFlexibleConstraints() : 0,
-                                      ir->nstcalcenergy, DOMAINDECOMP(cr));
-    }
-    else
-    {
-        GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI),
-                   "This else currently only handles energy minimizers, consider if your algorithm "
-                   "needs shell/flexible-constraint support");
-
-        /* With energy minimization, shells and flexible constraints are
-         * automatically minimized when treated like normal DOFS.
-         */
-        if (shellfc != nullptr)
-        {
-            *shellfc = nullptr;
-        }
-    }
-
-    auto mdatoms = mdAtoms->mdatoms();
-    if (DOMAINDECOMP(cr))
-    {
-        top->useInDomainDecomp_ = true;
-        dd_init_local_top(*top_global, top);
-
-        dd_init_local_state(cr->dd, state_global, &ems->s);
-
-        /* Distribute the charge groups over the nodes from the master node */
-        dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir,
-                            imdSession, pull_work, &ems->s, &ems->f, mdAtoms, top, fr, vsite,
-                            constr, nrnb, nullptr, FALSE);
-        dd_store_state(cr->dd, &ems->s);
-
-        *graph = nullptr;
-    }
-    else
-    {
-        state_change_natoms(state_global, state_global->natoms);
-        /* Just copy the state */
-        ems->s = *state_global;
-        state_change_natoms(&ems->s, ems->s.natoms);
-        ems->f.resizeWithPadding(ems->s.natoms);
-
-        mdAlgorithmsSetupAtomData(cr, ir, *top_global, top, fr, graph, mdAtoms, constr, vsite,
-                                  shellfc ? *shellfc : nullptr);
-
-        if (vsite)
-        {
-            set_vsite_top(vsite, top, mdatoms);
-        }
-    }
-
-    update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[efptMASS]);
-
-    if (constr)
-    {
-        // TODO how should this cross-module support dependency be managed?
-        if (ir->eConstrAlg == econtSHAKE && gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
-        {
-            gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
-                      econstr_names[econtSHAKE], econstr_names[econtLINCS]);
-        }
-
-        if (!ir->bContinuation)
-        {
-            /* Constrain the starting coordinates */
-            dvdl_constr = 0;
-            constr->apply(TRUE, TRUE, -1, 0, 1.0, ems->s.x.rvec_array(), ems->s.x.rvec_array(),
-                          nullptr, ems->s.box, ems->s.lambda[efptFEP], &dvdl_constr, nullptr,
-                          nullptr, gmx::ConstraintVariable::Positions);
-        }
-    }
-
-    if (PAR(cr))
-    {
-        *gstat = global_stat_init(ir);
-    }
-    else
-    {
-        *gstat = nullptr;
-    }
-
-    calc_shifts(ems->s.box, fr->shift_vec);
-
-    /* PLUMED */
-    if(plumedswitch){
-      if(ms && ms->nsim>1) {
-        if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&ms->mpi_comm_masters);
-        if(PAR(cr)){
-          if(DOMAINDECOMP(cr)) {
-            plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all);
-          }else{
-            plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim);
-          }
-        }
-        plumed_cmd(plumedmain,"GREX init",NULL);
-      }
-      if(PAR(cr)){
-        if(DOMAINDECOMP(cr)) {
-          plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all);
-        }else{
-          plumed_cmd(plumedmain,"setMPIComm",&cr->mpi_comm_mysim);
-        }
-      }
-      plumed_cmd(plumedmain,"setNatoms",&top_global->natoms);
-      plumed_cmd(plumedmain,"setMDEngine","gromacs");
-      plumed_cmd(plumedmain,"setLog",fplog);
-      real real_delta_t;
-      real_delta_t=ir->delta_t;
-      plumed_cmd(plumedmain,"setTimestep",&real_delta_t);
-      plumed_cmd(plumedmain,"init",NULL);
-
-      if(PAR(cr)){
-        if(DOMAINDECOMP(cr)) {
-          int nat_home = dd_numHomeAtoms(*cr->dd);
-          plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home);
-          plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data());
-
-        }
-      }
-    }
-    /* END PLUMED */
-}
-
-//! Finalize the minimization
-static void finish_em(const t_commrec*          cr,
-                      gmx_mdoutf_t              outf,
-                      gmx_walltime_accounting_t walltime_accounting,
-                      gmx_wallcycle_t           wcycle)
-{
-    if (!thisRankHasDuty(cr, DUTY_PME))
-    {
-        /* Tell the PME only node to finish */
-        gmx_pme_send_finish(cr);
-    }
-
-    done_mdoutf(outf);
-
-    em_time_end(walltime_accounting, wcycle);
-}
-
-//! Swap two different EM states during minimization
-static void swap_em_state(em_state_t** ems1, em_state_t** ems2)
-{
-    em_state_t* tmp;
-
-    tmp   = *ems1;
-    *ems1 = *ems2;
-    *ems2 = tmp;
-}
-
-//! Save the EM trajectory
-static void write_em_traj(FILE*               fplog,
-                          const t_commrec*    cr,
-                          gmx_mdoutf_t        outf,
-                          gmx_bool            bX,
-                          gmx_bool            bF,
-                          const char*         confout,
-                          gmx_mtop_t*         top_global,
-                          t_inputrec*         ir,
-                          int64_t             step,
-                          em_state_t*         state,
-                          t_state*            state_global,
-                          ObservablesHistory* observablesHistory)
-{
-    int mdof_flags = 0;
-
-    if (bX)
-    {
-        mdof_flags |= MDOF_X;
-    }
-    if (bF)
-    {
-        mdof_flags |= MDOF_F;
-    }
-
-    /* If we want IMD output, set appropriate MDOF flag */
-    if (ir->bIMD)
-    {
-        mdof_flags |= MDOF_IMD;
-    }
-
-    mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global->natoms, step,
-                                     static_cast<double>(step), &state->s, state_global,
-                                     observablesHistory, state->f);
-
-    if (confout != nullptr)
-    {
-        if (DOMAINDECOMP(cr))
-        {
-            /* If bX=true, x was collected to state_global in the call above */
-            if (!bX)
-            {
-                auto globalXRef = MASTER(cr) ? state_global->x : gmx::ArrayRef<gmx::RVec>();
-                dd_collect_vec(cr->dd, &state->s, state->s.x, globalXRef);
-            }
-        }
-        else
-        {
-            /* Copy the local state pointer */
-            state_global = &state->s;
-        }
-
-        if (MASTER(cr))
-        {
-            if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
-            {
-                /* Make molecules whole only for confout writing */
-                do_pbc_mtop(ir->ePBC, state->s.box, top_global, state_global->x.rvec_array());
-            }
-
-            write_sto_conf_mtop(confout, *top_global->name, top_global,
-                                state_global->x.rvec_array(), nullptr, ir->ePBC, state->s.box);
-        }
-    }
-}
-
-//! \brief Do one minimization step
-//
-// \returns true when the step succeeded, false when a constraint error occurred
-static bool do_em_step(const t_commrec*                   cr,
-                       t_inputrec*                        ir,
-                       t_mdatoms*                         md,
-                       em_state_t*                        ems1,
-                       real                               a,
-                       const PaddedHostVector<gmx::RVec>* force,
-                       em_state_t*                        ems2,
-                       gmx::Constraints*                  constr,
-                       int64_t                            count)
-
-{
-    t_state *s1, *s2;
-    int      start, end;
-    real     dvdl_constr;
-    int nthreads gmx_unused;
-
-    bool validStep = true;
-
-    s1 = &ems1->s;
-    s2 = &ems2->s;
-
-    if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
-    {
-        gmx_incons("state mismatch in do_em_step");
-    }
-
-    s2->flags = s1->flags;
-
-    if (s2->natoms != s1->natoms)
-    {
-        state_change_natoms(s2, s1->natoms);
-        ems2->f.resizeWithPadding(s2->natoms);
-    }
-    if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size())
-    {
-        s2->cg_gl.resize(s1->cg_gl.size());
-    }
-
-    copy_mat(s1->box, s2->box);
-    /* Copy free energy state */
-    s2->lambda = s1->lambda;
-    copy_mat(s1->box, s2->box);
-
-    start = 0;
-    end   = md->homenr;
-
-    nthreads = gmx_omp_nthreads_get(emntUpdate);
-#pragma omp parallel num_threads(nthreads)
-    {
-        const rvec* x1 = s1->x.rvec_array();
-        rvec*       x2 = s2->x.rvec_array();
-        const rvec* f  = force->rvec_array();
-
-        int gf = 0;
-#pragma omp for schedule(static) nowait
-        for (int i = start; i < end; i++)
-        {
-            try
-            {
-                if (md->cFREEZE)
-                {
-                    gf = md->cFREEZE[i];
-                }
-                for (int m = 0; m < DIM; m++)
-                {
-                    if (ir->opts.nFreeze[gf][m])
-                    {
-                        x2[i][m] = x1[i][m];
-                    }
-                    else
-                    {
-                        x2[i][m] = x1[i][m] + a * f[i][m];
-                    }
-                }
-            }
-            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-        }
-
-        if (s2->flags & (1 << estCGP))
-        {
-            /* Copy the CG p vector */
-            const rvec* p1 = s1->cg_p.rvec_array();
-            rvec*       p2 = s2->cg_p.rvec_array();
-#pragma omp for schedule(static) nowait
-            for (int i = start; i < end; i++)
-            {
-                // Trivial OpenMP block that does not throw
-                copy_rvec(p1[i], p2[i]);
-            }
-        }
-
-        if (DOMAINDECOMP(cr))
-        {
-            /* OpenMP does not supported unsigned loop variables */
-#pragma omp for schedule(static) nowait
-            for (gmx::index i = 0; i < gmx::ssize(s2->cg_gl); i++)
-            {
-                s2->cg_gl[i] = s1->cg_gl[i];
-            }
-        }
-    }
-
-    if (DOMAINDECOMP(cr))
-    {
-        s2->ddp_count       = s1->ddp_count;
-        s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
-    }
-
-    if (constr)
-    {
-        dvdl_constr = 0;
-        validStep = constr->apply(TRUE, TRUE, count, 0, 1.0, s1->x.rvec_array(), s2->x.rvec_array(),
-                                  nullptr, s2->box, s2->lambda[efptBONDED], &dvdl_constr, nullptr,
-                                  nullptr, gmx::ConstraintVariable::Positions);
-
-        if (cr->nnodes > 1)
-        {
-            /* This global reduction will affect performance at high
-             * parallelization, but we can not really avoid it.
-             * But usually EM is not run at high parallelization.
-             */
-            int reductionBuffer = static_cast<int>(!validStep);
-            gmx_sumi(1, &reductionBuffer, cr);
-            validStep = (reductionBuffer == 0);
-        }
-
-        // We should move this check to the different minimizers
-        if (!validStep && ir->eI != eiSteep)
-        {
-            gmx_fatal(FARGS,
-                      "The coordinates could not be constrained. Minimizer '%s' can not handle "
-                      "constraint failures, use minimizer '%s' before using '%s'.",
-                      EI(ir->eI), EI(eiSteep), EI(ir->eI));
-        }
-    }
-
-    return validStep;
-}
-
-/*! \brief Repartition the domain decomposition for an energy-minimization state.
- *
- * Forwards to dd_partition_system() using the local state \c ems->s and the
- * force buffer \c ems->f, then calls dd_store_state() on \c ems->s.
- *
- * \param[in]     step  Step number handed to dd_partition_system(); the callers
- *                      in this file pass the evaluation count, the minimizer
- *                      step, or -1.
- * \param[in,out] ems   Minimizer state whose \c s and \c f members are passed
- *                      to the repartitioning.
- *
- * All other arguments are passed through unchanged (\p top_global is
- * dereferenced). The literal FALSE, 1, nullptr and trailing FALSE arguments
- * are fixed here; their meaning is defined by dd_partition_system().
- * NOTE(review): presumably "use master state", nstglobalcomm, the global
- * state and verbosity -- confirm against the domdec API.
- */
-static void em_dd_partition_system(FILE*                fplog,
-                                   const gmx::MDLogger& mdlog,
-                                   int                  step,
-                                   const t_commrec*     cr,
-                                   gmx_mtop_t*          top_global,
-                                   t_inputrec*          ir,
-                                   gmx::ImdSession*     imdSession,
-                                   pull_t*              pull_work,
-                                   em_state_t*          ems,
-                                   gmx_localtop_t*      top,
-                                   gmx::MDAtoms*        mdAtoms,
-                                   t_forcerec*          fr,
-                                   gmx_vsite_t*         vsite,
-                                   gmx::Constraints*    constr,
-                                   t_nrnb*              nrnb,
-                                   gmx_wallcycle_t      wcycle)
-{
-    /* Repartition the domain decomposition */
-    dd_partition_system(fplog, mdlog, step, cr, FALSE, 1, nullptr, *top_global, ir, imdSession, pull_work,
-                        &ems->s, &ems->f, mdAtoms, top, fr, vsite, constr, nrnb, wcycle, FALSE);
-    dd_store_state(cr->dd, &ems->s);
-}
-
-namespace
-{
-
-/*! \brief Class to handle the work of setting and doing an energy evaluation.
- *
- * This class is a mere aggregate of parameters to pass to evaluate an
- * energy, so that future changes to names and types of them consume
- * less time when refactoring other code.
- *
- * Aggregate initialization is used, for which the chief risk is that
- * if a member is added at the end and not all initializer lists are
- * updated, then the member will be value initialized, which will
- * typically mean initialization to zero.
- *
- * Because the members are filled positionally, the declaration order
- * below must match the order used in every braced initializer list
- * (for example the one in LegacySimulator::do_cg()).
- *
- * Use a braced initializer list to construct one of these. */
-class EnergyEvaluator
-{
-public:
-    /*! \brief Evaluates an energy on the state in \c ems.
-     *
-     * \param[in,out] ems     State to evaluate. Its force buffer is handed to
-     *                        do_force(), \c epot is set from the potential
-     *                        energy term, and for energy-minimization
-     *                        integrators it is passed to get_state_f_norm_max().
-     * \param[in,out] mu_tot  Forwarded to do_force() and global_stat().
-     * \param[out]    vir     Receives the force virial, plus the constraint
-     *                        virial when constraints are present.
-     * \param[out]    pres    Cleared, then filled by calc_pres().
-     * \param[in]     count   Step counter forwarded to domain-decomposition
-     *                        repartitioning, PLUMED, do_force() and the
-     *                        constraint projection.
-     * \param[in]     bFirst  When true, a neighbor-search step is requested.
-     *
-     * \todo In practice, the same objects mu_tot, vir, and pres
-     * are always passed to this function, so we would rather have
-     * them as data members. However, their C-array types are
-     * unsuited for aggregate initialization. When the types
-     * improve, the call signature of this method can be reduced.
-     */
-    void run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst);
-    //! Handles logging (deprecated).
-    FILE* fplog;
-    //! Handles logging.
-    const gmx::MDLogger& mdlog;
-    //! Handles communication.
-    const t_commrec* cr;
-    //! Coordinates multi-simulations.
-    const gmx_multisim_t* ms;
-    //! Holds the simulation topology.
-    gmx_mtop_t* top_global;
-    //! Holds the domain topology.
-    gmx_localtop_t* top;
-    //! User input options.
-    t_inputrec* inputrec;
-    //! The Interactive Molecular Dynamics session.
-    gmx::ImdSession* imdSession;
-    //! The pull work object.
-    pull_t* pull_work;
-    //! Manages flop accounting.
-    t_nrnb* nrnb;
-    //! Manages wall cycle accounting.
-    gmx_wallcycle_t wcycle;
-    //! Coordinates global reduction.
-    gmx_global_stat_t gstat;
-    //! Handles virtual sites.
-    gmx_vsite_t* vsite;
-    //! Handles constraints.
-    gmx::Constraints* constr;
-    //! Opaque force-calculation data that run() forwards to do_force().
-    t_fcdata* fcd;
-    //! Molecular graph for SHAKE.
-    t_graph* graph;
-    //! Per-atom data for this domain.
-    gmx::MDAtoms* mdAtoms;
-    //! Handles how to calculate the forces.
-    t_forcerec* fr;
-    //! Schedule of force-calculation work each step for this task.
-    MdrunScheduleWorkload* runScheduleWork;
-    //! Stores the computed energies.
-    gmx_enerdata_t* enerd;
-};
-
-void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst)
-{
-    real     t;
-    gmx_bool bNS;
-    tensor   force_vir, shake_vir, ekin;
-    real     dvdl_constr;
-    real     terminate = 0; // only handed to global_stat(); not read afterwards in this function
-
-    /* Overview of one energy evaluation on the state in ems:
-     *  1. decide whether this is a neighbor-search step (bNS),
-     *  2. construct virtual sites and, with domain decomposition on a
-     *     search step, repartition,
-     *  3. give PLUMED the current state, then call do_force(),
-     *  4. fold the PLUMED virial into force_vir,
-     *  5. reduce energies over ranks, apply the dispersion correction,
-     *  6. project out constraint force components and compute the pressure,
-     *  7. for EM integrators, refresh the force norm data of ems.
-     *
-     * Set the time to the initial time, the time does not change during EM */
-    t = inputrec->init_t;
-
-    if (bFirst || (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
-    {
-        /* This is the first state or an old state used before the last ns */
-        bNS = TRUE;
-    }
-    else
-    {
-        bNS = FALSE; // stays FALSE only when nstlist <= 0
-        if (inputrec->nstlist > 0)
-        {
-            bNS = TRUE;
-        }
-    }
-
-    if (vsite) // virtual sites are built from ems->s.x before repartitioning and the force call
-    {
-        construct_vsites(vsite, ems->s.x.rvec_array(), 1, nullptr, top->idef.iparams, top->idef.il,
-                         fr->ePBC, fr->bMolPBC, cr, ems->s.box);
-    }
-
-    if (DOMAINDECOMP(cr) && bNS)
-    {
-        /* Repartition the domain decomposition
-         * NOTE(review): count is int64_t but the step parameter of
-         * em_dd_partition_system() is int, so the value is converted here.
-         */
-        em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, imdSession, pull_work,
-                               ems, top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
-    }
-
-    /* Calc force & energy on new trial position  */
-    /* do_force always puts the charge groups in the box and shifts again
-     * We do not unshift, so molecules are always whole in congrad.c
-     */
-    /* PLUMED
-     * Hand the current step, positions, masses, charges and box to PLUMED,
-     * issue "prepareCalc", and register ems->f and plumed_vir (zeroed first)
-     * as the force and virial buffers. plumedNeedsEnergy is filled in by the
-     * "isEnergyNeeded" query and selects the branch taken after do_force().
-     * NOTE(review): count (int64_t) is converted to long int for
-     * "setStepLong".
-     */
-    int plumedNeedsEnergy=0;
-    matrix plumed_vir;
-    if(plumedswitch){
-      long int lstep=count; plumed_cmd(plumedmain,"setStepLong",&lstep);
-      plumed_cmd(plumedmain,"setPositions",&ems->s.x[0][0]);
-      plumed_cmd(plumedmain,"setMasses",&mdAtoms->mdatoms()->massT[0]);
-      plumed_cmd(plumedmain,"setCharges",&mdAtoms->mdatoms()->chargeA[0]);
-      plumed_cmd(plumedmain,"setBox",&ems->s.box[0][0]);
-      plumed_cmd(plumedmain,"prepareCalc",NULL);
-      plumed_cmd(plumedmain,"setForces",&ems->f[0][0]);
-      plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
-      clear_mat(plumed_vir);
-      plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]);
-    }
-    /* END PLUMED */
-
-    do_force(fplog, cr, ms, inputrec, nullptr, nullptr, imdSession, pull_work, count, nrnb, wcycle,
-             top, ems->s.box, ems->s.x.arrayRefWithPadding(), &ems->s.hist,
-             ems->f.arrayRefWithPadding(), force_vir, mdAtoms->mdatoms(), enerd, fcd, ems->s.lambda,
-             graph, fr, runScheduleWork, vsite, mu_tot, t, nullptr,
-             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY
-                     | (bNS ? GMX_FORCE_NS : 0),
-             DDBalanceRegionHandler(cr));
-
-    /* PLUMED
-     * When PLUMED asked for the energy: scale force_vir by 2 into plumed_vir,
-     * pass the potential energy, issue "performCalc", then scale plumed_vir
-     * by 0.5 back into force_vir. Otherwise only halve plumed_vir and add it
-     * to force_vir.
-     * NOTE(review): this reading assumes msmul(src, factor, dst) stores
-     * factor*src in dst, and that in the else-branch "performCalc" was
-     * already issued elsewhere (presumably inside the patched do_force) --
-     * confirm.
-     */
-    if(plumedswitch){
-      if(plumedNeedsEnergy) {
-        msmul(force_vir,2.0,plumed_vir);
-        plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]);
-        plumed_cmd(plumedmain,"performCalc",NULL);
-        msmul(plumed_vir,0.5,force_vir);
-      } else {
-        msmul(plumed_vir,0.5,plumed_vir);
-        m_add(force_vir,plumed_vir,force_vir);
-      }
-    }
-    /* END PLUMED */
-
-    /* Clear the unused shake virial and pressure */
-    clear_mat(shake_vir);
-    clear_mat(pres);
-
-    /* Communicate stuff when parallel */
-    if (PAR(cr) && inputrec->eI != eiNM)
-    {
-        wallcycle_start(wcycle, ewcMoveE);
-
-        global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot, inputrec, nullptr, nullptr, nullptr,
-                    1, &terminate, nullptr, FALSE, CGLO_ENERGY | CGLO_PRESSURE | CGLO_CONSTRAINT);
-
-        wallcycle_stop(wcycle, ewcMoveE);
-    }
-
-    if (fr->dispersionCorrection)
-    {
-        /* Calculate long range corrections to pressure and energy
-         * NOTE(review): the increment of F_PRES below is overwritten by the
-         * assignment from calc_pres() further down, so only the energy and
-         * dvdl contributions survive this function -- confirm this is
-         * intended.
-         */
-        const DispersionCorrection::Correction correction =
-                fr->dispersionCorrection->calculate(ems->s.box, ems->s.lambda[efptVDW]);
-
-        enerd->term[F_DISPCORR] = correction.energy;
-        enerd->term[F_EPOT] += correction.energy;
-        enerd->term[F_PRES] += correction.pressure;
-        enerd->term[F_DVDL] += correction.dvdl;
-    }
-    else
-    {
-        enerd->term[F_DISPCORR] = 0;
-    }
-
-    ems->epot = enerd->term[F_EPOT]; // includes the dispersion correction added above
-
-    if (constr)
-    {
-        /* Project out the constraint components of the force
-         * The force array is passed twice to apply(), so ems->f is updated
-         * in place; shake_vir receives the constraint virial, which is
-         * added to force_vir to give vir.
-         */
-        dvdl_constr  = 0;
-        rvec* f_rvec = ems->f.rvec_array();
-        constr->apply(FALSE, FALSE, count, 0, 1.0, ems->s.x.rvec_array(), f_rvec, f_rvec,
-                      ems->s.box, ems->s.lambda[efptBONDED], &dvdl_constr, nullptr, &shake_vir,
-                      gmx::ConstraintVariable::ForceDispl);
-        enerd->term[F_DVDL_CONSTR] += dvdl_constr;
-        m_add(force_vir, shake_vir, vir);
-    }
-    else
-    {
-        copy_mat(force_vir, vir);
-    }
-
-    clear_mat(ekin); // calc_pres() is given an all-zero kinetic energy tensor
-    enerd->term[F_PRES] = calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);
-
-    sum_dhdl(enerd, ems->s.lambda, *inputrec->fepvals);
-
-    if (EI_ENERGY_MINIMIZATION(inputrec->eI)) // presumably refreshes ems->fnorm/fmax/a_fmax -- TODO confirm
-    {
-        get_state_f_norm_max(cr, &(inputrec->opts), mdAtoms->mdatoms(), ems);
-    }
-}
-
-} // namespace
-
-/*! \brief Local part of the Polak-Ribiere sum when \p s_min and \p s_b use
- * different local atom orderings.
- *
- * The forces of \p s_min are scattered into a global array indexed by global
- * atom index and summed over all ranks. The local atoms of \p s_b are then
- * visited, and for every non-frozen dimension the term
- * (f_b - f_min) * f_b is accumulated, with f_min looked up by global index.
- *
- * \param[in] cr          Communication record used for the global force sum.
- * \param[in] opts        Group options; \c nFreeze selects frozen dimensions.
- * \param[in] top_global  Global topology; supplies the atom count and the
- *                        freeze-group number of every global atom.
- * \param[in] s_min       State whose forces are gathered globally.
- * \param[in] s_b         State whose local atoms define the partial sum.
- * \returns The contribution of this rank; the caller reduces it over ranks.
- */
-static double reorder_partsum(const t_commrec* cr,
-                              t_grpopts*       opts,
-                              gmx_mtop_t*      top_global,
-                              em_state_t*      s_min,
-                              em_state_t*      s_b)
-{
-    if (debug)
-    {
-        fprintf(debug, "Doing reorder_partsum\n");
-    }
-
-    const rvec* fm = s_min->f.rvec_array();
-    const rvec* fb = s_b->f.rvec_array();
-
-    /* Collect fm in a global vector fmg.
-     * This conflicts with the spirit of domain decomposition,
-     * but to fully optimize this a much more complicated algorithm is required.
-     */
-    const int natoms = top_global->natoms;
-    rvec*     fmg;
-    snew(fmg, natoms);
-
-    /* cg_gl maps the local index i to the global atom index a */
-    gmx::ArrayRef<const int> indicesMin = s_min->s.cg_gl;
-    int                      i          = 0;
-    for (int a : indicesMin)
-    {
-        copy_rvec(fm[i], fmg[a]);
-        i++;
-    }
-    gmx_sum(natoms * DIM, fmg[0], cr);
-
-    /* Now we will determine the part of the sum for the cgs in state s_b */
-    gmx::ArrayRef<const int> indicesB = s_b->s.cg_gl;
-
-    double partsum                  = 0;
-    i                               = 0;
-    int                          gf = 0;
-    gmx::ArrayRef<unsigned char> grpnrFREEZE =
-            top_global->groups.groupNumbers[SimulationAtomGroupType::Freeze];
-    for (int a : indicesB)
-    {
-        if (!grpnrFREEZE.empty())
-        {
-            /* The group numbers come from the global topology, so they must
-             * be looked up with the global atom index a, not the local
-             * index i (which is only valid for the local arrays fb).
-             */
-            gf = grpnrFREEZE[a];
-        }
-        for (int m = 0; m < DIM; m++)
-        {
-            if (!opts->nFreeze[gf][m])
-            {
-                partsum += (fb[i][m] - fmg[a][m]) * fb[i][m];
-            }
-        }
-        i++;
-    }
-
-    sfree(fmg);
-
-    return partsum;
-}
-
-/*! \brief Compute the Polak-Ribiere beta used for the next search direction.
- *
- * Accumulates (f_b - f_min) * f_b over all non-frozen dimensions, sums the
- * result over ranks when running in parallel, and divides by the squared
- * force norm of \p s_min. Nothing is printed.
- */
-static real pr_beta(const t_commrec* cr,
-                    t_grpopts*       opts,
-                    t_mdatoms*       mdatoms,
-                    gmx_mtop_t*      top_global,
-                    em_state_t*      s_min,
-                    em_state_t*      s_b)
-{
-    /* The direct loop is used without domain decomposition, or when both
-     * states carry the current ddp_count. In every other case the atom
-     * ordering in s_b and s_min might differ and the sum has to be
-     * reordered.
-     */
-    const bool useDirectLoop =
-            !DOMAINDECOMP(cr)
-            || (s_min->s.ddp_count == cr->dd->ddp_count && s_b->s.ddp_count == cr->dd->ddp_count);
-
-    double numerator;
-    if (useDirectLoop)
-    {
-        const rvec* forceMin    = s_min->f.rvec_array();
-        const rvec* forceB      = s_b->f.rvec_array();
-        int         freezeGroup = 0;
-
-        numerator = 0;
-        for (int atom = 0; atom < mdatoms->homenr; atom++)
-        {
-            if (mdatoms->cFREEZE)
-            {
-                freezeGroup = mdatoms->cFREEZE[atom];
-            }
-            for (int dim = 0; dim < DIM; dim++)
-            {
-                if (opts->nFreeze[freezeGroup][dim])
-                {
-                    /* Frozen dimensions do not contribute */
-                    continue;
-                }
-                numerator += (forceB[atom][dim] - forceMin[atom][dim]) * forceB[atom][dim];
-            }
-        }
-    }
-    else
-    {
-        /* We need to reorder cgs while summing */
-        numerator = reorder_partsum(cr, opts, top_global, s_min, s_b);
-    }
-
-    if (PAR(cr))
-    {
-        gmx_sumd(1, &numerator, cr);
-    }
-
-    return numerator / gmx::square(s_min->fnorm);
-}
-
-namespace gmx
-{
-
-void LegacySimulator::do_cg()
-{
-    const char* CG = "Polak-Ribiere Conjugate Gradients";
-
-    gmx_localtop_t    top;
-    gmx_global_stat_t gstat;
-    t_graph*          graph;
-    double            tmp, minstep;
-    real              stepsize;
-    real              a, b, c, beta = 0.0;
-    real              epot_repl = 0;
-    real              pnorm;
-    gmx_bool          converged, foundlower;
-    rvec              mu_tot = { 0 };
-    gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
-    tensor            vir, pres;
-    int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
-    int               m, step, nminstep;
-    auto              mdatoms = mdAtoms->mdatoms();
-
-    GMX_LOG(mdlog.info)
-            .asParagraph()
-            .appendText(
-                    "Note that activating conjugate gradient energy minimization via the "
-                    "integrator .mdp option and the command gmx mdrun may "
-                    "be available in a different form in a future version of GROMACS, "
-                    "e.g. gmx minimize and an .mdp option.");
-
-    step = 0;
-
-    if (MASTER(cr))
-    {
-        // In CG, the state is extended with a search direction
-        state_global->flags |= (1 << estCGP);
-
-        // Ensure the extra per-atom state array gets allocated
-        state_change_natoms(state_global, state_global->natoms);
-
-        // Initialize the search direction to zero
-        for (RVec& cg_p : state_global->cg_p)
-        {
-            cg_p = { 0, 0, 0 };
-        }
-    }
-
-    /* Create 4 states on the stack and extract pointers that we will swap */
-    em_state_t  s0{}, s1{}, s2{}, s3{};
-    em_state_t* s_min = &s0;
-    em_state_t* s_a   = &s1;
-    em_state_t* s_b   = &s2;
-    em_state_t* s_c   = &s3;
-
-    /* Init em and store the local state in s_min */
-    init_em(fplog, mdlog, CG, cr, ms /*PLUMED*/, inputrec, imdSession, pull_work, state_global, top_global, s_min,
-            &top, nrnb, fr, &graph, mdAtoms, &gstat, vsite, constr, nullptr);
-    const bool        simulationsShareState = false;
-    gmx_mdoutf*       outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider,
-                                   mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
-                                   StartingBehavior::NewSimulation, simulationsShareState, ms);
-    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, nullptr,
-                                   false, StartingBehavior::NewSimulation, mdModulesNotifier);
-
-    /* Print to log file */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
-
-    /* Max number of steps */
-    number_steps = inputrec->nsteps;
-
-    if (MASTER(cr))
-    {
-        sp_header(stderr, CG, inputrec->em_tol, number_steps);
-    }
-    if (fplog)
-    {
-        sp_header(fplog, CG, inputrec->em_tol, number_steps);
-    }
-
-    EnergyEvaluator energyEvaluator{
-        fplog,      mdlog,     cr,      ms,     top_global,      &top,  inputrec,
-        imdSession, pull_work, nrnb,    wcycle, gstat,           vsite, constr,
-        fcd,        graph,     mdAtoms, fr,     runScheduleWork, enerd
-    };
-    /* Call the force routine and some auxiliary (neighboursearching etc.) */
-    /* do_force always puts the charge groups in the box and shifts again
-     * We do not unshift, so molecules are always whole in congrad.c
-     */
-    energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE);
-
-    if (MASTER(cr))
-    {
-        /* Copy stuff to the energy bin for easy printing etc. */
-        matrix nullBox = {};
-        energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), mdatoms->tmass,
-                                         enerd, nullptr, nullptr, nullptr, nullBox, nullptr,
-                                         nullptr, vir, pres, nullptr, mu_tot, constr);
-
-        energyOutput.printHeader(fplog, step, step);
-        energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step,
-                                           step, fcd, nullptr);
-    }
-
-    /* Estimate/guess the initial stepsize */
-    stepsize = inputrec->em_stepsize / s_min->fnorm;
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        fprintf(stderr, "   F-max             = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1);
-        fprintf(stderr, "   F-Norm            = %12.5e\n", s_min->fnorm / sqrtNumAtoms);
-        fprintf(stderr, "\n");
-        /* and copy to the log file too... */
-        fprintf(fplog, "   F-max             = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1);
-        fprintf(fplog, "   F-Norm            = %12.5e\n", s_min->fnorm / sqrtNumAtoms);
-        fprintf(fplog, "\n");
-    }
-    /* Start the loop over CG steps.
-     * Each successful step is counted, and we continue until
-     * we either converge or reach the max number of steps.
-     */
-    converged = FALSE;
-    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
-    {
-
-        /* start taking steps in a new direction
-         * First time we enter the routine, beta=0, and the direction is
-         * simply the negative gradient.
-         */
-
-        /* Calculate the new direction in p, and the gradient in this direction, gpa */
-        rvec*       pm  = s_min->s.cg_p.rvec_array();
-        const rvec* sfm = s_min->f.rvec_array();
-        double      gpa = 0;
-        int         gf  = 0;
-        for (int i = 0; i < mdatoms->homenr; i++)
-        {
-            if (mdatoms->cFREEZE)
-            {
-                gf = mdatoms->cFREEZE[i];
-            }
-            for (m = 0; m < DIM; m++)
-            {
-                if (!inputrec->opts.nFreeze[gf][m])
-                {
-                    pm[i][m] = sfm[i][m] + beta * pm[i][m];
-                    gpa -= pm[i][m] * sfm[i][m];
-                    /* f is negative gradient, thus the sign */
-                }
-                else
-                {
-                    pm[i][m] = 0;
-                }
-            }
-        }
-
-        /* Sum the gradient along the line across CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &gpa, cr);
-        }
-
-        /* Calculate the norm of the search vector */
-        get_f_norm_max(cr, &(inputrec->opts), mdatoms, pm, &pnorm, nullptr, nullptr);
-
-        /* Just in case stepsize reaches zero due to numerical precision... */
-        if (stepsize <= 0)
-        {
-            stepsize = inputrec->em_stepsize / pnorm;
-        }
-
-        /*
-         * Double check the value of the derivative in the search direction.
-         * If it is positive it must be due to the old information in the
-         * CG formula, so just remove that and start over with beta=0.
-         * This corresponds to a steepest descent step.
-         */
-        if (gpa > 0)
-        {
-            beta = 0;
-            step--;   /* Don't count this step since we are restarting */
-            continue; /* Go back to the beginning of the big for-loop */
-        }
-
-        /* Calculate minimum allowed stepsize, before the average (norm)
-         * relative change in coordinate is smaller than precision
-         */
-        minstep      = 0;
-        auto s_min_x = makeArrayRef(s_min->s.x);
-        for (int i = 0; i < mdatoms->homenr; i++)
-        {
-            for (m = 0; m < DIM; m++)
-            {
-                tmp = fabs(s_min_x[i][m]);
-                if (tmp < 1.0)
-                {
-                    tmp = 1.0;
-                }
-                tmp = pm[i][m] / tmp;
-                minstep += tmp * tmp;
-            }
-        }
-        /* Add up from all CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &minstep, cr);
-        }
-
-        minstep = GMX_REAL_EPS / sqrt(minstep / (3 * top_global->natoms));
-
-        if (stepsize < minstep)
-        {
-            converged = TRUE;
-            break;
-        }
-
-        /* Write coordinates if necessary */
-        do_x = do_per_step(step, inputrec->nstxout);
-        do_f = do_per_step(step, inputrec->nstfout);
-
-        write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, step, s_min,
-                      state_global, observablesHistory);
-
-        /* Take a step downhill.
-         * In theory, we should minimize the function along this direction.
-         * That is quite possible, but it turns out to take 5-10 function evaluations
-         * for each line. However, we dont really need to find the exact minimum -
-         * it is much better to start a new CG step in a modified direction as soon
-         * as we are close to it. This will save a lot of energy evaluations.
-         *
-         * In practice, we just try to take a single step.
-         * If it worked (i.e. lowered the energy), we increase the stepsize but
-         * the continue straight to the next CG step without trying to find any minimum.
-         * If it didn't work (higher energy), there must be a minimum somewhere between
-         * the old position and the new one.
-         *
-         * Due to the finite numerical accuracy, it turns out that it is a good idea
-         * to even accept a SMALL increase in energy, if the derivative is still downhill.
-         * This leads to lower final energies in the tests I've done. / Erik
-         */
-        s_a->epot = s_min->epot;
-        a         = 0.0;
-        c         = a + stepsize; /* reference position along line is zero */
-
-        if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
-        {
-            em_dd_partition_system(fplog, mdlog, step, cr, top_global, inputrec, imdSession,
-                                   pull_work, s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
-        }
-
-        /* Take a trial step (new coords in s_c) */
-        do_em_step(cr, inputrec, mdatoms, s_min, c, &s_min->s.cg_p, s_c, constr, -1);
-
-        neval++;
-        /* Calculate energy for the trial step */
-        energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE);
-
-        /* Calc derivative along line */
-        const rvec* pc  = s_c->s.cg_p.rvec_array();
-        const rvec* sfc = s_c->f.rvec_array();
-        double      gpc = 0;
-        for (int i = 0; i < mdatoms->homenr; i++)
-        {
-            for (m = 0; m < DIM; m++)
-            {
-                gpc -= pc[i][m] * sfc[i][m]; /* f is negative gradient, thus the sign */
-            }
-        }
-        /* Sum the gradient along the line across CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &gpc, cr);
-        }
-
-        /* This is the max amount of increase in energy we tolerate */
-        tmp = std::sqrt(GMX_REAL_EPS) * fabs(s_a->epot);
-
-        /* Accept the step if the energy is lower, or if it is not significantly higher
-         * and the line derivative is still negative.
-         */
-        if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
-        {
-            foundlower = TRUE;
-            /* Great, we found a better energy. Increase step for next iteration
-             * if we are still going down, decrease it otherwise
-             */
-            if (gpc < 0)
-            {
-                stepsize *= 1.618034; /* The golden section */
-            }
-            else
-            {
-                stepsize *= 0.618034; /* 1/golden section */
-            }
-        }
-        else
-        {
-            /* New energy is the same or higher. We will have to do some work
-             * to find a smaller value in the interval. Take smaller step next time!
-             */
-            foundlower = FALSE;
-            stepsize *= 0.618034;
-        }
-
-
-        /* OK, if we didn't find a lower value we will have to locate one now - there must
-         * be one in the interval [a=0,c].
-         * The same thing is valid here, though: Don't spend dozens of iterations to find
-         * the line minimum. We try to interpolate based on the derivative at the endpoints,
-         * and only continue until we find a lower value. In most cases this means 1-2 iterations.
-         *
-         * I also have a safeguard for potentially really pathological functions so we never
-         * take more than 20 steps before we give up ...
-         *
-         * If we already found a lower value we just skip this step and continue to the update.
-         */
-        double gpb;
-        if (!foundlower)
-        {
-            nminstep = 0;
-
-            do
-            {
-                /* Select a new trial point.
-                 * If the derivatives at points a & c have different sign we interpolate to zero,
-                 * otherwise just do a bisection.
-                 */
-                if (gpa < 0 && gpc > 0)
-                {
-                    b = a + gpa * (a - c) / (gpc - gpa);
-                }
-                else
-                {
-                    b = 0.5 * (a + c);
-                }
-
-                /* safeguard if interpolation close to machine accuracy causes errors:
-                 * never go outside the interval
-                 */
-                if (b <= a || b >= c)
-                {
-                    b = 0.5 * (a + c);
-                }
-
-                if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
-                {
-                    /* Reload the old state */
-                    em_dd_partition_system(fplog, mdlog, -1, cr, top_global, inputrec, imdSession, pull_work,
-                                           s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
-                }
-
-                /* Take a trial step to this new point - new coords in s_b */
-                do_em_step(cr, inputrec, mdatoms, s_min, b, &s_min->s.cg_p, s_b, constr, -1);
-
-                neval++;
-                /* Calculate energy for the trial step */
-                energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE);
-
-                /* p does not change within a step, but since the domain decomposition
-                 * might change, we have to use cg_p of s_b here.
-                 */
-                const rvec* pb  = s_b->s.cg_p.rvec_array();
-                const rvec* sfb = s_b->f.rvec_array();
-                gpb             = 0;
-                for (int i = 0; i < mdatoms->homenr; i++)
-                {
-                    for (m = 0; m < DIM; m++)
-                    {
-                        gpb -= pb[i][m] * sfb[i][m]; /* f is negative gradient, thus the sign */
-                    }
-                }
-                /* Sum the gradient along the line across CPUs */
-                if (PAR(cr))
-                {
-                    gmx_sumd(1, &gpb, cr);
-                }
-
-                if (debug)
-                {
-                    fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", s_a->epot, s_b->epot,
-                            s_c->epot, gpb);
-                }
-
-                epot_repl = s_b->epot;
-
-                /* Keep one of the intervals based on the value of the derivative at the new point */
-                if (gpb > 0)
-                {
-                    /* Replace c endpoint with b */
-                    swap_em_state(&s_b, &s_c);
-                    c   = b;
-                    gpc = gpb;
-                }
-                else
-                {
-                    /* Replace a endpoint with b */
-                    swap_em_state(&s_b, &s_a);
-                    a   = b;
-                    gpa = gpb;
-                }
-
-                /*
-                 * Stop search as soon as we find a value smaller than the endpoints.
-                 * Never run more than 20 steps, no matter what.
-                 */
-                nminstep++;
-            } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && (nminstep < 20));
-
-            if (std::fabs(epot_repl - s_min->epot) < fabs(s_min->epot) * GMX_REAL_EPS || nminstep >= 20)
-            {
-                /* OK. We couldn't find a significantly lower energy.
-                 * If beta==0 this was steepest descent, and then we give up.
-                 * If not, set beta=0 and restart with steepest descent before quitting.
-                 */
-                if (beta == 0.0)
-                {
-                    /* Converged */
-                    converged = TRUE;
-                    break;
-                }
-                else
-                {
-                    /* Reset memory before giving up */
-                    beta = 0.0;
-                    continue;
-                }
-            }
-
-            /* Select min energy state of A & C, put the best in B.
-             */
-            if (s_c->epot < s_a->epot)
-            {
-                if (debug)
-                {
-                    fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", s_c->epot,
-                            s_a->epot);
-                }
-                swap_em_state(&s_b, &s_c);
-                gpb = gpc;
-            }
-            else
-            {
-                if (debug)
-                {
-                    fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", s_a->epot,
-                            s_c->epot);
-                }
-                swap_em_state(&s_b, &s_a);
-                gpb = gpa;
-            }
-        }
-        else
-        {
-            if (debug)
-            {
-                fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", s_c->epot);
-            }
-            swap_em_state(&s_b, &s_c);
-            gpb = gpc;
-        }
-
-        /* new search direction */
-        /* beta = 0 means forget all memory and restart with steepest descents. */
-        if (nstcg && ((step % nstcg) == 0))
-        {
-            beta = 0.0;
-        }
-        else
-        {
-            /* s_min->fnorm cannot be zero, because then we would have converged
-             * and broken out.
-             */
-
-            /* Polak-Ribiere update.
-             * Change to fnorm2/fnorm2_old for Fletcher-Reeves
-             */
-            beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
-        }
-        /* Limit beta to prevent oscillations */
-        if (fabs(beta) > 5.0)
-        {
-            beta = 0.0;
-        }
-
-
-        /* update positions */
-        swap_em_state(&s_min, &s_b);
-        gpa = gpb;
-
-        /* Print it if necessary */
-        if (MASTER(cr))
-        {
-            if (mdrunOptions.verbose)
-            {
-                double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", step,
-                        s_min->epot, s_min->fnorm / sqrtNumAtoms, s_min->fmax, s_min->a_fmax + 1);
-                fflush(stderr);
-            }
-            /* Store the new (lower) energies */
-            matrix nullBox = {};
-            energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), mdatoms->tmass,
-                                             enerd, nullptr, nullptr, nullptr, nullBox, nullptr,
-                                             nullptr, vir, pres, nullptr, mu_tot, constr);
-
-            do_log = do_per_step(step, inputrec->nstlog);
-            do_ene = do_per_step(step, inputrec->nstenergy);
-
-            imdSession->fillEnergyRecord(step, TRUE);
-
-            if (do_log)
-            {
-                energyOutput.printHeader(fplog, step, step);
-            }
-            energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
-                                               do_log ? fplog : nullptr, step, step, fcd, nullptr);
-        }
-
-        /* Send energies and positions to the IMD client if bIMD is TRUE. */
-        if (MASTER(cr) && imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0))
-        {
-            imdSession->sendPositionsAndEnergies();
-        }
-
-        /* Stop when the maximum force lies below tolerance.
-         * If we have reached machine precision, converged is already set to true.
-         */
-        converged = converged || (s_min->fmax < inputrec->em_tol);
-
-    } /* End of the loop */
-
-    if (converged)
-    {
-        step--; /* we never took that last step in this case */
-    }
-    if (s_min->fmax > inputrec->em_tol)
-    {
-        if (MASTER(cr))
-        {
-            warn_step(fplog, inputrec->em_tol, s_min->fmax, step - 1 == number_steps, FALSE);
-        }
-        converged = FALSE;
-    }
-
-    if (MASTER(cr))
-    {
-        /* If we printed energy and/or logfile last step (which was the last step)
-         * we don't have to do it again, but otherwise print the final values.
-         */
-        if (!do_log)
-        {
-            /* Write final value to log since we didn't do anything the last step */
-            energyOutput.printHeader(fplog, step, step);
-        }
-        if (!do_ene || !do_log)
-        {
-            /* Write final energy file entries */
-            energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
-                                               !do_log ? fplog : nullptr, step, step, fcd, nullptr);
-        }
-    }
-
-    /* Print some stuff... */
-    if (MASTER(cr))
-    {
-        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
-    }
-
-    /* IMPORTANT!
-     * For accurate normal mode calculation it is imperative that we
-     * store the last conformation into the full precision binary trajectory.
-     *
-     * However, we should only do it if we did NOT already write this step
-     * above (which we did if do_x or do_f was true).
-     */
-    /* Note that with 0 < nstfout != nstxout we can end up with two frames
-     * in the trajectory with the same step number.
-     */
-    do_x = !do_per_step(step, inputrec->nstxout);
-    do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
-
-    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec,
-                  step, s_min, state_global, observablesHistory);
-
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms);
-        print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms);
-
-        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    /* To print the actual number of steps we needed somewhere */
-    walltime_accounting_set_nsteps_done(walltime_accounting, step);
-}
-
-
-void LegacySimulator::do_lbfgs()
-{
-    static const char* LBFGS = "Low-Memory BFGS Minimizer";
-    em_state_t         ems;
-    gmx_localtop_t     top;
-    gmx_global_stat_t  gstat;
-    t_graph*           graph;
-    int                ncorr, nmaxcorr, point, cp, neval, nminstep;
-    double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
-    real *             rho, *alpha, *p, *s, **dx, **dg;
-    real               a, b, c, maxdelta, delta;
-    real               diag, Epot0;
-    real               dgdx, dgdg, sq, yr, beta;
-    gmx_bool           converged;
-    rvec               mu_tot = { 0 };
-    gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
-    tensor             vir, pres;
-    int                start, end, number_steps;
-    int                i, k, m, n, gf, step;
-    int                mdof_flags;
-    auto               mdatoms = mdAtoms->mdatoms();
-
-    GMX_LOG(mdlog.info)
-            .asParagraph()
-            .appendText(
-                    "Note that activating L-BFGS energy minimization via the "
-                    "integrator .mdp option and the command gmx mdrun may "
-                    "be available in a different form in a future version of GROMACS, "
-                    "e.g. gmx minimize and an .mdp option.");
-
-    if (PAR(cr))
-    {
-        gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank");
-    }
-
-    if (nullptr != constr)
-    {
-        gmx_fatal(
-                FARGS,
-                "The combination of constraints and L-BFGS minimization is not implemented. Either "
-                "do not use constraints, or use another minimizer (e.g. steepest descent).");
-    }
-
-    n        = 3 * state_global->natoms;
-    nmaxcorr = inputrec->nbfgscorr;
-
-    snew(frozen, n);
-
-    snew(p, n);
-    snew(rho, nmaxcorr);
-    snew(alpha, nmaxcorr);
-
-    snew(dx, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
-    {
-        snew(dx[i], n);
-    }
-
-    snew(dg, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
-    {
-        snew(dg[i], n);
-    }
-
-    step  = 0;
-    neval = 0;
-
-    /* Init em */
-    init_em(fplog, mdlog, LBFGS, cr, ms /*PLUMED*/, inputrec, imdSession, pull_work, state_global, top_global,
-            &ems, &top, nrnb, fr, &graph, mdAtoms, &gstat, vsite, constr, nullptr);
-    const bool        simulationsShareState = false;
-    gmx_mdoutf*       outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider,
-                                   mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
-                                   StartingBehavior::NewSimulation, simulationsShareState, ms);
-    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, nullptr,
-                                   false, StartingBehavior::NewSimulation, mdModulesNotifier);
-
-    start = 0;
-    end   = mdatoms->homenr;
-
-    /* We need 4 working states */
-    em_state_t  s0{}, s1{}, s2{}, s3{};
-    em_state_t* sa   = &s0;
-    em_state_t* sb   = &s1;
-    em_state_t* sc   = &s2;
-    em_state_t* last = &s3;
-    /* Initialize by copying the state from ems (we could skip x and f here) */
-    *sa = ems;
-    *sb = ems;
-    *sc = ems;
-
-    /* Print to log file */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
-
-    do_log = do_ene = do_x = do_f = TRUE;
-
-    /* Max number of steps */
-    number_steps = inputrec->nsteps;
-
-    /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
-    gf = 0;
-    for (i = start; i < end; i++)
-    {
-        if (mdatoms->cFREEZE)
-        {
-            gf = mdatoms->cFREEZE[i];
-        }
-        for (m = 0; m < DIM; m++)
-        {
-            frozen[3 * i + m] = (inputrec->opts.nFreeze[gf][m] != 0);
-        }
-    }
-    if (MASTER(cr))
-    {
-        sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
-    }
-    if (fplog)
-    {
-        sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
-    }
-
-    if (vsite)
-    {
-        construct_vsites(vsite, state_global->x.rvec_array(), 1, nullptr, top.idef.iparams,
-                         top.idef.il, fr->ePBC, fr->bMolPBC, cr, state_global->box);
-    }
-
-    /* Call the force routine and some auxiliary (neighboursearching etc.) */
-    /* do_force always puts the charge groups in the box and shifts again
-     * We do not unshift, so molecules are always whole
-     */
-    neval++;
-    EnergyEvaluator energyEvaluator{
-        fplog,      mdlog,     cr,      ms,     top_global,      &top,  inputrec,
-        imdSession, pull_work, nrnb,    wcycle, gstat,           vsite, constr,
-        fcd,        graph,     mdAtoms, fr,     runScheduleWork, enerd
-    };
-    energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE);
-
-    if (MASTER(cr))
-    {
-        /* Copy stuff to the energy bin for easy printing etc. */
-        matrix nullBox = {};
-        energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), mdatoms->tmass,
-                                         enerd, nullptr, nullptr, nullptr, nullBox, nullptr,
-                                         nullptr, vir, pres, nullptr, mu_tot, constr);
-
-        energyOutput.printHeader(fplog, step, step);
-        energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step,
-                                           step, fcd, nullptr);
-    }
-
-    /* Set the initial step.
-     * since it will be multiplied by the non-normalized search direction
-     * vector (force vector the first time), we scale it by the
-     * norm of the force.
-     */
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
-        fprintf(stderr, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
-        fprintf(stderr, "   F-Norm            = %12.5e\n", ems.fnorm / sqrtNumAtoms);
-        fprintf(stderr, "\n");
-        /* and copy to the log file too... */
-        fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
-        fprintf(fplog, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
-        fprintf(fplog, "   F-Norm            = %12.5e\n", ems.fnorm / sqrtNumAtoms);
-        fprintf(fplog, "\n");
-    }
-
-    // Point is an index to the memory of search directions, where 0 is the first one.
-    point = 0;
-
-    // Set initial search direction to the force (-gradient), or 0 for frozen particles.
-    real* fInit = static_cast<real*>(ems.f.rvec_array()[0]);
-    for (i = 0; i < n; i++)
-    {
-        if (!frozen[i])
-        {
-            dx[point][i] = fInit[i]; /* Initial search direction */
-        }
-        else
-        {
-            dx[point][i] = 0;
-        }
-    }
-
-    // Stepsize will be modified during the search, and actually it is not critical
-    // (the main efficiency in the algorithm comes from changing directions), but
-    // we still need an initial value, so estimate it as the inverse of the norm
-    // so we take small steps where the potential fluctuates a lot.
-    stepsize = 1.0 / ems.fnorm;
-
-    /* Start the loop over BFGS steps.
-     * Each successful step is counted, and we continue until
-     * we either converge or reach the max number of steps.
-     */
-
-    ncorr = 0;
-
-    /* Set the gradient from the force */
-    converged = FALSE;
-    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
-    {
-
-        /* Write coordinates if necessary */
-        do_x = do_per_step(step, inputrec->nstxout);
-        do_f = do_per_step(step, inputrec->nstfout);
-
-        mdof_flags = 0;
-        if (do_x)
-        {
-            mdof_flags |= MDOF_X;
-        }
-
-        if (do_f)
-        {
-            mdof_flags |= MDOF_F;
-        }
-
-        if (inputrec->bIMD)
-        {
-            mdof_flags |= MDOF_IMD;
-        }
-
-        mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global->natoms, step,
-                                         static_cast<real>(step), &ems.s, state_global,
-                                         observablesHistory, ems.f);
-
-        /* Do the linesearching in the direction dx[point][0..(n-1)] */
-
-        /* make s a pointer to current search direction - point=0 first time we get here */
-        s = dx[point];
-
-        real* xx = static_cast<real*>(ems.s.x.rvec_array()[0]);
-        real* ff = static_cast<real*>(ems.f.rvec_array()[0]);
-
-        // calculate line gradient in position A
-        for (gpa = 0, i = 0; i < n; i++)
-        {
-            gpa -= s[i] * ff[i];
-        }
-
-        /* Calculate minimum allowed stepsize along the line, before the average (norm)
-         * relative change in coordinate is smaller than precision
-         */
-        for (minstep = 0, i = 0; i < n; i++)
-        {
-            tmp = fabs(xx[i]);
-            if (tmp < 1.0)
-            {
-                tmp = 1.0;
-            }
-            tmp = s[i] / tmp;
-            minstep += tmp * tmp;
-        }
-        minstep = GMX_REAL_EPS / sqrt(minstep / n);
-
-        if (stepsize < minstep)
-        {
-            converged = TRUE;
-            break;
-        }
-
-        // Before taking any steps along the line, store the old position
-        *last       = ems;
-        real* lastx = static_cast<real*>(last->s.x.data()[0]);
-        real* lastf = static_cast<real*>(last->f.data()[0]);
-        Epot0       = ems.epot;
-
-        *sa = ems;
-
-        /* Take a step downhill.
-         * In theory, we should find the actual minimum of the function in this
-         * direction, somewhere along the line.
-         * That is quite possible, but it turns out to take 5-10 function evaluations
-         * for each line. However, we dont really need to find the exact minimum -
-         * it is much better to start a new BFGS step in a modified direction as soon
-         * as we are close to it. This will save a lot of energy evaluations.
-         *
-         * In practice, we just try to take a single step.
-         * If it worked (i.e. lowered the energy), we increase the stepsize but
-         * continue straight to the next BFGS step without trying to find any minimum,
-         * i.e. we change the search direction too. If the line was smooth, it is
-         * likely we are in a smooth region, and then it makes sense to take longer
-         * steps in the modified search direction too.
-         *
-         * If it didn't work (higher energy), there must be a minimum somewhere between
-         * the old position and the new one. Then we need to start by finding a lower
-         * value before we change search direction. Since the energy was apparently
-         * quite rough, we need to decrease the step size.
-         *
-         * Due to the finite numerical accuracy, it turns out that it is a good idea
-         * to accept a SMALL increase in energy, if the derivative is still downhill.
-         * This leads to lower final energies in the tests I've done. / Erik
-         */
-
-        // State "A" is the first position along the line.
-        // reference position along line is initially zero
-        a = 0.0;
-
-        // Check stepsize first. We do not allow displacements
-        // larger than emstep.
-        //
-        do
-        {
-            // Pick a new position C by adding stepsize to A.
-            c = a + stepsize;
-
-            // Calculate what the largest change in any individual coordinate
-            // would be (translation along line * gradient along line)
-            maxdelta = 0;
-            for (i = 0; i < n; i++)
-            {
-                delta = c * s[i];
-                if (delta > maxdelta)
-                {
-                    maxdelta = delta;
-                }
-            }
-            // If any displacement is larger than the stepsize limit, reduce the step
-            if (maxdelta > inputrec->em_stepsize)
-            {
-                stepsize *= 0.1;
-            }
-        } while (maxdelta > inputrec->em_stepsize);
-
-        // Take a trial step and move the coordinate array xc[] to position C
-        real* xc = static_cast<real*>(sc->s.x.rvec_array()[0]);
-        for (i = 0; i < n; i++)
-        {
-            xc[i] = lastx[i] + c * s[i];
-        }
-
-        neval++;
-        // Calculate energy for the trial step in position C
-        energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE);
-
-        // Calc line gradient in position C
-        real* fc = static_cast<real*>(sc->f.rvec_array()[0]);
-        for (gpc = 0, i = 0; i < n; i++)
-        {
-            gpc -= s[i] * fc[i]; /* f is negative gradient, thus the sign */
-        }
-        /* Sum the gradient along the line across CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &gpc, cr);
-        }
-
-        // This is the max amount of increase in energy we tolerate.
-        // By allowing VERY small changes (close to numerical precision) we
-        // frequently find even better (lower) final energies.
-        tmp = std::sqrt(GMX_REAL_EPS) * fabs(sa->epot);
-
-        // Accept the step if the energy is lower in the new position C (compared to A),
-        // or if it is not significantly higher and the line derivative is still negative.
-        foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp));
-        // If true, great, we found a better energy. We no longer try to alter the
-        // stepsize, but simply accept this new better position. The we select a new
-        // search direction instead, which will be much more efficient than continuing
-        // to take smaller steps along a line. Set fnorm based on the new C position,
-        // which will be used to update the stepsize to 1/fnorm further down.
-
-        // If false, the energy is NOT lower in point C, i.e. it will be the same
-        // or higher than in point A. In this case it is pointless to move to point C,
-        // so we will have to do more iterations along the same line to find a smaller
-        // value in the interval [A=0.0,C].
-        // Here, A is still 0.0, but that will change when we do a search in the interval
-        // [0.0,C] below. That search we will do by interpolation or bisection rather
-        // than with the stepsize, so no need to modify it. For the next search direction
-        // it will be reset to 1/fnorm anyway.
-
-        if (!foundlower)
-        {
-            // OK, if we didn't find a lower value we will have to locate one now - there must
-            // be one in the interval [a,c].
-            // The same thing is valid here, though: Don't spend dozens of iterations to find
-            // the line minimum. We try to interpolate based on the derivative at the endpoints,
-            // and only continue until we find a lower value. In most cases this means 1-2 iterations.
-            // I also have a safeguard for potentially really pathological functions so we never
-            // take more than 20 steps before we give up.
-            // If we already found a lower value we just skip this step and continue to the update.
-            real fnorm = 0;
-            nminstep   = 0;
-            do
-            {
-                // Select a new trial point B in the interval [A,C].
-                // If the derivatives at points a & c have different sign we interpolate to zero,
-                // otherwise just do a bisection since there might be multiple minima/maxima
-                // inside the interval.
-                if (gpa < 0 && gpc > 0)
-                {
-                    b = a + gpa * (a - c) / (gpc - gpa);
-                }
-                else
-                {
-                    b = 0.5 * (a + c);
-                }
-
-                /* safeguard if interpolation close to machine accuracy causes errors:
-                 * never go outside the interval
-                 */
-                if (b <= a || b >= c)
-                {
-                    b = 0.5 * (a + c);
-                }
-
-                // Take a trial step to point B
-                real* xb = static_cast<real*>(sb->s.x.rvec_array()[0]);
-                for (i = 0; i < n; i++)
-                {
-                    xb[i] = lastx[i] + b * s[i];
-                }
-
-                neval++;
-                // Calculate energy for the trial step in point B
-                energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE);
-                fnorm = sb->fnorm;
-
-                // Calculate gradient in point B
-                real* fb = static_cast<real*>(sb->f.rvec_array()[0]);
-                for (gpb = 0, i = 0; i < n; i++)
-                {
-                    gpb -= s[i] * fb[i]; /* f is negative gradient, thus the sign */
-                }
-                /* Sum the gradient along the line across CPUs */
-                if (PAR(cr))
-                {
-                    gmx_sumd(1, &gpb, cr);
-                }
-
-                // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative
-                // at the new point B, and rename the endpoints of this new interval A and C.
-                if (gpb > 0)
-                {
-                    /* Replace c endpoint with b */
-                    c = b;
-                    /* copy state b to c */
-                    *sc = *sb;
-                }
-                else
-                {
-                    /* Replace a endpoint with b */
-                    a = b;
-                    /* copy state b to a */
-                    *sa = *sb;
-                }
-
-                /*
-                 * Stop search as soon as we find a value smaller than the endpoints,
-                 * or if the tolerance is below machine precision.
-                 * Never run more than 20 steps, no matter what.
-                 */
-                nminstep++;
-            } while ((sb->epot > sa->epot || sb->epot > sc->epot) && (nminstep < 20));
-
-            if (std::fabs(sb->epot - Epot0) < GMX_REAL_EPS || nminstep >= 20)
-            {
-                /* OK. We couldn't find a significantly lower energy.
-                 * If ncorr==0 this was steepest descent, and then we give up.
-                 * If not, reset memory to restart as steepest descent before quitting.
-                 */
-                if (ncorr == 0)
-                {
-                    /* Converged */
-                    converged = TRUE;
-                    break;
-                }
-                else
-                {
-                    /* Reset memory */
-                    ncorr = 0;
-                    /* Search in gradient direction */
-                    for (i = 0; i < n; i++)
-                    {
-                        dx[point][i] = ff[i];
-                    }
-                    /* Reset stepsize */
-                    stepsize = 1.0 / fnorm;
-                    continue;
-                }
-            }
-
-            /* Select min energy state of A & C, put the best in xx/ff/Epot
-             */
-            if (sc->epot < sa->epot)
-            {
-                /* Use state C */
-                ems        = *sc;
-                step_taken = c;
-            }
-            else
-            {
-                /* Use state A */
-                ems        = *sa;
-                step_taken = a;
-            }
-        }
-        else
-        {
-            /* found lower */
-            /* Use state C */
-            ems        = *sc;
-            step_taken = c;
-        }
-
-        /* Update the memory information, and calculate a new
-         * approximation of the inverse hessian
-         */
-
-        /* Have new data in Epot, xx, ff */
-        if (ncorr < nmaxcorr)
-        {
-            ncorr++;
-        }
-
-        for (i = 0; i < n; i++)
-        {
-            dg[point][i] = lastf[i] - ff[i];
-            dx[point][i] *= step_taken;
-        }
-
-        dgdg = 0;
-        dgdx = 0;
-        for (i = 0; i < n; i++)
-        {
-            dgdg += dg[point][i] * dg[point][i];
-            dgdx += dg[point][i] * dx[point][i];
-        }
-
-        diag = dgdx / dgdg;
-
-        rho[point] = 1.0 / dgdx;
-        point++;
-
-        if (point >= nmaxcorr)
-        {
-            point = 0;
-        }
-
-        /* Update */
-        for (i = 0; i < n; i++)
-        {
-            p[i] = ff[i];
-        }
-
-        cp = point;
-
-        /* Recursive update. First go back over the memory points */
-        for (k = 0; k < ncorr; k++)
-        {
-            cp--;
-            if (cp < 0)
-            {
-                cp = ncorr - 1;
-            }
-
-            sq = 0;
-            for (i = 0; i < n; i++)
-            {
-                sq += dx[cp][i] * p[i];
-            }
-
-            alpha[cp] = rho[cp] * sq;
-
-            for (i = 0; i < n; i++)
-            {
-                p[i] -= alpha[cp] * dg[cp][i];
-            }
-        }
-
-        for (i = 0; i < n; i++)
-        {
-            p[i] *= diag;
-        }
-
-        /* And then go forward again */
-        for (k = 0; k < ncorr; k++)
-        {
-            yr = 0;
-            for (i = 0; i < n; i++)
-            {
-                yr += p[i] * dg[cp][i];
-            }
-
-            beta = rho[cp] * yr;
-            beta = alpha[cp] - beta;
-
-            for (i = 0; i < n; i++)
-            {
-                p[i] += beta * dx[cp][i];
-            }
-
-            cp++;
-            if (cp >= ncorr)
-            {
-                cp = 0;
-            }
-        }
-
-        for (i = 0; i < n; i++)
-        {
-            if (!frozen[i])
-            {
-                dx[point][i] = p[i];
-            }
-            else
-            {
-                dx[point][i] = 0;
-            }
-        }
-
-        /* Print it if necessary */
-        if (MASTER(cr))
-        {
-            if (mdrunOptions.verbose)
-            {
-                double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", step,
-                        ems.epot, ems.fnorm / sqrtNumAtoms, ems.fmax, ems.a_fmax + 1);
-                fflush(stderr);
-            }
-            /* Store the new (lower) energies */
-            matrix nullBox = {};
-            energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), mdatoms->tmass,
-                                             enerd, nullptr, nullptr, nullptr, nullBox, nullptr,
-                                             nullptr, vir, pres, nullptr, mu_tot, constr);
-
-            do_log = do_per_step(step, inputrec->nstlog);
-            do_ene = do_per_step(step, inputrec->nstenergy);
-
-            imdSession->fillEnergyRecord(step, TRUE);
-
-            if (do_log)
-            {
-                energyOutput.printHeader(fplog, step, step);
-            }
-            energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
-                                               do_log ? fplog : nullptr, step, step, fcd, nullptr);
-        }
-
-        /* Send x and E to IMD client, if bIMD is TRUE. */
-        if (imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0) && MASTER(cr))
-        {
-            imdSession->sendPositionsAndEnergies();
-        }
-
-        // Reset stepsize in we are doing more iterations
-        stepsize = 1.0;
-
-        /* Stop when the maximum force lies below tolerance.
-         * If we have reached machine precision, converged is already set to true.
-         */
-        converged = converged || (ems.fmax < inputrec->em_tol);
-
-    } /* End of the loop */
-
-    if (converged)
-    {
-        step--; /* we never took that last step in this case */
-    }
-    if (ems.fmax > inputrec->em_tol)
-    {
-        if (MASTER(cr))
-        {
-            warn_step(fplog, inputrec->em_tol, ems.fmax, step - 1 == number_steps, FALSE);
-        }
-        converged = FALSE;
-    }
-
-    /* If we printed energy and/or logfile last step (which was the last step)
-     * we don't have to do it again, but otherwise print the final values.
-     */
-    if (!do_log) /* Write final value to log since we didn't do anythin last step */
-    {
-        energyOutput.printHeader(fplog, step, step);
-    }
-    if (!do_ene || !do_log) /* Write final energy file entries */
-    {
-        energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
-                                           !do_log ? fplog : nullptr, step, step, fcd, nullptr);
-    }
-
-    /* Print some stuff... */
-    if (MASTER(cr))
-    {
-        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
-    }
-
-    /* IMPORTANT!
-     * For accurate normal mode calculation it is imperative that we
-     * store the last conformation into the full precision binary trajectory.
-     *
-     * However, we should only do it if we did NOT already write this step
-     * above (which we did if do_x or do_f was true).
-     */
-    do_x = !do_per_step(step, inputrec->nstxout);
-    do_f = !do_per_step(step, inputrec->nstfout);
-    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec,
-                  step, &ems, state_global, observablesHistory);
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms);
-        print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms);
-
-        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    /* To print the actual number of steps we needed somewhere */
-    walltime_accounting_set_nsteps_done(walltime_accounting, step);
-}
-
-void LegacySimulator::do_steep()
-{
-    const char*       SD = "Steepest Descents";
-    gmx_localtop_t    top;
-    gmx_global_stat_t gstat;
-    t_graph*          graph;
-    real              stepsize;
-    real              ustep;
-    gmx_bool          bDone, bAbort, do_x, do_f;
-    tensor            vir, pres;
-    rvec              mu_tot = { 0 };
-    int               nsteps;
-    int               count          = 0;
-    int               steps_accepted = 0;
-    auto              mdatoms        = mdAtoms->mdatoms();
-
-    GMX_LOG(mdlog.info)
-            .asParagraph()
-            .appendText(
-                    "Note that activating steepest-descent energy minimization via the "
-                    "integrator .mdp option and the command gmx mdrun may "
-                    "be available in a different form in a future version of GROMACS, "
-                    "e.g. gmx minimize and an .mdp option.");
-
-    /* Create 2 states on the stack and extract pointers that we will swap */
-    em_state_t  s0{}, s1{};
-    em_state_t* s_min = &s0;
-    em_state_t* s_try = &s1;
-
-    /* Init em and store the local state in s_try */
-    init_em(fplog, mdlog, SD, cr, ms /*PLUMED*/, inputrec, imdSession, pull_work, state_global, top_global, s_try,
-            &top, nrnb, fr, &graph, mdAtoms, &gstat, vsite, constr, nullptr);
-    const bool        simulationsShareState = false;
-    gmx_mdoutf*       outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider,
-                                   mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
-                                   StartingBehavior::NewSimulation, simulationsShareState, ms);
-    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, nullptr,
-                                   false, StartingBehavior::NewSimulation, mdModulesNotifier);
-
-    /* Print to log file  */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
-
-    /* Set variables for stepsize (in nm). This is the largest
-     * step that we are going to make in any direction.
-     */
-    ustep    = inputrec->em_stepsize;
-    stepsize = 0;
-
-    /* Max number of steps  */
-    nsteps = inputrec->nsteps;
-
-    if (MASTER(cr))
-    {
-        /* Print to the screen  */
-        sp_header(stderr, SD, inputrec->em_tol, nsteps);
-    }
-    if (fplog)
-    {
-        sp_header(fplog, SD, inputrec->em_tol, nsteps);
-    }
-    EnergyEvaluator energyEvaluator{
-        fplog,      mdlog,     cr,      ms,     top_global,      &top,  inputrec,
-        imdSession, pull_work, nrnb,    wcycle, gstat,           vsite, constr,
-        fcd,        graph,     mdAtoms, fr,     runScheduleWork, enerd
-    };
-
-    /**** HERE STARTS THE LOOP ****
-     * count is the counter for the number of steps
-     * bDone will be TRUE when the minimization has converged
-     * bAbort will be TRUE when nsteps steps have been performed or when
-     * the stepsize becomes smaller than is reasonable for machine precision
-     */
-    count  = 0;
-    bDone  = FALSE;
-    bAbort = FALSE;
-    while (!bDone && !bAbort)
-    {
-        bAbort = (nsteps >= 0) && (count == nsteps);
-
-        /* set new coordinates, except for first step */
-        bool validStep = true;
-        if (count > 0)
-        {
-            validStep = do_em_step(cr, inputrec, mdatoms, s_min, stepsize, &s_min->f, s_try, constr, count);
-        }
-
-        if (validStep)
-        {
-            energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0);
-        }
-        else
-        {
-            // Signal constraint error during stepping with energy=inf
-            s_try->epot = std::numeric_limits<real>::infinity();
-        }
-
-        if (MASTER(cr))
-        {
-            energyOutput.printHeader(fplog, count, count);
-        }
-
-        if (count == 0)
-        {
-            s_min->epot = s_try->epot;
-        }
-
-        /* Print it if necessary  */
-        if (MASTER(cr))
-        {
-            if (mdrunOptions.verbose)
-            {
-                fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
-                        count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax + 1,
-                        ((count == 0) || (s_try->epot < s_min->epot)) ? '\n' : '\r');
-                fflush(stderr);
-            }
-
-            if ((count == 0) || (s_try->epot < s_min->epot))
-            {
-                /* Store the new (lower) energies  */
-                matrix nullBox = {};
-                energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(count), mdatoms->tmass,
-                                                 enerd, nullptr, nullptr, nullptr, nullBox, nullptr,
-                                                 nullptr, vir, pres, nullptr, mu_tot, constr);
-
-                imdSession->fillEnergyRecord(count, TRUE);
-
-                const bool do_dr = do_per_step(steps_accepted, inputrec->nstdisreout);
-                const bool do_or = do_per_step(steps_accepted, inputrec->nstorireout);
-                energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, do_dr, do_or,
-                                                   fplog, count, count, fcd, nullptr);
-                fflush(fplog);
-            }
-        }
-
-        /* Now if the new energy is smaller than the previous...
-         * or if this is the first step!
-         * or if we did random steps!
-         */
-
-        if ((count == 0) || (s_try->epot < s_min->epot))
-        {
-            steps_accepted++;
-
-            /* Test whether the convergence criterion is met...  */
-            bDone = (s_try->fmax < inputrec->em_tol);
-
-            /* Copy the arrays for force, positions and energy  */
-            /* The 'Min' array always holds the coords and forces of the minimal
-               sampled energy  */
-            swap_em_state(&s_min, &s_try);
-            if (count > 0)
-            {
-                ustep *= 1.2;
-            }
-
-            /* Write to trn, if necessary */
-            do_x = do_per_step(steps_accepted, inputrec->nstxout);
-            do_f = do_per_step(steps_accepted, inputrec->nstfout);
-            write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, count, s_min,
-                          state_global, observablesHistory);
-        }
-        else
-        {
-            /* If energy is not smaller make the step smaller...  */
-            ustep *= 0.5;
-
-            if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
-            {
-                /* Reload the old state */
-                em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, imdSession,
-                                       pull_work, s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
-            }
-        }
-
-        // If the force is very small after finishing minimization,
-        // we risk dividing by zero when calculating the step size.
-        // So we check first if the minimization has stopped before
-        // trying to obtain a new step size.
-        if (!bDone)
-        {
-            /* Determine new step  */
-            stepsize = ustep / s_min->fmax;
-        }
-
-        /* Check if stepsize is too small, with 1 nm as a characteristic length */
-#if GMX_DOUBLE
-        if (count == nsteps || ustep < 1e-12)
-#else
-        if (count == nsteps || ustep < 1e-6)
-#endif
-        {
-            if (MASTER(cr))
-            {
-                warn_step(fplog, inputrec->em_tol, s_min->fmax, count == nsteps, constr != nullptr);
-            }
-            bAbort = TRUE;
-        }
-
-        /* Send IMD energies and positions, if bIMD is TRUE. */
-        if (imdSession->run(count, TRUE, state_global->box,
-                            MASTER(cr) ? state_global->x.rvec_array() : nullptr, 0)
-            && MASTER(cr))
-        {
-            imdSession->sendPositionsAndEnergies();
-        }
-
-        count++;
-    } /* End of the loop  */
-
-    /* Print some data...  */
-    if (MASTER(cr))
-    {
-        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
-    }
-    write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout != 0, ftp2fn(efSTO, nfile, fnm),
-                  top_global, inputrec, count, s_min, state_global, observablesHistory);
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-
-        print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms);
-        print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    /* To print the actual number of steps we needed somewhere */
-    inputrec->nsteps = count;
-
-    walltime_accounting_set_nsteps_done(walltime_accounting, count);
-}
-
-void LegacySimulator::do_nm()
-{
-    const char*         NM = "Normal Mode Analysis";
-    int                 nnodes;
-    gmx_localtop_t      top;
-    gmx_global_stat_t   gstat;
-    t_graph*            graph;
-    tensor              vir, pres;
-    rvec                mu_tot = { 0 };
-    rvec*               dfdx;
-    gmx_bool            bSparse; /* use sparse matrix storage format */
-    size_t              sz;
-    gmx_sparsematrix_t* sparse_matrix = nullptr;
-    real*               full_matrix   = nullptr;
-
-    /* added with respect to mdrun */
-    int  row, col;
-    real der_range = 10.0 * std::sqrt(GMX_REAL_EPS);
-    real x_min;
-    bool bIsMaster = MASTER(cr);
-    auto mdatoms   = mdAtoms->mdatoms();
-
-    GMX_LOG(mdlog.info)
-            .asParagraph()
-            .appendText(
-                    "Note that activating normal-mode analysis via the integrator "
-                    ".mdp option and the command gmx mdrun may "
-                    "be available in a different form in a future version of GROMACS, "
-                    "e.g. gmx normal-modes.");
-
-    if (constr != nullptr)
-    {
-        gmx_fatal(
-                FARGS,
-                "Constraints present with Normal Mode Analysis, this combination is not supported");
-    }
-
-    gmx_shellfc_t* shellfc;
-
-    em_state_t state_work{};
-
-    /* Init em and store the local state in state_minimum */
-    init_em(fplog, mdlog, NM, cr, ms /*PLUMED*/, inputrec, imdSession, pull_work, state_global, top_global,
-            &state_work, &top, nrnb, fr, &graph, mdAtoms, &gstat, vsite, constr, &shellfc);
-    const bool  simulationsShareState = false;
-    gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider,
-                                   mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
-                                   StartingBehavior::NewSimulation, simulationsShareState, ms);
-
-    std::vector<int>       atom_index = get_atom_index(top_global);
-    std::vector<gmx::RVec> fneg(atom_index.size(), { 0, 0, 0 });
-    snew(dfdx, atom_index.size());
-
-#if !GMX_DOUBLE
-    if (bIsMaster)
-    {
-        fprintf(stderr,
-                "NOTE: This version of GROMACS has been compiled in single precision,\n"
-                "      which MIGHT not be accurate enough for normal mode analysis.\n"
-                "      GROMACS now uses sparse matrix storage, so the memory requirements\n"
-                "      are fairly modest even if you recompile in double precision.\n\n");
-    }
-#endif
-
-    /* Check if we can/should use sparse storage format.
-     *
-     * Sparse format is only useful when the Hessian itself is sparse, which it
-     * will be when we use a cutoff.
-     * For small systems (n<1000) it is easier to always use full matrix format, though.
-     */
-    if (EEL_FULL(fr->ic->eeltype) || fr->rlist == 0.0)
-    {
-        GMX_LOG(mdlog.warning)
-                .appendText("Non-cutoff electrostatics used, forcing full Hessian format.");
-        bSparse = FALSE;
-    }
-    else if (atom_index.size() < 1000)
-    {
-        GMX_LOG(mdlog.warning)
-                .appendTextFormatted("Small system size (N=%zu), using full Hessian format.",
-                                     atom_index.size());
-        bSparse = FALSE;
-    }
-    else
-    {
-        GMX_LOG(mdlog.warning).appendText("Using compressed symmetric sparse Hessian format.");
-        bSparse = TRUE;
-    }
-
-    /* Number of dimensions, based on real atoms, that is not vsites or shell */
-    sz = DIM * atom_index.size();
-
-    fprintf(stderr, "Allocating Hessian memory...\n\n");
-
-    if (bSparse)
-    {
-        sparse_matrix                       = gmx_sparsematrix_init(sz);
-        sparse_matrix->compressed_symmetric = TRUE;
-    }
-    else
-    {
-        snew(full_matrix, sz * sz);
-    }
-
-    /* Write start time and temperature */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
-
-    /* fudge nr of steps to nr of atoms */
-    inputrec->nsteps = atom_index.size() * 2;
-
-    if (bIsMaster)
-    {
-        fprintf(stderr, "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n",
-                *(top_global->name), inputrec->nsteps);
-    }
-
-    nnodes = cr->nnodes;
-
-    /* Make evaluate_energy do a single node force calculation */
-    cr->nnodes = 1;
-    EnergyEvaluator energyEvaluator{
-        fplog,      mdlog,     cr,      ms,     top_global,      &top,  inputrec,
-        imdSession, pull_work, nrnb,    wcycle, gstat,           vsite, constr,
-        fcd,        graph,     mdAtoms, fr,     runScheduleWork, enerd
-    };
-    energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE);
-    cr->nnodes = nnodes;
-
-    /* if forces are not small, warn user */
-    get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, &state_work);
-
-    GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work.fmax);
-    if (state_work.fmax > 1.0e-3)
-    {
-        GMX_LOG(mdlog.warning)
-                .appendText(
-                        "The force is probably not small enough to "
-                        "ensure that you are at a minimum.\n"
-                        "Be aware that negative eigenvalues may occur\n"
-                        "when the resulting matrix is diagonalized.");
-    }
-
-    /***********************************************************
-     *
-     *      Loop over all pairs in matrix
-     *
-     *      do_force called twice. Once with positive and
-     *      once with negative displacement
-     *
-     ************************************************************/
-
-    /* Steps are divided one by one over the nodes */
-    bool bNS          = true;
-    auto state_work_x = makeArrayRef(state_work.s.x);
-    auto state_work_f = makeArrayRef(state_work.f);
-    for (index aid = cr->nodeid; aid < ssize(atom_index); aid += nnodes)
-    {
-        size_t atom = atom_index[aid];
-        for (size_t d = 0; d < DIM; d++)
-        {
-            int64_t step        = 0;
-            int     force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
-            double  t           = 0;
-
-            x_min = state_work_x[atom][d];
-
-            for (unsigned int dx = 0; (dx < 2); dx++)
-            {
-                if (dx == 0)
-                {
-                    state_work_x[atom][d] = x_min - der_range;
-                }
-                else
-                {
-                    state_work_x[atom][d] = x_min + der_range;
-                }
-
-                /* Make evaluate_energy do a single node force calculation */
-                cr->nnodes = 1;
-                if (shellfc)
-                {
-                    /* Now is the time to relax the shells */
-                    relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, nullptr, step, inputrec,
-                                        imdSession, pull_work, bNS, force_flags, &top, constr, enerd,
-                                        fcd, state_work.s.natoms, state_work.s.x.arrayRefWithPadding(),
-                                        state_work.s.v.arrayRefWithPadding(), state_work.s.box,
-                                        state_work.s.lambda, &state_work.s.hist,
-                                        state_work.f.arrayRefWithPadding(), vir, mdatoms, nrnb,
-                                        wcycle, graph, shellfc, fr, runScheduleWork, t, mu_tot,
-                                        vsite, DDBalanceRegionHandler(nullptr));
-                    bNS = false;
-                    step++;
-                }
-                else
-                {
-                    energyEvaluator.run(&state_work, mu_tot, vir, pres, aid * 2 + dx, FALSE);
-                }
-
-                cr->nnodes = nnodes;
-
-                if (dx == 0)
-                {
-                    std::copy(state_work_f.begin(), state_work_f.begin() + atom_index.size(),
-                              fneg.begin());
-                }
-            }
-
-            /* x is restored to original */
-            state_work_x[atom][d] = x_min;
-
-            for (size_t j = 0; j < atom_index.size(); j++)
-            {
-                for (size_t k = 0; (k < DIM); k++)
-                {
-                    dfdx[j][k] = -(state_work_f[atom_index[j]][k] - fneg[j][k]) / (2 * der_range);
-                }
-            }
-
-            if (!bIsMaster)
-            {
-#if GMX_MPI
-#    define mpi_type GMX_MPI_REAL
-                MPI_Send(dfdx[0], atom_index.size() * DIM, mpi_type, MASTER(cr), cr->nodeid,
-                         cr->mpi_comm_mygroup);
-#endif
-            }
-            else
-            {
-                for (index node = 0; (node < nnodes && aid + node < ssize(atom_index)); node++)
-                {
-                    if (node > 0)
-                    {
-#if GMX_MPI
-                        MPI_Status stat;
-                        MPI_Recv(dfdx[0], atom_index.size() * DIM, mpi_type, node, node,
-                                 cr->mpi_comm_mygroup, &stat);
-#    undef mpi_type
-#endif
-                    }
-
-                    row = (aid + node) * DIM + d;
-
-                    for (size_t j = 0; j < atom_index.size(); j++)
-                    {
-                        for (size_t k = 0; k < DIM; k++)
-                        {
-                            col = j * DIM + k;
-
-                            if (bSparse)
-                            {
-                                if (col >= row && dfdx[j][k] != 0.0)
-                                {
-                                    gmx_sparsematrix_increment_value(sparse_matrix, row, col, dfdx[j][k]);
-                                }
-                            }
-                            else
-                            {
-                                full_matrix[row * sz + col] = dfdx[j][k];
-                            }
-                        }
-                    }
-                }
-            }
-
-            if (mdrunOptions.verbose && fplog)
-            {
-                fflush(fplog);
-            }
-        }
-        /* write progress */
-        if (bIsMaster && mdrunOptions.verbose)
-        {
-            fprintf(stderr, "\rFinished step %d out of %td",
-                    std::min<int>(atom + nnodes, atom_index.size()), ssize(atom_index));
-            fflush(stderr);
-        }
-    }
-
-    if (bIsMaster)
-    {
-        fprintf(stderr, "\n\nWriting Hessian...\n");
-        gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size() * 2);
-}
-
-} // namespace gmx
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/minimize.cpp.preplumed b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/minimize.cpp.preplumed
deleted file mode 100644
index d90af4f9d7..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/minimize.cpp.preplumed
+++ /dev/null
@@ -1,2873 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \internal \file
- *
- * \brief This file defines integrators for energy minimization
- *
- * \author Berk Hess <hess@kth.se>
- * \author Erik Lindahl <erik@kth.se>
- * \ingroup module_mdrun
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <cmath>
-#include <cstring>
-#include <ctime>
-
-#include <algorithm>
-#include <vector>
-
-#include "gromacs/commandline/filenm.h"
-#include "gromacs/domdec/collect.h"
-#include "gromacs/domdec/dlbtiming.h"
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/domdec/domdec_struct.h"
-#include "gromacs/domdec/mdsetup.h"
-#include "gromacs/domdec/partition.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/fileio/confio.h"
-#include "gromacs/fileio/mtxio.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/imd/imd.h"
-#include "gromacs/linearalgebra/sparsematrix.h"
-#include "gromacs/listed_forces/manage_threading.h"
-#include "gromacs/math/functions.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/constr.h"
-#include "gromacs/mdlib/dispersioncorrection.h"
-#include "gromacs/mdlib/ebin.h"
-#include "gromacs/mdlib/enerdata_utils.h"
-#include "gromacs/mdlib/energyoutput.h"
-#include "gromacs/mdlib/force.h"
-#include "gromacs/mdlib/forcerec.h"
-#include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/md_support.h"
-#include "gromacs/mdlib/mdatoms.h"
-#include "gromacs/mdlib/stat.h"
-#include "gromacs/mdlib/tgroup.h"
-#include "gromacs/mdlib/trajectory_writing.h"
-#include "gromacs/mdlib/update.h"
-#include "gromacs/mdlib/vsite.h"
-#include "gromacs/mdrunutility/handlerestart.h"
-#include "gromacs/mdrunutility/printtime.h"
-#include "gromacs/mdtypes/commrec.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdrunoptions.h"
-#include "gromacs/mdtypes/state.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/timing/walltime_accounting.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/topology/topology.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/logger.h"
-#include "gromacs/utility/smalloc.h"
-
-#include "legacysimulator.h"
-#include "shellfc.h"
-
-using gmx::MdrunScheduleWorkload;
-
-//! Utility structure for manipulating states during EM
-typedef struct
-{
-    //! Copy of the global state
-    t_state s;
-    //! Force array
-    PaddedHostVector<gmx::RVec> f;
-    //! Potential energy
-    real epot;
-    //! Norm of the force
-    real fnorm;
-    //! Maximum force
-    real fmax;
-    //! Direction
-    int a_fmax;
-} em_state_t;
-
-//! Print the EM starting conditions
-static void print_em_start(FILE*                     fplog,
-                           const t_commrec*          cr,
-                           gmx_walltime_accounting_t walltime_accounting,
-                           gmx_wallcycle_t           wcycle,
-                           const char*               name)
-{
-    walltime_accounting_start_time(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
-    print_start(fplog, cr, walltime_accounting, name);
-}
-
-//! Stop counting time for EM
-static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle_t wcycle)
-{
-    wallcycle_stop(wcycle, ewcRUN);
-
-    walltime_accounting_end_time(walltime_accounting);
-}
-
-//! Printing a log file and console header
-static void sp_header(FILE* out, const char* minimizer, real ftol, int nsteps)
-{
-    fprintf(out, "\n");
-    fprintf(out, "%s:\n", minimizer);
-    fprintf(out, "   Tolerance (Fmax)   = %12.5e\n", ftol);
-    fprintf(out, "   Number of steps    = %12d\n", nsteps);
-}
-
-//! Print warning message
-static void warn_step(FILE* fp, real ftol, real fmax, gmx_bool bLastStep, gmx_bool bConstrain)
-{
-    constexpr bool realIsDouble = GMX_DOUBLE;
-    char           buffer[2048];
-
-    if (!std::isfinite(fmax))
-    {
-        sprintf(buffer,
-                "\nEnergy minimization has stopped because the force "
-                "on at least one atom is not finite. This usually means "
-                "atoms are overlapping. Modify the input coordinates to "
-                "remove atom overlap or use soft-core potentials with "
-                "the free energy code to avoid infinite forces.\n%s",
-                !realIsDouble ? "You could also be lucky that switching to double precision "
-                                "is sufficient to obtain finite forces.\n"
-                              : "");
-    }
-    else if (bLastStep)
-    {
-        sprintf(buffer,
-                "\nEnergy minimization reached the maximum number "
-                "of steps before the forces reached the requested "
-                "precision Fmax < %g.\n",
-                ftol);
-    }
-    else
-    {
-        sprintf(buffer,
-                "\nEnergy minimization has stopped, but the forces have "
-                "not converged to the requested precision Fmax < %g (which "
-                "may not be possible for your system). It stopped "
-                "because the algorithm tried to make a new step whose size "
-                "was too small, or there was no change in the energy since "
-                "last step. Either way, we regard the minimization as "
-                "converged to within the available machine precision, "
-                "given your starting configuration and EM parameters.\n%s%s",
-                ftol,
-                !realIsDouble ? "\nDouble precision normally gives you higher accuracy, but "
-                                "this is often not needed for preparing to run molecular "
-                                "dynamics.\n"
-                              : "",
-                bConstrain ? "You might need to increase your constraint accuracy, or turn\n"
-                             "off constraints altogether (set constraints = none in mdp file)\n"
-                           : "");
-    }
-
-    fputs(wrap_lines(buffer, 78, 0, FALSE), stderr);
-    fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
-}
-
-//! Print message about convergence of the EM
-static void print_converged(FILE*             fp,
-                            const char*       alg,
-                            real              ftol,
-                            int64_t           count,
-                            gmx_bool          bDone,
-                            int64_t           nsteps,
-                            const em_state_t* ems,
-                            double            sqrtNumAtoms)
-{
-    char buf[STEPSTRSIZE];
-
-    if (bDone)
-    {
-        fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", alg, ftol, gmx_step_str(count, buf));
-    }
-    else if (count < nsteps)
-    {
-        fprintf(fp,
-                "\n%s converged to machine precision in %s steps,\n"
-                "but did not reach the requested Fmax < %g.\n",
-                alg, gmx_step_str(count, buf), ftol);
-    }
-    else
-    {
-        fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", alg, ftol,
-                gmx_step_str(count, buf));
-    }
-
-#if GMX_DOUBLE
-    fprintf(fp, "Potential Energy  = %21.14e\n", ems->epot);
-    fprintf(fp, "Maximum force     = %21.14e on atom %d\n", ems->fmax, ems->a_fmax + 1);
-    fprintf(fp, "Norm of force     = %21.14e\n", ems->fnorm / sqrtNumAtoms);
-#else
-    fprintf(fp, "Potential Energy  = %14.7e\n", ems->epot);
-    fprintf(fp, "Maximum force     = %14.7e on atom %d\n", ems->fmax, ems->a_fmax + 1);
-    fprintf(fp, "Norm of force     = %14.7e\n", ems->fnorm / sqrtNumAtoms);
-#endif
-}
-
-//! Compute the norm and max of the force array in parallel
-static void get_f_norm_max(const t_commrec* cr,
-                           t_grpopts*       opts,
-                           t_mdatoms*       mdatoms,
-                           const rvec*      f,
-                           real*            fnorm,
-                           real*            fmax,
-                           int*             a_fmax)
-{
-    double fnorm2, *sum;
-    real   fmax2, fam;
-    int    la_max, a_max, start, end, i, m, gf;
-
-    /* This routine finds the largest force and returns it.
-     * On parallel machines the global max is taken.
-     */
-    fnorm2 = 0;
-    fmax2  = 0;
-    la_max = -1;
-    start  = 0;
-    end    = mdatoms->homenr;
-    if (mdatoms->cFREEZE)
-    {
-        for (i = start; i < end; i++)
-        {
-            gf  = mdatoms->cFREEZE[i];
-            fam = 0;
-            for (m = 0; m < DIM; m++)
-            {
-                if (!opts->nFreeze[gf][m])
-                {
-                    fam += gmx::square(f[i][m]);
-                }
-            }
-            fnorm2 += fam;
-            if (fam > fmax2)
-            {
-                fmax2  = fam;
-                la_max = i;
-            }
-        }
-    }
-    else
-    {
-        for (i = start; i < end; i++)
-        {
-            fam = norm2(f[i]);
-            fnorm2 += fam;
-            if (fam > fmax2)
-            {
-                fmax2  = fam;
-                la_max = i;
-            }
-        }
-    }
-
-    if (la_max >= 0 && DOMAINDECOMP(cr))
-    {
-        a_max = cr->dd->globalAtomIndices[la_max];
-    }
-    else
-    {
-        a_max = la_max;
-    }
-    if (PAR(cr))
-    {
-        snew(sum, 2 * cr->nnodes + 1);
-        sum[2 * cr->nodeid]     = fmax2;
-        sum[2 * cr->nodeid + 1] = a_max;
-        sum[2 * cr->nnodes]     = fnorm2;
-        gmx_sumd(2 * cr->nnodes + 1, sum, cr);
-        fnorm2 = sum[2 * cr->nnodes];
-        /* Determine the global maximum */
-        for (i = 0; i < cr->nnodes; i++)
-        {
-            if (sum[2 * i] > fmax2)
-            {
-                fmax2 = sum[2 * i];
-                a_max = gmx::roundToInt(sum[2 * i + 1]);
-            }
-        }
-        sfree(sum);
-    }
-
-    if (fnorm)
-    {
-        *fnorm = sqrt(fnorm2);
-    }
-    if (fmax)
-    {
-        *fmax = sqrt(fmax2);
-    }
-    if (a_fmax)
-    {
-        *a_fmax = a_max;
-    }
-}
-
-//! Compute the norm of the force
-static void get_state_f_norm_max(const t_commrec* cr, t_grpopts* opts, t_mdatoms* mdatoms, em_state_t* ems)
-{
-    get_f_norm_max(cr, opts, mdatoms, ems->f.rvec_array(), &ems->fnorm, &ems->fmax, &ems->a_fmax);
-}
-
-//! Initialize the energy minimization
-static void init_em(FILE*                fplog,
-                    const gmx::MDLogger& mdlog,
-                    const char*          title,
-                    const t_commrec*     cr,
-                    t_inputrec*          ir,
-                    gmx::ImdSession*     imdSession,
-                    pull_t*              pull_work,
-                    t_state*             state_global,
-                    gmx_mtop_t*          top_global,
-                    em_state_t*          ems,
-                    gmx_localtop_t*      top,
-                    t_nrnb*              nrnb,
-                    t_forcerec*          fr,
-                    t_graph**            graph,
-                    gmx::MDAtoms*        mdAtoms,
-                    gmx_global_stat_t*   gstat,
-                    gmx_vsite_t*         vsite,
-                    gmx::Constraints*    constr,
-                    gmx_shellfc_t**      shellfc)
-{
-    real dvdl_constr;
-
-    if (fplog)
-    {
-        fprintf(fplog, "Initiating %s\n", title);
-    }
-
-    if (MASTER(cr))
-    {
-        state_global->ngtc = 0;
-    }
-    initialize_lambdas(fplog, *ir, MASTER(cr), &(state_global->fep_state), state_global->lambda, nullptr);
-
-    if (ir->eI == eiNM)
-    {
-        GMX_ASSERT(shellfc != nullptr, "With NM we always support shells");
-
-        *shellfc = init_shell_flexcon(stdout, top_global, constr ? constr->numFlexibleConstraints() : 0,
-                                      ir->nstcalcenergy, DOMAINDECOMP(cr));
-    }
-    else
-    {
-        GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI),
-                   "This else currently only handles energy minimizers, consider if your algorithm "
-                   "needs shell/flexible-constraint support");
-
-        /* With energy minimization, shells and flexible constraints are
-         * automatically minimized when treated like normal DOFS.
-         */
-        if (shellfc != nullptr)
-        {
-            *shellfc = nullptr;
-        }
-    }
-
-    auto mdatoms = mdAtoms->mdatoms();
-    if (DOMAINDECOMP(cr))
-    {
-        top->useInDomainDecomp_ = true;
-        dd_init_local_top(*top_global, top);
-
-        dd_init_local_state(cr->dd, state_global, &ems->s);
-
-        /* Distribute the charge groups over the nodes from the master node */
-        dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir,
-                            imdSession, pull_work, &ems->s, &ems->f, mdAtoms, top, fr, vsite,
-                            constr, nrnb, nullptr, FALSE);
-        dd_store_state(cr->dd, &ems->s);
-
-        *graph = nullptr;
-    }
-    else
-    {
-        state_change_natoms(state_global, state_global->natoms);
-        /* Just copy the state */
-        ems->s = *state_global;
-        state_change_natoms(&ems->s, ems->s.natoms);
-        ems->f.resizeWithPadding(ems->s.natoms);
-
-        mdAlgorithmsSetupAtomData(cr, ir, *top_global, top, fr, graph, mdAtoms, constr, vsite,
-                                  shellfc ? *shellfc : nullptr);
-
-        if (vsite)
-        {
-            set_vsite_top(vsite, top, mdatoms);
-        }
-    }
-
-    update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[efptMASS]);
-
-    if (constr)
-    {
-        // TODO how should this cross-module support dependency be managed?
-        if (ir->eConstrAlg == econtSHAKE && gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
-        {
-            gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
-                      econstr_names[econtSHAKE], econstr_names[econtLINCS]);
-        }
-
-        if (!ir->bContinuation)
-        {
-            /* Constrain the starting coordinates */
-            dvdl_constr = 0;
-            constr->apply(TRUE, TRUE, -1, 0, 1.0, ems->s.x.rvec_array(), ems->s.x.rvec_array(),
-                          nullptr, ems->s.box, ems->s.lambda[efptFEP], &dvdl_constr, nullptr,
-                          nullptr, gmx::ConstraintVariable::Positions);
-        }
-    }
-
-    if (PAR(cr))
-    {
-        *gstat = global_stat_init(ir);
-    }
-    else
-    {
-        *gstat = nullptr;
-    }
-
-    calc_shifts(ems->s.box, fr->shift_vec);
-}
-
-//! Finalize the minimization
-static void finish_em(const t_commrec*          cr,
-                      gmx_mdoutf_t              outf,
-                      gmx_walltime_accounting_t walltime_accounting,
-                      gmx_wallcycle_t           wcycle)
-{
-    if (!thisRankHasDuty(cr, DUTY_PME))
-    {
-        /* Tell the PME only node to finish */
-        gmx_pme_send_finish(cr);
-    }
-
-    done_mdoutf(outf);
-
-    em_time_end(walltime_accounting, wcycle);
-}
-
-//! Swap two different EM states during minimization
-static void swap_em_state(em_state_t** ems1, em_state_t** ems2)
-{
-    em_state_t* tmp;
-
-    tmp   = *ems1;
-    *ems1 = *ems2;
-    *ems2 = tmp;
-}
-
-//! Save the EM trajectory
-static void write_em_traj(FILE*               fplog,
-                          const t_commrec*    cr,
-                          gmx_mdoutf_t        outf,
-                          gmx_bool            bX,
-                          gmx_bool            bF,
-                          const char*         confout,
-                          gmx_mtop_t*         top_global,
-                          t_inputrec*         ir,
-                          int64_t             step,
-                          em_state_t*         state,
-                          t_state*            state_global,
-                          ObservablesHistory* observablesHistory)
-{
-    int mdof_flags = 0;
-
-    if (bX)
-    {
-        mdof_flags |= MDOF_X;
-    }
-    if (bF)
-    {
-        mdof_flags |= MDOF_F;
-    }
-
-    /* If we want IMD output, set appropriate MDOF flag */
-    if (ir->bIMD)
-    {
-        mdof_flags |= MDOF_IMD;
-    }
-
-    mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global->natoms, step,
-                                     static_cast<double>(step), &state->s, state_global,
-                                     observablesHistory, state->f);
-
-    if (confout != nullptr)
-    {
-        if (DOMAINDECOMP(cr))
-        {
-            /* If bX=true, x was collected to state_global in the call above */
-            if (!bX)
-            {
-                auto globalXRef = MASTER(cr) ? state_global->x : gmx::ArrayRef<gmx::RVec>();
-                dd_collect_vec(cr->dd, &state->s, state->s.x, globalXRef);
-            }
-        }
-        else
-        {
-            /* Copy the local state pointer */
-            state_global = &state->s;
-        }
-
-        if (MASTER(cr))
-        {
-            if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
-            {
-                /* Make molecules whole only for confout writing */
-                do_pbc_mtop(ir->ePBC, state->s.box, top_global, state_global->x.rvec_array());
-            }
-
-            write_sto_conf_mtop(confout, *top_global->name, top_global,
-                                state_global->x.rvec_array(), nullptr, ir->ePBC, state->s.box);
-        }
-    }
-}
-
-//! \brief Do one minimization step
-//
-// \returns true when the step succeeded, false when a constraint error occurred
-static bool do_em_step(const t_commrec*                   cr,
-                       t_inputrec*                        ir,
-                       t_mdatoms*                         md,
-                       em_state_t*                        ems1,
-                       real                               a,
-                       const PaddedHostVector<gmx::RVec>* force,
-                       em_state_t*                        ems2,
-                       gmx::Constraints*                  constr,
-                       int64_t                            count)
-
-{
-    t_state *s1, *s2;
-    int      start, end;
-    real     dvdl_constr;
-    int nthreads gmx_unused;
-
-    bool validStep = true;
-
-    s1 = &ems1->s;
-    s2 = &ems2->s;
-
-    if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
-    {
-        gmx_incons("state mismatch in do_em_step");
-    }
-
-    s2->flags = s1->flags;
-
-    if (s2->natoms != s1->natoms)
-    {
-        state_change_natoms(s2, s1->natoms);
-        ems2->f.resizeWithPadding(s2->natoms);
-    }
-    if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size())
-    {
-        s2->cg_gl.resize(s1->cg_gl.size());
-    }
-
-    copy_mat(s1->box, s2->box);
-    /* Copy free energy state */
-    s2->lambda = s1->lambda;
-    copy_mat(s1->box, s2->box);
-
-    start = 0;
-    end   = md->homenr;
-
-    nthreads = gmx_omp_nthreads_get(emntUpdate);
-#pragma omp parallel num_threads(nthreads)
-    {
-        const rvec* x1 = s1->x.rvec_array();
-        rvec*       x2 = s2->x.rvec_array();
-        const rvec* f  = force->rvec_array();
-
-        int gf = 0;
-#pragma omp for schedule(static) nowait
-        for (int i = start; i < end; i++)
-        {
-            try
-            {
-                if (md->cFREEZE)
-                {
-                    gf = md->cFREEZE[i];
-                }
-                for (int m = 0; m < DIM; m++)
-                {
-                    if (ir->opts.nFreeze[gf][m])
-                    {
-                        x2[i][m] = x1[i][m];
-                    }
-                    else
-                    {
-                        x2[i][m] = x1[i][m] + a * f[i][m];
-                    }
-                }
-            }
-            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-        }
-
-        if (s2->flags & (1 << estCGP))
-        {
-            /* Copy the CG p vector */
-            const rvec* p1 = s1->cg_p.rvec_array();
-            rvec*       p2 = s2->cg_p.rvec_array();
-#pragma omp for schedule(static) nowait
-            for (int i = start; i < end; i++)
-            {
-                // Trivial OpenMP block that does not throw
-                copy_rvec(p1[i], p2[i]);
-            }
-        }
-
-        if (DOMAINDECOMP(cr))
-        {
-            /* OpenMP does not supported unsigned loop variables */
-#pragma omp for schedule(static) nowait
-            for (gmx::index i = 0; i < gmx::ssize(s2->cg_gl); i++)
-            {
-                s2->cg_gl[i] = s1->cg_gl[i];
-            }
-        }
-    }
-
-    if (DOMAINDECOMP(cr))
-    {
-        s2->ddp_count       = s1->ddp_count;
-        s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
-    }
-
-    if (constr)
-    {
-        dvdl_constr = 0;
-        validStep = constr->apply(TRUE, TRUE, count, 0, 1.0, s1->x.rvec_array(), s2->x.rvec_array(),
-                                  nullptr, s2->box, s2->lambda[efptBONDED], &dvdl_constr, nullptr,
-                                  nullptr, gmx::ConstraintVariable::Positions);
-
-        if (cr->nnodes > 1)
-        {
-            /* This global reduction will affect performance at high
-             * parallelization, but we can not really avoid it.
-             * But usually EM is not run at high parallelization.
-             */
-            int reductionBuffer = static_cast<int>(!validStep);
-            gmx_sumi(1, &reductionBuffer, cr);
-            validStep = (reductionBuffer == 0);
-        }
-
-        // We should move this check to the different minimizers
-        if (!validStep && ir->eI != eiSteep)
-        {
-            gmx_fatal(FARGS,
-                      "The coordinates could not be constrained. Minimizer '%s' can not handle "
-                      "constraint failures, use minimizer '%s' before using '%s'.",
-                      EI(ir->eI), EI(eiSteep), EI(ir->eI));
-        }
-    }
-
-    return validStep;
-}
-
-//! Prepare EM for using domain decomposition parallellization
-static void em_dd_partition_system(FILE*                fplog,
-                                   const gmx::MDLogger& mdlog,
-                                   int                  step,
-                                   const t_commrec*     cr,
-                                   gmx_mtop_t*          top_global,
-                                   t_inputrec*          ir,
-                                   gmx::ImdSession*     imdSession,
-                                   pull_t*              pull_work,
-                                   em_state_t*          ems,
-                                   gmx_localtop_t*      top,
-                                   gmx::MDAtoms*        mdAtoms,
-                                   t_forcerec*          fr,
-                                   gmx_vsite_t*         vsite,
-                                   gmx::Constraints*    constr,
-                                   t_nrnb*              nrnb,
-                                   gmx_wallcycle_t      wcycle)
-{
-    /* Repartition the domain decomposition */
-    dd_partition_system(fplog, mdlog, step, cr, FALSE, 1, nullptr, *top_global, ir, imdSession, pull_work,
-                        &ems->s, &ems->f, mdAtoms, top, fr, vsite, constr, nrnb, wcycle, FALSE);
-    dd_store_state(cr->dd, &ems->s);
-}
-
-namespace
-{
-
-/*! \brief Class to handle the work of setting and doing an energy evaluation.
- *
- * This class is a mere aggregate of parameters to pass to evaluate an
- * energy, so that future changes to names and types of them consume
- * less time when refactoring other code.
- *
- * Aggregate initialization is used, for which the chief risk is that
- * if a member is added at the end and not all initializer lists are
- * updated, then the member will be value initialized, which will
- * typically mean initialization to zero.
- *
- * Use a braced initializer list to construct one of these. */
-class EnergyEvaluator
-{
-public:
-    /*! \brief Evaluates an energy on the state in \c ems.
-     *
-     * \todo In practice, the same objects mu_tot, vir, and pres
-     * are always passed to this function, so we would rather have
-     * them as data members. However, their C-array types are
-     * unsuited for aggregate initialization. When the types
-     * improve, the call signature of this method can be reduced.
-     */
-    void run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst);
-    //! Handles logging (deprecated).
-    FILE* fplog;
-    //! Handles logging.
-    const gmx::MDLogger& mdlog;
-    //! Handles communication.
-    const t_commrec* cr;
-    //! Coordinates multi-simulations.
-    const gmx_multisim_t* ms;
-    //! Holds the simulation topology.
-    gmx_mtop_t* top_global;
-    //! Holds the domain topology.
-    gmx_localtop_t* top;
-    //! User input options.
-    t_inputrec* inputrec;
-    //! The Interactive Molecular Dynamics session.
-    gmx::ImdSession* imdSession;
-    //! The pull work object.
-    pull_t* pull_work;
-    //! Manages flop accounting.
-    t_nrnb* nrnb;
-    //! Manages wall cycle accounting.
-    gmx_wallcycle_t wcycle;
-    //! Coordinates global reduction.
-    gmx_global_stat_t gstat;
-    //! Handles virtual sites.
-    gmx_vsite_t* vsite;
-    //! Handles constraints.
-    gmx::Constraints* constr;
-    //! Handles strange things.
-    t_fcdata* fcd;
-    //! Molecular graph for SHAKE.
-    t_graph* graph;
-    //! Per-atom data for this domain.
-    gmx::MDAtoms* mdAtoms;
-    //! Handles how to calculate the forces.
-    t_forcerec* fr;
-    //! Schedule of force-calculation work each step for this task.
-    MdrunScheduleWorkload* runScheduleWork;
-    //! Stores the computed energies.
-    gmx_enerdata_t* enerd;
-};
-
-void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst)
-{
-    real     t;
-    gmx_bool bNS;
-    tensor   force_vir, shake_vir, ekin;
-    real     dvdl_constr;
-    real     terminate = 0;
-
-    /* Set the time to the initial time, the time does not change during EM */
-    t = inputrec->init_t;
-
-    if (bFirst || (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
-    {
-        /* This is the first state or an old state used before the last ns */
-        bNS = TRUE;
-    }
-    else
-    {
-        bNS = FALSE;
-        if (inputrec->nstlist > 0)
-        {
-            bNS = TRUE;
-        }
-    }
-
-    if (vsite)
-    {
-        construct_vsites(vsite, ems->s.x.rvec_array(), 1, nullptr, top->idef.iparams, top->idef.il,
-                         fr->ePBC, fr->bMolPBC, cr, ems->s.box);
-    }
-
-    if (DOMAINDECOMP(cr) && bNS)
-    {
-        /* Repartition the domain decomposition */
-        em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, imdSession, pull_work,
-                               ems, top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
-    }
-
-    /* Calc force & energy on new trial position  */
-    /* do_force always puts the charge groups in the box and shifts again
-     * We do not unshift, so molecules are always whole in congrad.c
-     */
-    do_force(fplog, cr, ms, inputrec, nullptr, nullptr, imdSession, pull_work, count, nrnb, wcycle,
-             top, ems->s.box, ems->s.x.arrayRefWithPadding(), &ems->s.hist,
-             ems->f.arrayRefWithPadding(), force_vir, mdAtoms->mdatoms(), enerd, fcd, ems->s.lambda,
-             graph, fr, runScheduleWork, vsite, mu_tot, t, nullptr,
-             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY
-                     | (bNS ? GMX_FORCE_NS : 0),
-             DDBalanceRegionHandler(cr));
-
-    /* Clear the unused shake virial and pressure */
-    clear_mat(shake_vir);
-    clear_mat(pres);
-
-    /* Communicate stuff when parallel */
-    if (PAR(cr) && inputrec->eI != eiNM)
-    {
-        wallcycle_start(wcycle, ewcMoveE);
-
-        global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot, inputrec, nullptr, nullptr, nullptr,
-                    1, &terminate, nullptr, FALSE, CGLO_ENERGY | CGLO_PRESSURE | CGLO_CONSTRAINT);
-
-        wallcycle_stop(wcycle, ewcMoveE);
-    }
-
-    if (fr->dispersionCorrection)
-    {
-        /* Calculate long range corrections to pressure and energy */
-        const DispersionCorrection::Correction correction =
-                fr->dispersionCorrection->calculate(ems->s.box, ems->s.lambda[efptVDW]);
-
-        enerd->term[F_DISPCORR] = correction.energy;
-        enerd->term[F_EPOT] += correction.energy;
-        enerd->term[F_PRES] += correction.pressure;
-        enerd->term[F_DVDL] += correction.dvdl;
-    }
-    else
-    {
-        enerd->term[F_DISPCORR] = 0;
-    }
-
-    ems->epot = enerd->term[F_EPOT];
-
-    if (constr)
-    {
-        /* Project out the constraint components of the force */
-        dvdl_constr  = 0;
-        rvec* f_rvec = ems->f.rvec_array();
-        constr->apply(FALSE, FALSE, count, 0, 1.0, ems->s.x.rvec_array(), f_rvec, f_rvec,
-                      ems->s.box, ems->s.lambda[efptBONDED], &dvdl_constr, nullptr, &shake_vir,
-                      gmx::ConstraintVariable::ForceDispl);
-        enerd->term[F_DVDL_CONSTR] += dvdl_constr;
-        m_add(force_vir, shake_vir, vir);
-    }
-    else
-    {
-        copy_mat(force_vir, vir);
-    }
-
-    clear_mat(ekin);
-    enerd->term[F_PRES] = calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);
-
-    sum_dhdl(enerd, ems->s.lambda, *inputrec->fepvals);
-
-    if (EI_ENERGY_MINIMIZATION(inputrec->eI))
-    {
-        get_state_f_norm_max(cr, &(inputrec->opts), mdAtoms->mdatoms(), ems);
-    }
-}
-
-} // namespace
-
-//! Parallel utility summing energies and forces
-static double reorder_partsum(const t_commrec* cr,
-                              t_grpopts*       opts,
-                              gmx_mtop_t*      top_global,
-                              em_state_t*      s_min,
-                              em_state_t*      s_b)
-{
-    if (debug)
-    {
-        fprintf(debug, "Doing reorder_partsum\n");
-    }
-
-    const rvec* fm = s_min->f.rvec_array();
-    const rvec* fb = s_b->f.rvec_array();
-
-    /* Collect fm in a global vector fmg.
-     * This conflicts with the spirit of domain decomposition,
-     * but to fully optimize this a much more complicated algorithm is required.
-     */
-    const int natoms = top_global->natoms;
-    rvec*     fmg;
-    snew(fmg, natoms);
-
-    gmx::ArrayRef<const int> indicesMin = s_min->s.cg_gl;
-    int                      i          = 0;
-    for (int a : indicesMin)
-    {
-        copy_rvec(fm[i], fmg[a]);
-        i++;
-    }
-    gmx_sum(top_global->natoms * 3, fmg[0], cr);
-
-    /* Now we will determine the part of the sum for the cgs in state s_b */
-    gmx::ArrayRef<const int> indicesB = s_b->s.cg_gl;
-
-    double partsum                  = 0;
-    i                               = 0;
-    int                          gf = 0;
-    gmx::ArrayRef<unsigned char> grpnrFREEZE =
-            top_global->groups.groupNumbers[SimulationAtomGroupType::Freeze];
-    for (int a : indicesB)
-    {
-        if (!grpnrFREEZE.empty())
-        {
-            gf = grpnrFREEZE[i];
-        }
-        for (int m = 0; m < DIM; m++)
-        {
-            if (!opts->nFreeze[gf][m])
-            {
-                partsum += (fb[i][m] - fmg[a][m]) * fb[i][m];
-            }
-        }
-        i++;
-    }
-
-    sfree(fmg);
-
-    return partsum;
-}
-
-//! Print some stuff, like beta, whatever that means.
-static real pr_beta(const t_commrec* cr,
-                    t_grpopts*       opts,
-                    t_mdatoms*       mdatoms,
-                    gmx_mtop_t*      top_global,
-                    em_state_t*      s_min,
-                    em_state_t*      s_b)
-{
-    double sum;
-
-    /* This is just the classical Polak-Ribiere calculation of beta;
-     * it looks a bit complicated since we take freeze groups into account,
-     * and might have to sum it in parallel runs.
-     */
-
-    if (!DOMAINDECOMP(cr)
-        || (s_min->s.ddp_count == cr->dd->ddp_count && s_b->s.ddp_count == cr->dd->ddp_count))
-    {
-        const rvec* fm = s_min->f.rvec_array();
-        const rvec* fb = s_b->f.rvec_array();
-        sum            = 0;
-        int gf         = 0;
-        /* This part of code can be incorrect with DD,
-         * since the atom ordering in s_b and s_min might differ.
-         */
-        for (int i = 0; i < mdatoms->homenr; i++)
-        {
-            if (mdatoms->cFREEZE)
-            {
-                gf = mdatoms->cFREEZE[i];
-            }
-            for (int m = 0; m < DIM; m++)
-            {
-                if (!opts->nFreeze[gf][m])
-                {
-                    sum += (fb[i][m] - fm[i][m]) * fb[i][m];
-                }
-            }
-        }
-    }
-    else
-    {
-        /* We need to reorder cgs while summing */
-        sum = reorder_partsum(cr, opts, top_global, s_min, s_b);
-    }
-    if (PAR(cr))
-    {
-        gmx_sumd(1, &sum, cr);
-    }
-
-    return sum / gmx::square(s_min->fnorm);
-}
-
-namespace gmx
-{
-
-void LegacySimulator::do_cg()
-{
-    const char* CG = "Polak-Ribiere Conjugate Gradients";
-
-    gmx_localtop_t    top;
-    gmx_global_stat_t gstat;
-    t_graph*          graph;
-    double            tmp, minstep;
-    real              stepsize;
-    real              a, b, c, beta = 0.0;
-    real              epot_repl = 0;
-    real              pnorm;
-    gmx_bool          converged, foundlower;
-    rvec              mu_tot = { 0 };
-    gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
-    tensor            vir, pres;
-    int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
-    int               m, step, nminstep;
-    auto              mdatoms = mdAtoms->mdatoms();
-
-    GMX_LOG(mdlog.info)
-            .asParagraph()
-            .appendText(
-                    "Note that activating conjugate gradient energy minimization via the "
-                    "integrator .mdp option and the command gmx mdrun may "
-                    "be available in a different form in a future version of GROMACS, "
-                    "e.g. gmx minimize and an .mdp option.");
-
-    step = 0;
-
-    if (MASTER(cr))
-    {
-        // In CG, the state is extended with a search direction
-        state_global->flags |= (1 << estCGP);
-
-        // Ensure the extra per-atom state array gets allocated
-        state_change_natoms(state_global, state_global->natoms);
-
-        // Initialize the search direction to zero
-        for (RVec& cg_p : state_global->cg_p)
-        {
-            cg_p = { 0, 0, 0 };
-        }
-    }
-
-    /* Create 4 states on the stack and extract pointers that we will swap */
-    em_state_t  s0{}, s1{}, s2{}, s3{};
-    em_state_t* s_min = &s0;
-    em_state_t* s_a   = &s1;
-    em_state_t* s_b   = &s2;
-    em_state_t* s_c   = &s3;
-
-    /* Init em and store the local state in s_min */
-    init_em(fplog, mdlog, CG, cr, inputrec, imdSession, pull_work, state_global, top_global, s_min,
-            &top, nrnb, fr, &graph, mdAtoms, &gstat, vsite, constr, nullptr);
-    const bool        simulationsShareState = false;
-    gmx_mdoutf*       outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider,
-                                   mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
-                                   StartingBehavior::NewSimulation, simulationsShareState, ms);
-    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, nullptr,
-                                   false, StartingBehavior::NewSimulation, mdModulesNotifier);
-
-    /* Print to log file */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
-
-    /* Max number of steps */
-    number_steps = inputrec->nsteps;
-
-    if (MASTER(cr))
-    {
-        sp_header(stderr, CG, inputrec->em_tol, number_steps);
-    }
-    if (fplog)
-    {
-        sp_header(fplog, CG, inputrec->em_tol, number_steps);
-    }
-
-    EnergyEvaluator energyEvaluator{
-        fplog,      mdlog,     cr,      ms,     top_global,      &top,  inputrec,
-        imdSession, pull_work, nrnb,    wcycle, gstat,           vsite, constr,
-        fcd,        graph,     mdAtoms, fr,     runScheduleWork, enerd
-    };
-    /* Call the force routine and some auxiliary (neighboursearching etc.) */
-    /* do_force always puts the charge groups in the box and shifts again
-     * We do not unshift, so molecules are always whole in congrad.c
-     */
-    energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE);
-
-    if (MASTER(cr))
-    {
-        /* Copy stuff to the energy bin for easy printing etc. */
-        matrix nullBox = {};
-        energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), mdatoms->tmass,
-                                         enerd, nullptr, nullptr, nullptr, nullBox, nullptr,
-                                         nullptr, vir, pres, nullptr, mu_tot, constr);
-
-        energyOutput.printHeader(fplog, step, step);
-        energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step,
-                                           step, fcd, nullptr);
-    }
-
-    /* Estimate/guess the initial stepsize */
-    stepsize = inputrec->em_stepsize / s_min->fnorm;
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        fprintf(stderr, "   F-max             = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1);
-        fprintf(stderr, "   F-Norm            = %12.5e\n", s_min->fnorm / sqrtNumAtoms);
-        fprintf(stderr, "\n");
-        /* and copy to the log file too... */
-        fprintf(fplog, "   F-max             = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1);
-        fprintf(fplog, "   F-Norm            = %12.5e\n", s_min->fnorm / sqrtNumAtoms);
-        fprintf(fplog, "\n");
-    }
-    /* Start the loop over CG steps.
-     * Each successful step is counted, and we continue until
-     * we either converge or reach the max number of steps.
-     */
-    converged = FALSE;
-    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
-    {
-
-        /* start taking steps in a new direction
-         * First time we enter the routine, beta=0, and the direction is
-         * simply the negative gradient.
-         */
-
-        /* Calculate the new direction in p, and the gradient in this direction, gpa */
-        rvec*       pm  = s_min->s.cg_p.rvec_array();
-        const rvec* sfm = s_min->f.rvec_array();
-        double      gpa = 0;
-        int         gf  = 0;
-        for (int i = 0; i < mdatoms->homenr; i++)
-        {
-            if (mdatoms->cFREEZE)
-            {
-                gf = mdatoms->cFREEZE[i];
-            }
-            for (m = 0; m < DIM; m++)
-            {
-                if (!inputrec->opts.nFreeze[gf][m])
-                {
-                    pm[i][m] = sfm[i][m] + beta * pm[i][m];
-                    gpa -= pm[i][m] * sfm[i][m];
-                    /* f is negative gradient, thus the sign */
-                }
-                else
-                {
-                    pm[i][m] = 0;
-                }
-            }
-        }
-
-        /* Sum the gradient along the line across CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &gpa, cr);
-        }
-
-        /* Calculate the norm of the search vector */
-        get_f_norm_max(cr, &(inputrec->opts), mdatoms, pm, &pnorm, nullptr, nullptr);
-
-        /* Just in case stepsize reaches zero due to numerical precision... */
-        if (stepsize <= 0)
-        {
-            stepsize = inputrec->em_stepsize / pnorm;
-        }
-
-        /*
-         * Double check the value of the derivative in the search direction.
-         * If it is positive it must be due to the old information in the
-         * CG formula, so just remove that and start over with beta=0.
-         * This corresponds to a steepest descent step.
-         */
-        if (gpa > 0)
-        {
-            beta = 0;
-            step--;   /* Don't count this step since we are restarting */
-            continue; /* Go back to the beginning of the big for-loop */
-        }
-
-        /* Calculate minimum allowed stepsize, before the average (norm)
-         * relative change in coordinate is smaller than precision
-         */
-        minstep      = 0;
-        auto s_min_x = makeArrayRef(s_min->s.x);
-        for (int i = 0; i < mdatoms->homenr; i++)
-        {
-            for (m = 0; m < DIM; m++)
-            {
-                tmp = fabs(s_min_x[i][m]);
-                if (tmp < 1.0)
-                {
-                    tmp = 1.0;
-                }
-                tmp = pm[i][m] / tmp;
-                minstep += tmp * tmp;
-            }
-        }
-        /* Add up from all CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &minstep, cr);
-        }
-
-        minstep = GMX_REAL_EPS / sqrt(minstep / (3 * top_global->natoms));
-
-        if (stepsize < minstep)
-        {
-            converged = TRUE;
-            break;
-        }
-
-        /* Write coordinates if necessary */
-        do_x = do_per_step(step, inputrec->nstxout);
-        do_f = do_per_step(step, inputrec->nstfout);
-
-        write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, step, s_min,
-                      state_global, observablesHistory);
-
-        /* Take a step downhill.
-         * In theory, we should minimize the function along this direction.
-         * That is quite possible, but it turns out to take 5-10 function evaluations
-         * for each line. However, we dont really need to find the exact minimum -
-         * it is much better to start a new CG step in a modified direction as soon
-         * as we are close to it. This will save a lot of energy evaluations.
-         *
-         * In practice, we just try to take a single step.
-         * If it worked (i.e. lowered the energy), we increase the stepsize but
-         * the continue straight to the next CG step without trying to find any minimum.
-         * If it didn't work (higher energy), there must be a minimum somewhere between
-         * the old position and the new one.
-         *
-         * Due to the finite numerical accuracy, it turns out that it is a good idea
-         * to even accept a SMALL increase in energy, if the derivative is still downhill.
-         * This leads to lower final energies in the tests I've done. / Erik
-         */
-        s_a->epot = s_min->epot;
-        a         = 0.0;
-        c         = a + stepsize; /* reference position along line is zero */
-
-        if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
-        {
-            em_dd_partition_system(fplog, mdlog, step, cr, top_global, inputrec, imdSession,
-                                   pull_work, s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
-        }
-
-        /* Take a trial step (new coords in s_c) */
-        do_em_step(cr, inputrec, mdatoms, s_min, c, &s_min->s.cg_p, s_c, constr, -1);
-
-        neval++;
-        /* Calculate energy for the trial step */
-        energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE);
-
-        /* Calc derivative along line */
-        const rvec* pc  = s_c->s.cg_p.rvec_array();
-        const rvec* sfc = s_c->f.rvec_array();
-        double      gpc = 0;
-        for (int i = 0; i < mdatoms->homenr; i++)
-        {
-            for (m = 0; m < DIM; m++)
-            {
-                gpc -= pc[i][m] * sfc[i][m]; /* f is negative gradient, thus the sign */
-            }
-        }
-        /* Sum the gradient along the line across CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &gpc, cr);
-        }
-
-        /* This is the max amount of increase in energy we tolerate */
-        tmp = std::sqrt(GMX_REAL_EPS) * fabs(s_a->epot);
-
-        /* Accept the step if the energy is lower, or if it is not significantly higher
-         * and the line derivative is still negative.
-         */
-        if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
-        {
-            foundlower = TRUE;
-            /* Great, we found a better energy. Increase step for next iteration
-             * if we are still going down, decrease it otherwise
-             */
-            if (gpc < 0)
-            {
-                stepsize *= 1.618034; /* The golden section */
-            }
-            else
-            {
-                stepsize *= 0.618034; /* 1/golden section */
-            }
-        }
-        else
-        {
-            /* New energy is the same or higher. We will have to do some work
-             * to find a smaller value in the interval. Take smaller step next time!
-             */
-            foundlower = FALSE;
-            stepsize *= 0.618034;
-        }
-
-
-        /* OK, if we didn't find a lower value we will have to locate one now - there must
-         * be one in the interval [a=0,c].
-         * The same thing is valid here, though: Don't spend dozens of iterations to find
-         * the line minimum. We try to interpolate based on the derivative at the endpoints,
-         * and only continue until we find a lower value. In most cases this means 1-2 iterations.
-         *
-         * I also have a safeguard for potentially really pathological functions so we never
-         * take more than 20 steps before we give up ...
-         *
-         * If we already found a lower value we just skip this step and continue to the update.
-         */
-        double gpb;
-        if (!foundlower)
-        {
-            nminstep = 0;
-
-            do
-            {
-                /* Select a new trial point.
-                 * If the derivatives at points a & c have different sign we interpolate to zero,
-                 * otherwise just do a bisection.
-                 */
-                if (gpa < 0 && gpc > 0)
-                {
-                    b = a + gpa * (a - c) / (gpc - gpa);
-                }
-                else
-                {
-                    b = 0.5 * (a + c);
-                }
-
-                /* safeguard if interpolation close to machine accuracy causes errors:
-                 * never go outside the interval
-                 */
-                if (b <= a || b >= c)
-                {
-                    b = 0.5 * (a + c);
-                }
-
-                if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
-                {
-                    /* Reload the old state */
-                    em_dd_partition_system(fplog, mdlog, -1, cr, top_global, inputrec, imdSession, pull_work,
-                                           s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
-                }
-
-                /* Take a trial step to this new point - new coords in s_b */
-                do_em_step(cr, inputrec, mdatoms, s_min, b, &s_min->s.cg_p, s_b, constr, -1);
-
-                neval++;
-                /* Calculate energy for the trial step */
-                energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE);
-
-                /* p does not change within a step, but since the domain decomposition
-                 * might change, we have to use cg_p of s_b here.
-                 */
-                const rvec* pb  = s_b->s.cg_p.rvec_array();
-                const rvec* sfb = s_b->f.rvec_array();
-                gpb             = 0;
-                for (int i = 0; i < mdatoms->homenr; i++)
-                {
-                    for (m = 0; m < DIM; m++)
-                    {
-                        gpb -= pb[i][m] * sfb[i][m]; /* f is negative gradient, thus the sign */
-                    }
-                }
-                /* Sum the gradient along the line across CPUs */
-                if (PAR(cr))
-                {
-                    gmx_sumd(1, &gpb, cr);
-                }
-
-                if (debug)
-                {
-                    fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", s_a->epot, s_b->epot,
-                            s_c->epot, gpb);
-                }
-
-                epot_repl = s_b->epot;
-
-                /* Keep one of the intervals based on the value of the derivative at the new point */
-                if (gpb > 0)
-                {
-                    /* Replace c endpoint with b */
-                    swap_em_state(&s_b, &s_c);
-                    c   = b;
-                    gpc = gpb;
-                }
-                else
-                {
-                    /* Replace a endpoint with b */
-                    swap_em_state(&s_b, &s_a);
-                    a   = b;
-                    gpa = gpb;
-                }
-
-                /*
-                 * Stop search as soon as we find a value smaller than the endpoints.
-                 * Never run more than 20 steps, no matter what.
-                 */
-                nminstep++;
-            } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && (nminstep < 20));
-
-            if (std::fabs(epot_repl - s_min->epot) < fabs(s_min->epot) * GMX_REAL_EPS || nminstep >= 20)
-            {
-                /* OK. We couldn't find a significantly lower energy.
-                 * If beta==0 this was steepest descent, and then we give up.
-                 * If not, set beta=0 and restart with steepest descent before quitting.
-                 */
-                if (beta == 0.0)
-                {
-                    /* Converged */
-                    converged = TRUE;
-                    break;
-                }
-                else
-                {
-                    /* Reset memory before giving up */
-                    beta = 0.0;
-                    continue;
-                }
-            }
-
-            /* Select min energy state of A & C, put the best in B.
-             */
-            if (s_c->epot < s_a->epot)
-            {
-                if (debug)
-                {
-                    fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", s_c->epot,
-                            s_a->epot);
-                }
-                swap_em_state(&s_b, &s_c);
-                gpb = gpc;
-            }
-            else
-            {
-                if (debug)
-                {
-                    fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", s_a->epot,
-                            s_c->epot);
-                }
-                swap_em_state(&s_b, &s_a);
-                gpb = gpa;
-            }
-        }
-        else
-        {
-            if (debug)
-            {
-                fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", s_c->epot);
-            }
-            swap_em_state(&s_b, &s_c);
-            gpb = gpc;
-        }
-
-        /* new search direction */
-        /* beta = 0 means forget all memory and restart with steepest descents. */
-        if (nstcg && ((step % nstcg) == 0))
-        {
-            beta = 0.0;
-        }
-        else
-        {
-            /* s_min->fnorm cannot be zero, because then we would have converged
-             * and broken out.
-             */
-
-            /* Polak-Ribiere update.
-             * Change to fnorm2/fnorm2_old for Fletcher-Reeves
-             */
-            beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
-        }
-        /* Limit beta to prevent oscillations */
-        if (fabs(beta) > 5.0)
-        {
-            beta = 0.0;
-        }
-
-
-        /* update positions */
-        swap_em_state(&s_min, &s_b);
-        gpa = gpb;
-
-        /* Print it if necessary */
-        if (MASTER(cr))
-        {
-            if (mdrunOptions.verbose)
-            {
-                double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", step,
-                        s_min->epot, s_min->fnorm / sqrtNumAtoms, s_min->fmax, s_min->a_fmax + 1);
-                fflush(stderr);
-            }
-            /* Store the new (lower) energies */
-            matrix nullBox = {};
-            energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), mdatoms->tmass,
-                                             enerd, nullptr, nullptr, nullptr, nullBox, nullptr,
-                                             nullptr, vir, pres, nullptr, mu_tot, constr);
-
-            do_log = do_per_step(step, inputrec->nstlog);
-            do_ene = do_per_step(step, inputrec->nstenergy);
-
-            imdSession->fillEnergyRecord(step, TRUE);
-
-            if (do_log)
-            {
-                energyOutput.printHeader(fplog, step, step);
-            }
-            energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
-                                               do_log ? fplog : nullptr, step, step, fcd, nullptr);
-        }
-
-        /* Send energies and positions to the IMD client if bIMD is TRUE. */
-        if (MASTER(cr) && imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0))
-        {
-            imdSession->sendPositionsAndEnergies();
-        }
-
-        /* Stop when the maximum force lies below tolerance.
-         * If we have reached machine precision, converged is already set to true.
-         */
-        converged = converged || (s_min->fmax < inputrec->em_tol);
-
-    } /* End of the loop */
-
-    if (converged)
-    {
-        step--; /* we never took that last step in this case */
-    }
-    if (s_min->fmax > inputrec->em_tol)
-    {
-        if (MASTER(cr))
-        {
-            warn_step(fplog, inputrec->em_tol, s_min->fmax, step - 1 == number_steps, FALSE);
-        }
-        converged = FALSE;
-    }
-
-    if (MASTER(cr))
-    {
-        /* If we printed energy and/or logfile last step (which was the last step)
-         * we don't have to do it again, but otherwise print the final values.
-         */
-        if (!do_log)
-        {
-            /* Write final value to log since we didn't do anything the last step */
-            energyOutput.printHeader(fplog, step, step);
-        }
-        if (!do_ene || !do_log)
-        {
-            /* Write final energy file entries */
-            energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
-                                               !do_log ? fplog : nullptr, step, step, fcd, nullptr);
-        }
-    }
-
-    /* Print some stuff... */
-    if (MASTER(cr))
-    {
-        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
-    }
-
-    /* IMPORTANT!
-     * For accurate normal mode calculation it is imperative that we
-     * store the last conformation into the full precision binary trajectory.
-     *
-     * However, we should only do it if we did NOT already write this step
-     * above (which we did if do_x or do_f was true).
-     */
-    /* Note that with 0 < nstfout != nstxout we can end up with two frames
-     * in the trajectory with the same step number.
-     */
-    do_x = !do_per_step(step, inputrec->nstxout);
-    do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
-
-    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec,
-                  step, s_min, state_global, observablesHistory);
-
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms);
-        print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms);
-
-        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    /* To print the actual number of steps we needed somewhere */
-    walltime_accounting_set_nsteps_done(walltime_accounting, step);
-}
-
-
-void LegacySimulator::do_lbfgs()
-{
-    static const char* LBFGS = "Low-Memory BFGS Minimizer";
-    em_state_t         ems;
-    gmx_localtop_t     top;
-    gmx_global_stat_t  gstat;
-    t_graph*           graph;
-    int                ncorr, nmaxcorr, point, cp, neval, nminstep;
-    double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
-    real *             rho, *alpha, *p, *s, **dx, **dg;
-    real               a, b, c, maxdelta, delta;
-    real               diag, Epot0;
-    real               dgdx, dgdg, sq, yr, beta;
-    gmx_bool           converged;
-    rvec               mu_tot = { 0 };
-    gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
-    tensor             vir, pres;
-    int                start, end, number_steps;
-    int                i, k, m, n, gf, step;
-    int                mdof_flags;
-    auto               mdatoms = mdAtoms->mdatoms();
-
-    GMX_LOG(mdlog.info)
-            .asParagraph()
-            .appendText(
-                    "Note that activating L-BFGS energy minimization via the "
-                    "integrator .mdp option and the command gmx mdrun may "
-                    "be available in a different form in a future version of GROMACS, "
-                    "e.g. gmx minimize and an .mdp option.");
-
-    if (PAR(cr))
-    {
-        gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank");
-    }
-
-    if (nullptr != constr)
-    {
-        gmx_fatal(
-                FARGS,
-                "The combination of constraints and L-BFGS minimization is not implemented. Either "
-                "do not use constraints, or use another minimizer (e.g. steepest descent).");
-    }
-
-    n        = 3 * state_global->natoms;
-    nmaxcorr = inputrec->nbfgscorr;
-
-    snew(frozen, n);
-
-    snew(p, n);
-    snew(rho, nmaxcorr);
-    snew(alpha, nmaxcorr);
-
-    snew(dx, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
-    {
-        snew(dx[i], n);
-    }
-
-    snew(dg, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
-    {
-        snew(dg[i], n);
-    }
-
-    step  = 0;
-    neval = 0;
-
-    /* Init em */
-    init_em(fplog, mdlog, LBFGS, cr, inputrec, imdSession, pull_work, state_global, top_global,
-            &ems, &top, nrnb, fr, &graph, mdAtoms, &gstat, vsite, constr, nullptr);
-    const bool        simulationsShareState = false;
-    gmx_mdoutf*       outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider,
-                                   mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
-                                   StartingBehavior::NewSimulation, simulationsShareState, ms);
-    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, nullptr,
-                                   false, StartingBehavior::NewSimulation, mdModulesNotifier);
-
-    start = 0;
-    end   = mdatoms->homenr;
-
-    /* We need 4 working states */
-    em_state_t  s0{}, s1{}, s2{}, s3{};
-    em_state_t* sa   = &s0;
-    em_state_t* sb   = &s1;
-    em_state_t* sc   = &s2;
-    em_state_t* last = &s3;
-    /* Initialize by copying the state from ems (we could skip x and f here) */
-    *sa = ems;
-    *sb = ems;
-    *sc = ems;
-
-    /* Print to log file */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
-
-    do_log = do_ene = do_x = do_f = TRUE;
-
-    /* Max number of steps */
-    number_steps = inputrec->nsteps;
-
-    /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
-    gf = 0;
-    for (i = start; i < end; i++)
-    {
-        if (mdatoms->cFREEZE)
-        {
-            gf = mdatoms->cFREEZE[i];
-        }
-        for (m = 0; m < DIM; m++)
-        {
-            frozen[3 * i + m] = (inputrec->opts.nFreeze[gf][m] != 0);
-        }
-    }
-    if (MASTER(cr))
-    {
-        sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
-    }
-    if (fplog)
-    {
-        sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
-    }
-
-    if (vsite)
-    {
-        construct_vsites(vsite, state_global->x.rvec_array(), 1, nullptr, top.idef.iparams,
-                         top.idef.il, fr->ePBC, fr->bMolPBC, cr, state_global->box);
-    }
-
-    /* Call the force routine and some auxiliary (neighboursearching etc.) */
-    /* do_force always puts the charge groups in the box and shifts again
-     * We do not unshift, so molecules are always whole
-     */
-    neval++;
-    EnergyEvaluator energyEvaluator{
-        fplog,      mdlog,     cr,      ms,     top_global,      &top,  inputrec,
-        imdSession, pull_work, nrnb,    wcycle, gstat,           vsite, constr,
-        fcd,        graph,     mdAtoms, fr,     runScheduleWork, enerd
-    };
-    energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE);
-
-    if (MASTER(cr))
-    {
-        /* Copy stuff to the energy bin for easy printing etc. */
-        matrix nullBox = {};
-        energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), mdatoms->tmass,
-                                         enerd, nullptr, nullptr, nullptr, nullBox, nullptr,
-                                         nullptr, vir, pres, nullptr, mu_tot, constr);
-
-        energyOutput.printHeader(fplog, step, step);
-        energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step,
-                                           step, fcd, nullptr);
-    }
-
-    /* Set the initial step.
-     * since it will be multiplied by the non-normalized search direction
-     * vector (force vector the first time), we scale it by the
-     * norm of the force.
-     */
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
-        fprintf(stderr, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
-        fprintf(stderr, "   F-Norm            = %12.5e\n", ems.fnorm / sqrtNumAtoms);
-        fprintf(stderr, "\n");
-        /* and copy to the log file too... */
-        fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
-        fprintf(fplog, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
-        fprintf(fplog, "   F-Norm            = %12.5e\n", ems.fnorm / sqrtNumAtoms);
-        fprintf(fplog, "\n");
-    }
-
-    // Point is an index to the memory of search directions, where 0 is the first one.
-    point = 0;
-
-    // Set initial search direction to the force (-gradient), or 0 for frozen particles.
-    real* fInit = static_cast<real*>(ems.f.rvec_array()[0]);
-    for (i = 0; i < n; i++)
-    {
-        if (!frozen[i])
-        {
-            dx[point][i] = fInit[i]; /* Initial search direction */
-        }
-        else
-        {
-            dx[point][i] = 0;
-        }
-    }
-
-    // Stepsize will be modified during the search, and actually it is not critical
-    // (the main efficiency in the algorithm comes from changing directions), but
-    // we still need an initial value, so estimate it as the inverse of the norm
-    // so we take small steps where the potential fluctuates a lot.
-    stepsize = 1.0 / ems.fnorm;
-
-    /* Start the loop over BFGS steps.
-     * Each successful step is counted, and we continue until
-     * we either converge or reach the max number of steps.
-     */
-
-    ncorr = 0;
-
-    /* Set the gradient from the force */
-    converged = FALSE;
-    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
-    {
-
-        /* Write coordinates if necessary */
-        do_x = do_per_step(step, inputrec->nstxout);
-        do_f = do_per_step(step, inputrec->nstfout);
-
-        mdof_flags = 0;
-        if (do_x)
-        {
-            mdof_flags |= MDOF_X;
-        }
-
-        if (do_f)
-        {
-            mdof_flags |= MDOF_F;
-        }
-
-        if (inputrec->bIMD)
-        {
-            mdof_flags |= MDOF_IMD;
-        }
-
-        mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global->natoms, step,
-                                         static_cast<real>(step), &ems.s, state_global,
-                                         observablesHistory, ems.f);
-
-        /* Do the linesearching in the direction dx[point][0..(n-1)] */
-
-        /* make s a pointer to current search direction - point=0 first time we get here */
-        s = dx[point];
-
-        real* xx = static_cast<real*>(ems.s.x.rvec_array()[0]);
-        real* ff = static_cast<real*>(ems.f.rvec_array()[0]);
-
-        // calculate line gradient in position A
-        for (gpa = 0, i = 0; i < n; i++)
-        {
-            gpa -= s[i] * ff[i];
-        }
-
-        /* Calculate minimum allowed stepsize along the line, before the average (norm)
-         * relative change in coordinate is smaller than precision
-         */
-        for (minstep = 0, i = 0; i < n; i++)
-        {
-            tmp = fabs(xx[i]);
-            if (tmp < 1.0)
-            {
-                tmp = 1.0;
-            }
-            tmp = s[i] / tmp;
-            minstep += tmp * tmp;
-        }
-        minstep = GMX_REAL_EPS / sqrt(minstep / n);
-
-        if (stepsize < minstep)
-        {
-            converged = TRUE;
-            break;
-        }
-
-        // Before taking any steps along the line, store the old position
-        *last       = ems;
-        real* lastx = static_cast<real*>(last->s.x.data()[0]);
-        real* lastf = static_cast<real*>(last->f.data()[0]);
-        Epot0       = ems.epot;
-
-        *sa = ems;
-
-        /* Take a step downhill.
-         * In theory, we should find the actual minimum of the function in this
-         * direction, somewhere along the line.
-         * That is quite possible, but it turns out to take 5-10 function evaluations
-         * for each line. However, we dont really need to find the exact minimum -
-         * it is much better to start a new BFGS step in a modified direction as soon
-         * as we are close to it. This will save a lot of energy evaluations.
-         *
-         * In practice, we just try to take a single step.
-         * If it worked (i.e. lowered the energy), we increase the stepsize but
-         * continue straight to the next BFGS step without trying to find any minimum,
-         * i.e. we change the search direction too. If the line was smooth, it is
-         * likely we are in a smooth region, and then it makes sense to take longer
-         * steps in the modified search direction too.
-         *
-         * If it didn't work (higher energy), there must be a minimum somewhere between
-         * the old position and the new one. Then we need to start by finding a lower
-         * value before we change search direction. Since the energy was apparently
-         * quite rough, we need to decrease the step size.
-         *
-         * Due to the finite numerical accuracy, it turns out that it is a good idea
-         * to accept a SMALL increase in energy, if the derivative is still downhill.
-         * This leads to lower final energies in the tests I've done. / Erik
-         */
-
-        // State "A" is the first position along the line.
-        // reference position along line is initially zero
-        a = 0.0;
-
-        // Check stepsize first. We do not allow displacements
-        // larger than emstep.
-        //
-        do
-        {
-            // Pick a new position C by adding stepsize to A.
-            c = a + stepsize;
-
-            // Calculate what the largest change in any individual coordinate
-            // would be (translation along line * gradient along line)
-            maxdelta = 0;
-            for (i = 0; i < n; i++)
-            {
-                delta = c * s[i];
-                if (delta > maxdelta)
-                {
-                    maxdelta = delta;
-                }
-            }
-            // If any displacement is larger than the stepsize limit, reduce the step
-            if (maxdelta > inputrec->em_stepsize)
-            {
-                stepsize *= 0.1;
-            }
-        } while (maxdelta > inputrec->em_stepsize);
-
-        // Take a trial step and move the coordinate array xc[] to position C
-        real* xc = static_cast<real*>(sc->s.x.rvec_array()[0]);
-        for (i = 0; i < n; i++)
-        {
-            xc[i] = lastx[i] + c * s[i];
-        }
-
-        neval++;
-        // Calculate energy for the trial step in position C
-        energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE);
-
-        // Calc line gradient in position C
-        real* fc = static_cast<real*>(sc->f.rvec_array()[0]);
-        for (gpc = 0, i = 0; i < n; i++)
-        {
-            gpc -= s[i] * fc[i]; /* f is negative gradient, thus the sign */
-        }
-        /* Sum the gradient along the line across CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &gpc, cr);
-        }
-
-        // This is the max amount of increase in energy we tolerate.
-        // By allowing VERY small changes (close to numerical precision) we
-        // frequently find even better (lower) final energies.
-        tmp = std::sqrt(GMX_REAL_EPS) * fabs(sa->epot);
-
-        // Accept the step if the energy is lower in the new position C (compared to A),
-        // or if it is not significantly higher and the line derivative is still negative.
-        foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp));
-        // If true, great, we found a better energy. We no longer try to alter the
-        // stepsize, but simply accept this new better position. The we select a new
-        // search direction instead, which will be much more efficient than continuing
-        // to take smaller steps along a line. Set fnorm based on the new C position,
-        // which will be used to update the stepsize to 1/fnorm further down.
-
-        // If false, the energy is NOT lower in point C, i.e. it will be the same
-        // or higher than in point A. In this case it is pointless to move to point C,
-        // so we will have to do more iterations along the same line to find a smaller
-        // value in the interval [A=0.0,C].
-        // Here, A is still 0.0, but that will change when we do a search in the interval
-        // [0.0,C] below. That search we will do by interpolation or bisection rather
-        // than with the stepsize, so no need to modify it. For the next search direction
-        // it will be reset to 1/fnorm anyway.
-
-        if (!foundlower)
-        {
-            // OK, if we didn't find a lower value we will have to locate one now - there must
-            // be one in the interval [a,c].
-            // The same thing is valid here, though: Don't spend dozens of iterations to find
-            // the line minimum. We try to interpolate based on the derivative at the endpoints,
-            // and only continue until we find a lower value. In most cases this means 1-2 iterations.
-            // I also have a safeguard for potentially really pathological functions so we never
-            // take more than 20 steps before we give up.
-            // If we already found a lower value we just skip this step and continue to the update.
-            real fnorm = 0;
-            nminstep   = 0;
-            do
-            {
-                // Select a new trial point B in the interval [A,C].
-                // If the derivatives at points a & c have different sign we interpolate to zero,
-                // otherwise just do a bisection since there might be multiple minima/maxima
-                // inside the interval.
-                if (gpa < 0 && gpc > 0)
-                {
-                    b = a + gpa * (a - c) / (gpc - gpa);
-                }
-                else
-                {
-                    b = 0.5 * (a + c);
-                }
-
-                /* safeguard if interpolation close to machine accuracy causes errors:
-                 * never go outside the interval
-                 */
-                if (b <= a || b >= c)
-                {
-                    b = 0.5 * (a + c);
-                }
-
-                // Take a trial step to point B
-                real* xb = static_cast<real*>(sb->s.x.rvec_array()[0]);
-                for (i = 0; i < n; i++)
-                {
-                    xb[i] = lastx[i] + b * s[i];
-                }
-
-                neval++;
-                // Calculate energy for the trial step in point B
-                energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE);
-                fnorm = sb->fnorm;
-
-                // Calculate gradient in point B
-                real* fb = static_cast<real*>(sb->f.rvec_array()[0]);
-                for (gpb = 0, i = 0; i < n; i++)
-                {
-                    gpb -= s[i] * fb[i]; /* f is negative gradient, thus the sign */
-                }
-                /* Sum the gradient along the line across CPUs */
-                if (PAR(cr))
-                {
-                    gmx_sumd(1, &gpb, cr);
-                }
-
-                // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative
-                // at the new point B, and rename the endpoints of this new interval A and C.
-                if (gpb > 0)
-                {
-                    /* Replace c endpoint with b */
-                    c = b;
-                    /* copy state b to c */
-                    *sc = *sb;
-                }
-                else
-                {
-                    /* Replace a endpoint with b */
-                    a = b;
-                    /* copy state b to a */
-                    *sa = *sb;
-                }
-
-                /*
-                 * Stop search as soon as we find a value smaller than the endpoints,
-                 * or if the tolerance is below machine precision.
-                 * Never run more than 20 steps, no matter what.
-                 */
-                nminstep++;
-            } while ((sb->epot > sa->epot || sb->epot > sc->epot) && (nminstep < 20));
-
-            if (std::fabs(sb->epot - Epot0) < GMX_REAL_EPS || nminstep >= 20)
-            {
-                /* OK. We couldn't find a significantly lower energy.
-                 * If ncorr==0 this was steepest descent, and then we give up.
-                 * If not, reset memory to restart as steepest descent before quitting.
-                 */
-                if (ncorr == 0)
-                {
-                    /* Converged */
-                    converged = TRUE;
-                    break;
-                }
-                else
-                {
-                    /* Reset memory */
-                    ncorr = 0;
-                    /* Search in gradient direction */
-                    for (i = 0; i < n; i++)
-                    {
-                        dx[point][i] = ff[i];
-                    }
-                    /* Reset stepsize */
-                    stepsize = 1.0 / fnorm;
-                    continue;
-                }
-            }
-
-            /* Select min energy state of A & C, put the best in xx/ff/Epot
-             */
-            if (sc->epot < sa->epot)
-            {
-                /* Use state C */
-                ems        = *sc;
-                step_taken = c;
-            }
-            else
-            {
-                /* Use state A */
-                ems        = *sa;
-                step_taken = a;
-            }
-        }
-        else
-        {
-            /* found lower */
-            /* Use state C */
-            ems        = *sc;
-            step_taken = c;
-        }
-
-        /* Update the memory information, and calculate a new
-         * approximation of the inverse hessian
-         */
-
-        /* Have new data in Epot, xx, ff */
-        if (ncorr < nmaxcorr)
-        {
-            ncorr++;
-        }
-
-        for (i = 0; i < n; i++)
-        {
-            dg[point][i] = lastf[i] - ff[i];
-            dx[point][i] *= step_taken;
-        }
-
-        dgdg = 0;
-        dgdx = 0;
-        for (i = 0; i < n; i++)
-        {
-            dgdg += dg[point][i] * dg[point][i];
-            dgdx += dg[point][i] * dx[point][i];
-        }
-
-        diag = dgdx / dgdg;
-
-        rho[point] = 1.0 / dgdx;
-        point++;
-
-        if (point >= nmaxcorr)
-        {
-            point = 0;
-        }
-
-        /* Update */
-        for (i = 0; i < n; i++)
-        {
-            p[i] = ff[i];
-        }
-
-        cp = point;
-
-        /* Recursive update. First go back over the memory points */
-        for (k = 0; k < ncorr; k++)
-        {
-            cp--;
-            if (cp < 0)
-            {
-                cp = ncorr - 1;
-            }
-
-            sq = 0;
-            for (i = 0; i < n; i++)
-            {
-                sq += dx[cp][i] * p[i];
-            }
-
-            alpha[cp] = rho[cp] * sq;
-
-            for (i = 0; i < n; i++)
-            {
-                p[i] -= alpha[cp] * dg[cp][i];
-            }
-        }
-
-        for (i = 0; i < n; i++)
-        {
-            p[i] *= diag;
-        }
-
-        /* And then go forward again */
-        for (k = 0; k < ncorr; k++)
-        {
-            yr = 0;
-            for (i = 0; i < n; i++)
-            {
-                yr += p[i] * dg[cp][i];
-            }
-
-            beta = rho[cp] * yr;
-            beta = alpha[cp] - beta;
-
-            for (i = 0; i < n; i++)
-            {
-                p[i] += beta * dx[cp][i];
-            }
-
-            cp++;
-            if (cp >= ncorr)
-            {
-                cp = 0;
-            }
-        }
-
-        for (i = 0; i < n; i++)
-        {
-            if (!frozen[i])
-            {
-                dx[point][i] = p[i];
-            }
-            else
-            {
-                dx[point][i] = 0;
-            }
-        }
-
-        /* Print it if necessary */
-        if (MASTER(cr))
-        {
-            if (mdrunOptions.verbose)
-            {
-                double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", step,
-                        ems.epot, ems.fnorm / sqrtNumAtoms, ems.fmax, ems.a_fmax + 1);
-                fflush(stderr);
-            }
-            /* Store the new (lower) energies */
-            matrix nullBox = {};
-            energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), mdatoms->tmass,
-                                             enerd, nullptr, nullptr, nullptr, nullBox, nullptr,
-                                             nullptr, vir, pres, nullptr, mu_tot, constr);
-
-            do_log = do_per_step(step, inputrec->nstlog);
-            do_ene = do_per_step(step, inputrec->nstenergy);
-
-            imdSession->fillEnergyRecord(step, TRUE);
-
-            if (do_log)
-            {
-                energyOutput.printHeader(fplog, step, step);
-            }
-            energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
-                                               do_log ? fplog : nullptr, step, step, fcd, nullptr);
-        }
-
-        /* Send x and E to IMD client, if bIMD is TRUE. */
-        if (imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0) && MASTER(cr))
-        {
-            imdSession->sendPositionsAndEnergies();
-        }
-
-        // Reset stepsize in we are doing more iterations
-        stepsize = 1.0;
-
-        /* Stop when the maximum force lies below tolerance.
-         * If we have reached machine precision, converged is already set to true.
-         */
-        converged = converged || (ems.fmax < inputrec->em_tol);
-
-    } /* End of the loop */
-
-    if (converged)
-    {
-        step--; /* we never took that last step in this case */
-    }
-    if (ems.fmax > inputrec->em_tol)
-    {
-        if (MASTER(cr))
-        {
-            warn_step(fplog, inputrec->em_tol, ems.fmax, step - 1 == number_steps, FALSE);
-        }
-        converged = FALSE;
-    }
-
-    /* If we printed energy and/or logfile last step (which was the last step)
-     * we don't have to do it again, but otherwise print the final values.
-     */
-    if (!do_log) /* Write final value to log since we didn't do anythin last step */
-    {
-        energyOutput.printHeader(fplog, step, step);
-    }
-    if (!do_ene || !do_log) /* Write final energy file entries */
-    {
-        energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
-                                           !do_log ? fplog : nullptr, step, step, fcd, nullptr);
-    }
-
-    /* Print some stuff... */
-    if (MASTER(cr))
-    {
-        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
-    }
-
-    /* IMPORTANT!
-     * For accurate normal mode calculation it is imperative that we
-     * store the last conformation into the full precision binary trajectory.
-     *
-     * However, we should only do it if we did NOT already write this step
-     * above (which we did if do_x or do_f was true).
-     */
-    do_x = !do_per_step(step, inputrec->nstxout);
-    do_f = !do_per_step(step, inputrec->nstfout);
-    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec,
-                  step, &ems, state_global, observablesHistory);
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms);
-        print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms);
-
-        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    /* To print the actual number of steps we needed somewhere */
-    walltime_accounting_set_nsteps_done(walltime_accounting, step);
-}
-
-void LegacySimulator::do_steep()
-{
-    const char*       SD = "Steepest Descents";
-    gmx_localtop_t    top;
-    gmx_global_stat_t gstat;
-    t_graph*          graph;
-    real              stepsize;
-    real              ustep;
-    gmx_bool          bDone, bAbort, do_x, do_f;
-    tensor            vir, pres;
-    rvec              mu_tot = { 0 };
-    int               nsteps;
-    int               count          = 0;
-    int               steps_accepted = 0;
-    auto              mdatoms        = mdAtoms->mdatoms();
-
-    GMX_LOG(mdlog.info)
-            .asParagraph()
-            .appendText(
-                    "Note that activating steepest-descent energy minimization via the "
-                    "integrator .mdp option and the command gmx mdrun may "
-                    "be available in a different form in a future version of GROMACS, "
-                    "e.g. gmx minimize and an .mdp option.");
-
-    /* Create 2 states on the stack and extract pointers that we will swap */
-    em_state_t  s0{}, s1{};
-    em_state_t* s_min = &s0;
-    em_state_t* s_try = &s1;
-
-    /* Init em and store the local state in s_try */
-    init_em(fplog, mdlog, SD, cr, inputrec, imdSession, pull_work, state_global, top_global, s_try,
-            &top, nrnb, fr, &graph, mdAtoms, &gstat, vsite, constr, nullptr);
-    const bool        simulationsShareState = false;
-    gmx_mdoutf*       outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider,
-                                   mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
-                                   StartingBehavior::NewSimulation, simulationsShareState, ms);
-    gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work, nullptr,
-                                   false, StartingBehavior::NewSimulation, mdModulesNotifier);
-
-    /* Print to log file  */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
-
-    /* Set variables for stepsize (in nm). This is the largest
-     * step that we are going to make in any direction.
-     */
-    ustep    = inputrec->em_stepsize;
-    stepsize = 0;
-
-    /* Max number of steps  */
-    nsteps = inputrec->nsteps;
-
-    if (MASTER(cr))
-    {
-        /* Print to the screen  */
-        sp_header(stderr, SD, inputrec->em_tol, nsteps);
-    }
-    if (fplog)
-    {
-        sp_header(fplog, SD, inputrec->em_tol, nsteps);
-    }
-    EnergyEvaluator energyEvaluator{
-        fplog,      mdlog,     cr,      ms,     top_global,      &top,  inputrec,
-        imdSession, pull_work, nrnb,    wcycle, gstat,           vsite, constr,
-        fcd,        graph,     mdAtoms, fr,     runScheduleWork, enerd
-    };
-
-    /**** HERE STARTS THE LOOP ****
-     * count is the counter for the number of steps
-     * bDone will be TRUE when the minimization has converged
-     * bAbort will be TRUE when nsteps steps have been performed or when
-     * the stepsize becomes smaller than is reasonable for machine precision
-     */
-    count  = 0;
-    bDone  = FALSE;
-    bAbort = FALSE;
-    while (!bDone && !bAbort)
-    {
-        bAbort = (nsteps >= 0) && (count == nsteps);
-
-        /* set new coordinates, except for first step */
-        bool validStep = true;
-        if (count > 0)
-        {
-            validStep = do_em_step(cr, inputrec, mdatoms, s_min, stepsize, &s_min->f, s_try, constr, count);
-        }
-
-        if (validStep)
-        {
-            energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0);
-        }
-        else
-        {
-            // Signal constraint error during stepping with energy=inf
-            s_try->epot = std::numeric_limits<real>::infinity();
-        }
-
-        if (MASTER(cr))
-        {
-            energyOutput.printHeader(fplog, count, count);
-        }
-
-        if (count == 0)
-        {
-            s_min->epot = s_try->epot;
-        }
-
-        /* Print it if necessary  */
-        if (MASTER(cr))
-        {
-            if (mdrunOptions.verbose)
-            {
-                fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
-                        count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax + 1,
-                        ((count == 0) || (s_try->epot < s_min->epot)) ? '\n' : '\r');
-                fflush(stderr);
-            }
-
-            if ((count == 0) || (s_try->epot < s_min->epot))
-            {
-                /* Store the new (lower) energies  */
-                matrix nullBox = {};
-                energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(count), mdatoms->tmass,
-                                                 enerd, nullptr, nullptr, nullptr, nullBox, nullptr,
-                                                 nullptr, vir, pres, nullptr, mu_tot, constr);
-
-                imdSession->fillEnergyRecord(count, TRUE);
-
-                const bool do_dr = do_per_step(steps_accepted, inputrec->nstdisreout);
-                const bool do_or = do_per_step(steps_accepted, inputrec->nstorireout);
-                energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, do_dr, do_or,
-                                                   fplog, count, count, fcd, nullptr);
-                fflush(fplog);
-            }
-        }
-
-        /* Now if the new energy is smaller than the previous...
-         * or if this is the first step!
-         * or if we did random steps!
-         */
-
-        if ((count == 0) || (s_try->epot < s_min->epot))
-        {
-            steps_accepted++;
-
-            /* Test whether the convergence criterion is met...  */
-            bDone = (s_try->fmax < inputrec->em_tol);
-
-            /* Copy the arrays for force, positions and energy  */
-            /* The 'Min' array always holds the coords and forces of the minimal
-               sampled energy  */
-            swap_em_state(&s_min, &s_try);
-            if (count > 0)
-            {
-                ustep *= 1.2;
-            }
-
-            /* Write to trn, if necessary */
-            do_x = do_per_step(steps_accepted, inputrec->nstxout);
-            do_f = do_per_step(steps_accepted, inputrec->nstfout);
-            write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, count, s_min,
-                          state_global, observablesHistory);
-        }
-        else
-        {
-            /* If energy is not smaller make the step smaller...  */
-            ustep *= 0.5;
-
-            if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
-            {
-                /* Reload the old state */
-                em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, imdSession,
-                                       pull_work, s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
-            }
-        }
-
-        // If the force is very small after finishing minimization,
-        // we risk dividing by zero when calculating the step size.
-        // So we check first if the minimization has stopped before
-        // trying to obtain a new step size.
-        if (!bDone)
-        {
-            /* Determine new step  */
-            stepsize = ustep / s_min->fmax;
-        }
-
-        /* Check if stepsize is too small, with 1 nm as a characteristic length */
-#if GMX_DOUBLE
-        if (count == nsteps || ustep < 1e-12)
-#else
-        if (count == nsteps || ustep < 1e-6)
-#endif
-        {
-            if (MASTER(cr))
-            {
-                warn_step(fplog, inputrec->em_tol, s_min->fmax, count == nsteps, constr != nullptr);
-            }
-            bAbort = TRUE;
-        }
-
-        /* Send IMD energies and positions, if bIMD is TRUE. */
-        if (imdSession->run(count, TRUE, state_global->box,
-                            MASTER(cr) ? state_global->x.rvec_array() : nullptr, 0)
-            && MASTER(cr))
-        {
-            imdSession->sendPositionsAndEnergies();
-        }
-
-        count++;
-    } /* End of the loop  */
-
-    /* Print some data...  */
-    if (MASTER(cr))
-    {
-        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
-    }
-    write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout != 0, ftp2fn(efSTO, nfile, fnm),
-                  top_global, inputrec, count, s_min, state_global, observablesHistory);
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-
-        print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms);
-        print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    /* To print the actual number of steps we needed somewhere */
-    inputrec->nsteps = count;
-
-    walltime_accounting_set_nsteps_done(walltime_accounting, count);
-}
-
-void LegacySimulator::do_nm()
-{
-    const char*         NM = "Normal Mode Analysis";
-    int                 nnodes;
-    gmx_localtop_t      top;
-    gmx_global_stat_t   gstat;
-    t_graph*            graph;
-    tensor              vir, pres;
-    rvec                mu_tot = { 0 };
-    rvec*               dfdx;
-    gmx_bool            bSparse; /* use sparse matrix storage format */
-    size_t              sz;
-    gmx_sparsematrix_t* sparse_matrix = nullptr;
-    real*               full_matrix   = nullptr;
-
-    /* added with respect to mdrun */
-    int  row, col;
-    real der_range = 10.0 * std::sqrt(GMX_REAL_EPS);
-    real x_min;
-    bool bIsMaster = MASTER(cr);
-    auto mdatoms   = mdAtoms->mdatoms();
-
-    GMX_LOG(mdlog.info)
-            .asParagraph()
-            .appendText(
-                    "Note that activating normal-mode analysis via the integrator "
-                    ".mdp option and the command gmx mdrun may "
-                    "be available in a different form in a future version of GROMACS, "
-                    "e.g. gmx normal-modes.");
-
-    if (constr != nullptr)
-    {
-        gmx_fatal(
-                FARGS,
-                "Constraints present with Normal Mode Analysis, this combination is not supported");
-    }
-
-    gmx_shellfc_t* shellfc;
-
-    em_state_t state_work{};
-
-    /* Init em and store the local state in state_minimum */
-    init_em(fplog, mdlog, NM, cr, inputrec, imdSession, pull_work, state_global, top_global,
-            &state_work, &top, nrnb, fr, &graph, mdAtoms, &gstat, vsite, constr, &shellfc);
-    const bool  simulationsShareState = false;
-    gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider,
-                                   mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
-                                   StartingBehavior::NewSimulation, simulationsShareState, ms);
-
-    std::vector<int>       atom_index = get_atom_index(top_global);
-    std::vector<gmx::RVec> fneg(atom_index.size(), { 0, 0, 0 });
-    snew(dfdx, atom_index.size());
-
-#if !GMX_DOUBLE
-    if (bIsMaster)
-    {
-        fprintf(stderr,
-                "NOTE: This version of GROMACS has been compiled in single precision,\n"
-                "      which MIGHT not be accurate enough for normal mode analysis.\n"
-                "      GROMACS now uses sparse matrix storage, so the memory requirements\n"
-                "      are fairly modest even if you recompile in double precision.\n\n");
-    }
-#endif
-
-    /* Check if we can/should use sparse storage format.
-     *
-     * Sparse format is only useful when the Hessian itself is sparse, which it
-     * will be when we use a cutoff.
-     * For small systems (n<1000) it is easier to always use full matrix format, though.
-     */
-    if (EEL_FULL(fr->ic->eeltype) || fr->rlist == 0.0)
-    {
-        GMX_LOG(mdlog.warning)
-                .appendText("Non-cutoff electrostatics used, forcing full Hessian format.");
-        bSparse = FALSE;
-    }
-    else if (atom_index.size() < 1000)
-    {
-        GMX_LOG(mdlog.warning)
-                .appendTextFormatted("Small system size (N=%zu), using full Hessian format.",
-                                     atom_index.size());
-        bSparse = FALSE;
-    }
-    else
-    {
-        GMX_LOG(mdlog.warning).appendText("Using compressed symmetric sparse Hessian format.");
-        bSparse = TRUE;
-    }
-
-    /* Number of dimensions, based on real atoms, that is not vsites or shell */
-    sz = DIM * atom_index.size();
-
-    fprintf(stderr, "Allocating Hessian memory...\n\n");
-
-    if (bSparse)
-    {
-        sparse_matrix                       = gmx_sparsematrix_init(sz);
-        sparse_matrix->compressed_symmetric = TRUE;
-    }
-    else
-    {
-        snew(full_matrix, sz * sz);
-    }
-
-    /* Write start time and temperature */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
-
-    /* fudge nr of steps to nr of atoms */
-    inputrec->nsteps = atom_index.size() * 2;
-
-    if (bIsMaster)
-    {
-        fprintf(stderr, "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n",
-                *(top_global->name), inputrec->nsteps);
-    }
-
-    nnodes = cr->nnodes;
-
-    /* Make evaluate_energy do a single node force calculation */
-    cr->nnodes = 1;
-    EnergyEvaluator energyEvaluator{
-        fplog,      mdlog,     cr,      ms,     top_global,      &top,  inputrec,
-        imdSession, pull_work, nrnb,    wcycle, gstat,           vsite, constr,
-        fcd,        graph,     mdAtoms, fr,     runScheduleWork, enerd
-    };
-    energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE);
-    cr->nnodes = nnodes;
-
-    /* if forces are not small, warn user */
-    get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, &state_work);
-
-    GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work.fmax);
-    if (state_work.fmax > 1.0e-3)
-    {
-        GMX_LOG(mdlog.warning)
-                .appendText(
-                        "The force is probably not small enough to "
-                        "ensure that you are at a minimum.\n"
-                        "Be aware that negative eigenvalues may occur\n"
-                        "when the resulting matrix is diagonalized.");
-    }
-
-    /***********************************************************
-     *
-     *      Loop over all pairs in matrix
-     *
-     *      do_force called twice. Once with positive and
-     *      once with negative displacement
-     *
-     ************************************************************/
-
-    /* Steps are divided one by one over the nodes */
-    bool bNS          = true;
-    auto state_work_x = makeArrayRef(state_work.s.x);
-    auto state_work_f = makeArrayRef(state_work.f);
-    for (index aid = cr->nodeid; aid < ssize(atom_index); aid += nnodes)
-    {
-        size_t atom = atom_index[aid];
-        for (size_t d = 0; d < DIM; d++)
-        {
-            int64_t step        = 0;
-            int     force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
-            double  t           = 0;
-
-            x_min = state_work_x[atom][d];
-
-            for (unsigned int dx = 0; (dx < 2); dx++)
-            {
-                if (dx == 0)
-                {
-                    state_work_x[atom][d] = x_min - der_range;
-                }
-                else
-                {
-                    state_work_x[atom][d] = x_min + der_range;
-                }
-
-                /* Make evaluate_energy do a single node force calculation */
-                cr->nnodes = 1;
-                if (shellfc)
-                {
-                    /* Now is the time to relax the shells */
-                    relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, nullptr, step, inputrec,
-                                        imdSession, pull_work, bNS, force_flags, &top, constr, enerd,
-                                        fcd, state_work.s.natoms, state_work.s.x.arrayRefWithPadding(),
-                                        state_work.s.v.arrayRefWithPadding(), state_work.s.box,
-                                        state_work.s.lambda, &state_work.s.hist,
-                                        state_work.f.arrayRefWithPadding(), vir, mdatoms, nrnb,
-                                        wcycle, graph, shellfc, fr, runScheduleWork, t, mu_tot,
-                                        vsite, DDBalanceRegionHandler(nullptr));
-                    bNS = false;
-                    step++;
-                }
-                else
-                {
-                    energyEvaluator.run(&state_work, mu_tot, vir, pres, aid * 2 + dx, FALSE);
-                }
-
-                cr->nnodes = nnodes;
-
-                if (dx == 0)
-                {
-                    std::copy(state_work_f.begin(), state_work_f.begin() + atom_index.size(),
-                              fneg.begin());
-                }
-            }
-
-            /* x is restored to original */
-            state_work_x[atom][d] = x_min;
-
-            for (size_t j = 0; j < atom_index.size(); j++)
-            {
-                for (size_t k = 0; (k < DIM); k++)
-                {
-                    dfdx[j][k] = -(state_work_f[atom_index[j]][k] - fneg[j][k]) / (2 * der_range);
-                }
-            }
-
-            if (!bIsMaster)
-            {
-#if GMX_MPI
-#    define mpi_type GMX_MPI_REAL
-                MPI_Send(dfdx[0], atom_index.size() * DIM, mpi_type, MASTER(cr), cr->nodeid,
-                         cr->mpi_comm_mygroup);
-#endif
-            }
-            else
-            {
-                for (index node = 0; (node < nnodes && aid + node < ssize(atom_index)); node++)
-                {
-                    if (node > 0)
-                    {
-#if GMX_MPI
-                        MPI_Status stat;
-                        MPI_Recv(dfdx[0], atom_index.size() * DIM, mpi_type, node, node,
-                                 cr->mpi_comm_mygroup, &stat);
-#    undef mpi_type
-#endif
-                    }
-
-                    row = (aid + node) * DIM + d;
-
-                    for (size_t j = 0; j < atom_index.size(); j++)
-                    {
-                        for (size_t k = 0; k < DIM; k++)
-                        {
-                            col = j * DIM + k;
-
-                            if (bSparse)
-                            {
-                                if (col >= row && dfdx[j][k] != 0.0)
-                                {
-                                    gmx_sparsematrix_increment_value(sparse_matrix, row, col, dfdx[j][k]);
-                                }
-                            }
-                            else
-                            {
-                                full_matrix[row * sz + col] = dfdx[j][k];
-                            }
-                        }
-                    }
-                }
-            }
-
-            if (mdrunOptions.verbose && fplog)
-            {
-                fflush(fplog);
-            }
-        }
-        /* write progress */
-        if (bIsMaster && mdrunOptions.verbose)
-        {
-            fprintf(stderr, "\rFinished step %d out of %td",
-                    std::min<int>(atom + nnodes, atom_index.size()), ssize(atom_index));
-            fflush(stderr);
-        }
-    }
-
-    if (bIsMaster)
-    {
-        fprintf(stderr, "\n\nWriting Hessian...\n");
-        gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size() * 2);
-}
-
-} // namespace gmx
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.cpp b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.cpp
deleted file mode 100644
index 4e0029462e..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.cpp
+++ /dev/null
@@ -1,1495 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011-2019, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-
-/*! \internal \file
- *
- * \brief Implements the replica exchange routines.
- *
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \author Mark Abraham <mark.j.abraham@gmail.com>
- * \ingroup module_mdrun
- */
-#include "gmxpre.h"
-
-#include "replicaexchange.h"
-
-#include "config.h"
-
-#include <cmath>
-
-#include <random>
-
-#include "gromacs/domdec/collect.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdrunutility/multisim.h"
-#include "gromacs/mdtypes/commrec.h"
-#include "gromacs/mdtypes/enerdata.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/state.h"
-#include "gromacs/random/threefry.h"
-#include "gromacs/random/uniformintdistribution.h"
-#include "gromacs/random/uniformrealdistribution.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/pleasecite.h"
-#include "gromacs/utility/smalloc.h"
-
-
-/* PLUMED */
-#include "../../../Plumed.h"
-extern int    plumedswitch;
-extern plumed plumedmain;
-/* END PLUMED */
-
-/* PLUMED HREX */
-int plumed_hrex;
-/* END PLUMED HREX */
-
-//! Helps cut off probability values.
-constexpr int c_probabilityCutoff = 100;
-
-/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */
-
-//! Rank in the multisimulation
-#define MSRANK(ms, nodeid) (nodeid)
-
-//! Enum for replica exchange flavours
-enum
-{
-    ereTEMP,
-    ereLAMBDA,
-    ereENDSINGLE,
-    ereTL,
-    ereNR
-};
-/*! \brief Strings describing replica exchange flavours.
- *
- *  end_single_marker merely notes the end of single variable replica
- *  exchange. All types higher than it are multiple replica exchange
- *  methods.
- *
- * Eventually, should add 'pressure', 'temperature and pressure',
- *  'lambda_and_pressure', 'temperature_lambda_pressure'?; Let's wait
- *  until we feel better about the pressure control methods giving
- *  exact ensembles.  Right now, we assume constant pressure */
-static const char* erename[ereNR] = { "temperature", "lambda", "end_single_marker",
-                                      "temperature and lambda" };
-
-//! Working data for replica exchange.
-struct gmx_repl_ex
-{
-    //! Replica ID
-    int repl;
-    //! Total number of replica
-    int nrepl;
-    //! Temperature
-    real temp;
-    //! Replica exchange type from ere enum
-    int type;
-    //! Quantity, e.g. temperature or lambda; first index is ere, second index is replica ID
-    real** q;
-    //! Use constant pressure and temperature
-    gmx_bool bNPT;
-    //! Replica pressures
-    real* pres;
-    //! Replica indices
-    int* ind;
-    //! Used for keeping track of all the replica swaps
-    int* allswaps;
-    //! Replica exchange interval (number of steps)
-    int nst;
-    //! Number of exchanges per interval
-    int nex;
-    //! Random seed
-    int seed;
-    //! Number of even and odd replica change attempts
-    int nattempt[2];
-    //! Sum of probabilities
-    real* prob_sum;
-    //! Number of moves between replicas i and j
-    int** nmoves;
-    //! i-th element of the array is the number of exchanges between replica i-1 and i
-    int* nexchange;
-
-    /*! \brief Helper arrays for replica exchange; allocated here
-     * so they don't have to be allocated each time */
-    //! \{
-    int*      destinations;
-    int**     cyclic;
-    int**     order;
-    int*      tmpswap;
-    gmx_bool* incycle;
-    gmx_bool* bEx;
-    //! \}
-
-    //! Helper arrays to hold the quantities that are exchanged.
-    //! \{
-    real*  prob;
-    real*  Epot;
-    real*  beta;
-    real*  Vol;
-    real** de;
-    //! \}
-};
-
-// TODO We should add Doxygen here some time.
-//! \cond
-
-static gmx_bool repl_quantity(const gmx_multisim_t* ms, struct gmx_repl_ex* re, int ere, real q)
-{
-    real*    qall;
-    gmx_bool bDiff;
-    int      s;
-
-    snew(qall, ms->nsim);
-    qall[re->repl] = q;
-    gmx_sum_sim(ms->nsim, qall, ms);
-
-    /* PLUMED */
-    //bDiff = FALSE;
-    //for (s = 1; s < ms->nsim; s++)
-    //{
-    //    if (qall[s] != qall[0])
-    //    {
-              bDiff = TRUE;
-    //    }
-    //}
-    /* END PLUMED */
-
-    if (bDiff)
-    {
-        /* Set the replica exchange type and quantities */
-        re->type = ere;
-
-        snew(re->q[ere], re->nrepl);
-        for (s = 0; s < ms->nsim; s++)
-        {
-            re->q[ere][s] = qall[s];
-        }
-    }
-    sfree(qall);
-    return bDiff;
-}
-
-gmx_repl_ex_t init_replica_exchange(FILE*                            fplog,
-                                    const gmx_multisim_t*            ms,
-                                    int                              numAtomsInSystem,
-                                    const t_inputrec*                ir,
-                                    const ReplicaExchangeParameters& replExParams)
-{
-    real                pres;
-    int                 i, j;
-    struct gmx_repl_ex* re;
-    gmx_bool            bTemp;
-    gmx_bool            bLambda = FALSE;
-
-    fprintf(fplog, "\nInitializing Replica Exchange\n");
-
-    if (!isMultiSim(ms) || ms->nsim == 1)
-    {
-        gmx_fatal(FARGS,
-                  "Nothing to exchange with only one replica, maybe you forgot to set the "
-                  "-multidir option of mdrun?");
-    }
-    if (replExParams.numExchanges < 0)
-    {
-        gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive");
-    }
-
-    if (!EI_DYNAMICS(ir->eI))
-    {
-        gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations");
-        /* Note that PAR(cr) is defined by cr->nnodes > 1, which is
-         * distinct from isMultiSim(ms). A multi-simulation only runs
-         * with real MPI parallelism, but this does not imply PAR(cr)
-         * is true!
-         *
-         * Since we are using a dynamical integrator, the only
-         * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are
-         * synonymous. The only way for cr->nnodes > 1 to be true is
-         * if we are using DD. */
-    }
-
-    snew(re, 1);
-
-    re->repl  = ms->sim;
-    re->nrepl = ms->nsim;
-    snew(re->q, ereENDSINGLE);
-
-    fprintf(fplog, "Repl  There are %d replicas:\n", re->nrepl);
-
-    /* We only check that the number of atoms in the systms match.
-     * This, of course, do not guarantee that the systems are the same,
-     * but it does guarantee that we can perform replica exchange.
-     */
-    check_multi_int(fplog, ms, numAtomsInSystem, "the number of atoms", FALSE);
-    check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE);
-    check_multi_int64(fplog, ms, ir->init_step + ir->nsteps, "init_step+nsteps", FALSE);
-    const int nst = replExParams.exchangeInterval;
-    check_multi_int64(fplog, ms, (ir->init_step + nst - 1) / nst,
-                      "first exchange step: init_step/-replex", FALSE);
-    check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE);
-    check_multi_int(fplog, ms, ir->opts.ngtc, "the number of temperature coupling groups", FALSE);
-    check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE);
-    check_multi_int(fplog, ms, ir->efep, "free energy", FALSE);
-    check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE);
-
-    re->temp = ir->opts.ref_t[0];
-    for (i = 1; (i < ir->opts.ngtc); i++)
-    {
-        if (ir->opts.ref_t[i] != re->temp)
-        {
-            fprintf(fplog,
-                    "\nWARNING: The temperatures of the different temperature coupling groups are "
-                    "not identical\n\n");
-            fprintf(stderr,
-                    "\nWARNING: The temperatures of the different temperature coupling groups are "
-                    "not identical\n\n");
-        }
-    }
-
-    re->type = -1;
-    bTemp    = repl_quantity(ms, re, ereTEMP, re->temp);
-    if (ir->efep != efepNO)
-    {
-        bLambda = repl_quantity(ms, re, ereLAMBDA, static_cast<real>(ir->fepvals->init_fep_state));
-    }
-    if (re->type == -1) /* nothing was assigned */
-    {
-        gmx_fatal(FARGS,
-                  "The properties of the %d systems are all the same, there is nothing to exchange",
-                  re->nrepl);
-    }
-    if (bLambda && bTemp)
-    {
-        re->type = ereTL;
-    }
-
-    if (bTemp)
-    {
-        please_cite(fplog, "Sugita1999a");
-        if (ir->epc != epcNO)
-        {
-            re->bNPT = TRUE;
-            fprintf(fplog, "Repl  Using Constant Pressure REMD.\n");
-            please_cite(fplog, "Okabe2001a");
-        }
-        if (ir->etc == etcBERENDSEN)
-        {
-            gmx_fatal(FARGS,
-                      "REMD with the %s thermostat does not produce correct potential energy "
-                      "distributions, consider using the %s thermostat instead",
-                      ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE));
-        }
-    }
-    if (bLambda)
-    {
-        if (ir->fepvals->delta_lambda != 0) /* check this? */
-        {
-            gmx_fatal(FARGS, "delta_lambda is not zero");
-        }
-    }
-    if (re->bNPT)
-    {
-        snew(re->pres, re->nrepl);
-        if (ir->epct == epctSURFACETENSION)
-        {
-            pres = ir->ref_p[ZZ][ZZ];
-        }
-        else
-        {
-            pres = 0;
-            j    = 0;
-            for (i = 0; i < DIM; i++)
-            {
-                if (ir->compress[i][i] != 0)
-                {
-                    pres += ir->ref_p[i][i];
-                    j++;
-                }
-            }
-            pres /= j;
-        }
-        re->pres[re->repl] = pres;
-        gmx_sum_sim(re->nrepl, re->pres, ms);
-    }
-
-    /* Make an index for increasing replica order */
-    /* only makes sense if one or the other is varying, not both!
-       if both are varying, we trust the order the person gave. */
-    snew(re->ind, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        re->ind[i] = i;
-    }
-
-    /* PLUMED */
-    // plumed2: check if we want alternative patterns (i.e. for bias-exchange metaD)
-    // in those cases replicas can share the same temperature.
-    /*
-    if (re->type < ereENDSINGLE)
-    {
-
-        for (i = 0; i < re->nrepl; i++)
-        {
-            for (j = i + 1; j < re->nrepl; j++)
-            {
-                if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]])
-                {*/
-                    /* Unordered replicas are supposed to work, but there
-                     * is still an issues somewhere.
-                     * Note that at this point still re->ind[i]=i.
-                     */
-                 /*
-                    gmx_fatal(FARGS,
-                              "Replicas with indices %d < %d have %ss %g > %g, please order your "
-                              "replicas on increasing %s",
-                              i, j, erename[re->type], re->q[re->type][i], re->q[re->type][j],
-                              erename[re->type]);
-                }
-                else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]])
-                {
-                    gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]);
-                }
-            }
-        }
-    }
-    */
-    /* END PLUMED */
-
-    /* keep track of all the swaps, starting with the initial placement. */
-    snew(re->allswaps, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        re->allswaps[i] = re->ind[i];
-    }
-
-    switch (re->type)
-    {
-        case ereTEMP:
-            fprintf(fplog, "\nReplica exchange in temperature\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]);
-            }
-            fprintf(fplog, "\n");
-            break;
-        case ereLAMBDA:
-            fprintf(fplog, "\nReplica exchange in lambda\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %3d", static_cast<int>(re->q[re->type][re->ind[i]]));
-            }
-            fprintf(fplog, "\n");
-            break;
-        case ereTL:
-            fprintf(fplog, "\nReplica exchange in temperature and lambda state\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]);
-            }
-            fprintf(fplog, "\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %5d", static_cast<int>(re->q[ereLAMBDA][re->ind[i]]));
-            }
-            fprintf(fplog, "\n");
-            break;
-        default: gmx_incons("Unknown replica exchange quantity");
-    }
-    if (re->bNPT)
-    {
-        fprintf(fplog, "\nRepl  p");
-        for (i = 0; i < re->nrepl; i++)
-        {
-            fprintf(fplog, " %5.2f", re->pres[re->ind[i]]);
-        }
-
-        for (i = 0; i < re->nrepl; i++)
-        {
-            if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i - 1]]))
-            {
-                fprintf(fplog,
-                        "\nWARNING: The reference pressures decrease with increasing "
-                        "temperatures\n\n");
-                fprintf(stderr,
-                        "\nWARNING: The reference pressures decrease with increasing "
-                        "temperatures\n\n");
-            }
-        }
-    }
-    re->nst = nst;
-    if (replExParams.randomSeed == -1)
-    {
-        if (isMasterSim(ms))
-        {
-            re->seed = static_cast<int>(gmx::makeRandomSeed());
-        }
-        else
-        {
-            re->seed = 0;
-        }
-        gmx_sumi_sim(1, &(re->seed), ms);
-    }
-    else
-    {
-        re->seed = replExParams.randomSeed;
-    }
-    fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst);
-    fprintf(fplog, "\nReplica random seed: %d\n", re->seed);
-
-    re->nattempt[0] = 0;
-    re->nattempt[1] = 0;
-
-    snew(re->prob_sum, re->nrepl);
-    snew(re->nexchange, re->nrepl);
-    snew(re->nmoves, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        snew(re->nmoves[i], re->nrepl);
-    }
-    fprintf(fplog, "Replica exchange information below: ex and x = exchange, pr = probability\n");
-
-    /* generate space for the helper functions so we don't have to snew each time */
-
-    snew(re->destinations, re->nrepl);
-    snew(re->incycle, re->nrepl);
-    snew(re->tmpswap, re->nrepl);
-    snew(re->cyclic, re->nrepl);
-    snew(re->order, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        snew(re->cyclic[i], re->nrepl + 1);
-        snew(re->order[i], re->nrepl);
-    }
-    /* allocate space for the functions storing the data for the replicas */
-    /* not all of these arrays needed in all cases, but they don't take
-       up much space, since the max size is nrepl**2 */
-    snew(re->prob, re->nrepl);
-    snew(re->bEx, re->nrepl);
-    snew(re->beta, re->nrepl);
-    snew(re->Vol, re->nrepl);
-    snew(re->Epot, re->nrepl);
-    snew(re->de, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        snew(re->de[i], re->nrepl);
-    }
-    re->nex = replExParams.numExchanges;
-    return re;
-}
-
-static void exchange_reals(const gmx_multisim_t gmx_unused* ms, int gmx_unused b, real* v, int n)
-{
-    real* buf;
-    int   i;
-
-    if (v)
-    {
-        snew(buf, n);
-#if GMX_MPI
-        /*
-           MPI_Sendrecv(v,  n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
-           buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
-           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v, n * sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf, n * sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, ms->mpi_comm_masters,
-                     MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            v[i] = buf[i];
-        }
-        sfree(buf);
-    }
-}
-
-
-static void exchange_doubles(const gmx_multisim_t gmx_unused* ms, int gmx_unused b, double* v, int n)
-{
-    double* buf;
-    int     i;
-
-    if (v)
-    {
-        snew(buf, n);
-#if GMX_MPI
-        /*
-           MPI_Sendrecv(v,  n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
-           buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
-           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v, n * sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf, n * sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, ms->mpi_comm_masters,
-                     MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            v[i] = buf[i];
-        }
-        sfree(buf);
-    }
-}
-
-static void exchange_rvecs(const gmx_multisim_t gmx_unused* ms, int gmx_unused b, rvec* v, int n)
-{
-    rvec* buf;
-    int   i;
-
-    if (v)
-    {
-        snew(buf, n);
-#if GMX_MPI
-        /*
-           MPI_Sendrecv(v[0],  n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
-           buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
-           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v[0], n * sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf[0], n * sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, ms->mpi_comm_masters,
-                     MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            copy_rvec(buf[i], v[i]);
-        }
-        sfree(buf);
-    }
-}
-
-/* PLUMED HREX */
-void exchange_state(const gmx_multisim_t* ms, int b, t_state* state)
-/* END PLUMED HREX */
-{
-    /* When t_state changes, this code should be updated. */
-    int ngtc, nnhpres;
-    ngtc    = state->ngtc * state->nhchainlength;
-    nnhpres = state->nnhpres * state->nhchainlength;
-    exchange_rvecs(ms, b, state->box, DIM);
-    exchange_rvecs(ms, b, state->box_rel, DIM);
-    exchange_rvecs(ms, b, state->boxv, DIM);
-    exchange_reals(ms, b, &(state->veta), 1);
-    exchange_reals(ms, b, &(state->vol0), 1);
-    exchange_rvecs(ms, b, state->svir_prev, DIM);
-    exchange_rvecs(ms, b, state->fvir_prev, DIM);
-    exchange_rvecs(ms, b, state->pres_prev, DIM);
-    exchange_doubles(ms, b, state->nosehoover_xi.data(), ngtc);
-    exchange_doubles(ms, b, state->nosehoover_vxi.data(), ngtc);
-    exchange_doubles(ms, b, state->nhpres_xi.data(), nnhpres);
-    exchange_doubles(ms, b, state->nhpres_vxi.data(), nnhpres);
-    exchange_doubles(ms, b, state->therm_integral.data(), state->ngtc);
-    exchange_doubles(ms, b, &state->baros_integral, 1);
-    exchange_rvecs(ms, b, state->x.rvec_array(), state->natoms);
-    exchange_rvecs(ms, b, state->v.rvec_array(), state->natoms);
-}
-
-/* PLUMED HREX */
-void copy_state_serial(const t_state* src, t_state* dest)
-/* END PLUMED HREX */
-{
-    if (dest != src)
-    {
-        /* Currently the local state is always a pointer to the global
-         * in serial, so we should never end up here.
-         * TODO: Implement a (trivial) t_state copy once converted to C++.
-         */
-        GMX_RELEASE_ASSERT(false, "State copying is currently not implemented in replica exchange");
-    }
-}
-
-static void scale_velocities(gmx::ArrayRef<gmx::RVec> velocities, real fac)
-{
-    for (auto& v : velocities)
-    {
-        v *= fac;
-    }
-}
-
-static void print_transition_matrix(FILE* fplog, int n, int** nmoves, const int* nattempt)
-{
-    int   i, j, ntot;
-    float Tprint;
-
-    ntot = nattempt[0] + nattempt[1];
-    fprintf(fplog, "\n");
-    fprintf(fplog, "Repl");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "    "); /* put the title closer to the center */
-    }
-    fprintf(fplog, "Empirical Transition Matrix\n");
-
-    fprintf(fplog, "Repl");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "%8d", (i + 1));
-    }
-    fprintf(fplog, "\n");
-
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "Repl");
-        for (j = 0; j < n; j++)
-        {
-            Tprint = 0.0;
-            if (nmoves[i][j] > 0)
-            {
-                Tprint = nmoves[i][j] / (2.0 * ntot);
-            }
-            fprintf(fplog, "%8.4f", Tprint);
-        }
-        fprintf(fplog, "%3d\n", i);
-    }
-}
-
-static void print_ind(FILE* fplog, const char* leg, int n, int* ind, const gmx_bool* bEx)
-{
-    int i;
-
-    fprintf(fplog, "Repl %2s %2d", leg, ind[0]);
-    for (i = 1; i < n; i++)
-    {
-        fprintf(fplog, " %c %2d", (bEx != nullptr && bEx[i]) ? 'x' : ' ', ind[i]);
-    }
-    fprintf(fplog, "\n");
-}
-
-static void print_allswitchind(FILE* fplog, int n, int* pind, int* allswaps, int* tmpswap)
-{
-    int i;
-
-    for (i = 0; i < n; i++)
-    {
-        tmpswap[i] = allswaps[i];
-    }
-    for (i = 0; i < n; i++)
-    {
-        allswaps[i] = tmpswap[pind[i]];
-    }
-
-    fprintf(fplog, "\nAccepted Exchanges:   ");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "%d ", pind[i]);
-    }
-    fprintf(fplog, "\n");
-
-    /* the "Order After Exchange" is the state label corresponding to the configuration that
-       started in state listed in order, i.e.
-
-       3 0 1 2
-
-       means that the:
-       configuration starting in simulation 3 is now in simulation 0,
-       configuration starting in simulation 0 is now in simulation 1,
-       configuration starting in simulation 1 is now in simulation 2,
-       configuration starting in simulation 2 is now in simulation 3
-     */
-    fprintf(fplog, "Order After Exchange: ");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "%d ", allswaps[i]);
-    }
-    fprintf(fplog, "\n\n");
-}
-
-static void print_prob(FILE* fplog, const char* leg, int n, real* prob)
-{
-    int  i;
-    char buf[8];
-
-    fprintf(fplog, "Repl %2s ", leg);
-    for (i = 1; i < n; i++)
-    {
-        if (prob[i] >= 0)
-        {
-            sprintf(buf, "%4.2f", prob[i]);
-            fprintf(fplog, "  %3s", buf[0] == '1' ? "1.0" : buf + 1);
-        }
-        else
-        {
-            fprintf(fplog, "     ");
-        }
-    }
-    fprintf(fplog, "\n");
-}
-
-static void print_count(FILE* fplog, const char* leg, int n, int* count)
-{
-    int i;
-
-    fprintf(fplog, "Repl %2s ", leg);
-    for (i = 1; i < n; i++)
-    {
-        fprintf(fplog, " %4d", count[i]);
-    }
-    fprintf(fplog, "\n");
-}
-
-static real calc_delta(FILE* fplog, gmx_bool bPrint, struct gmx_repl_ex* re, int a, int b, int ap, int bp)
-{
-
-    real   ediff, dpV, delta = 0;
-    real*  Epot = re->Epot;
-    real*  Vol  = re->Vol;
-    real** de   = re->de;
-    real*  beta = re->beta;
-
-    /* Two cases; we are permuted and not.  In all cases, setting ap = a and bp = b will reduce
-       to the non permuted case */
-
-    switch (re->type)
-    {
-        case ereTEMP:
-            /*
-             * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439
-             */
-            ediff = Epot[b] - Epot[a];
-            delta = -(beta[bp] - beta[ap]) * ediff;
-            break;
-        case ereLAMBDA:
-            /* two cases:  when we are permuted, and not.  */
-            /* non-permuted:
-               ediff =  E_new - E_old
-                     =  [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)]
-                     =  [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)]
-                     =  de[b][a] + de[a][b] */
-
-            /* permuted:
-               ediff =  E_new - E_old
-                     =  [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)]
-                     =  [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)]
-                     =  [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)]
-                     =  [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)]
-                     =  (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b])    */
-            /* but, in the current code implementation, we flip configurations, not indices . . .
-               So let's examine that.
-                     =  [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)]
-                     =  [H_b(x_ap) - H_a(x_ap)]  + [H_a(x_bp) - H_b(x_pb)]
-                     = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp]
-                     So, if we exchange b<=> bp and a<=> ap, we return to the same result.
-                     So the simple solution is to flip the
-                     position of perturbed and original indices in the tests.
-             */
-
-            ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]);
-            delta = ediff * beta[a]; /* assume all same temperature in this case */
-            break;
-        case ereTL:
-            /* not permuted:  */
-            /* delta =  reduced E_new - reduced E_old
-                     =  [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)]
-                     =  [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)]
-                     =  [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] +
-                        [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)]
-                     =  [beta_b dH_b(x_a) + [beta_a dH_a(x_b) +
-                        beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b))
-                     =  beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */
-            /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */
-            /* permuted (big breath!) */
-            /*   delta =  reduced E_new - reduced E_old
-                     =  [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)]
-                     =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
-                     =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
-                        - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a)
-                        - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
-                     =  [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] +
-                        [(beta_ap H_ap(x_b)  - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))]
-             + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
-                     =  [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] +
-                        [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))]
-             + beta_pb (H_a(x_a) - H_b(x_b))  - beta_ap (H_a(x_a) - H_b(x_b))
-                     =  ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b]  - beta_bp de[bp][b])
-             + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b))  */
-            delta = beta[bp] * (de[bp][a] - de[bp][b]) + beta[ap] * (de[ap][b] - de[ap][a])
-                    - (beta[bp] - beta[ap]) * (Epot[b] - Epot[a]);
-            break;
-        default: gmx_incons("Unknown replica exchange quantity");
-    }
-    if (bPrint)
-    {
-        fprintf(fplog, "Repl %d <-> %d  dE_term = %10.3e (kT)\n", a, b, delta);
-    }
-/* PLUMED HREX */
-/* this is necessary because with plumed HREX the energy contribution is
-   already taken into account */
-    if(plumed_hrex) delta=0.0;
-/* END PLUMED HREX */
-    if (re->bNPT)
-    {
-        /* revist the calculation for 5.0.  Might be some improvements. */
-        dpV = (beta[ap] * re->pres[ap] - beta[bp] * re->pres[bp]) * (Vol[b] - Vol[a]) / PRESFAC;
-        if (bPrint)
-        {
-            fprintf(fplog, "  dpV = %10.3e  d = %10.3e\n", dpV, delta + dpV);
-        }
-        delta += dpV;
-    }
-    return delta;
-}
-
-static void test_for_replica_exchange(FILE*                 fplog,
-                                      const gmx_multisim_t* ms,
-                                      struct gmx_repl_ex*   re,
-                                      const gmx_enerdata_t* enerd,
-                                      real                  vol,
-                                      int64_t               step,
-                                      real                  time)
-{
-    int                                m, i, j, a, b, ap, bp, i0, i1, tmp;
-    real                               delta = 0;
-    gmx_bool                           bPrint, bMultiEx;
-    gmx_bool*                          bEx      = re->bEx;
-    real*                              prob     = re->prob;
-    int*                               pind     = re->destinations; /* permuted index */
-    gmx_bool                           bEpot    = FALSE;
-    gmx_bool                           bDLambda = FALSE;
-    gmx_bool                           bVol     = FALSE;
-    gmx::ThreeFry2x64<64>              rng(re->seed, gmx::RandomDomain::ReplicaExchange);
-    gmx::UniformRealDistribution<real> uniformRealDist;
-    gmx::UniformIntDistribution<int>   uniformNreplDist(0, re->nrepl - 1);
-
-    bMultiEx = (re->nex > 1); /* multiple exchanges at each state */
-    fprintf(fplog, "Replica exchange at step %" PRId64 " time %.5f\n", step, time);
-
-    if (re->bNPT)
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->Vol[i] = 0;
-        }
-        bVol              = TRUE;
-        re->Vol[re->repl] = vol;
-    }
-    if ((re->type == ereTEMP || re->type == ereTL))
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->Epot[i] = 0;
-        }
-        bEpot              = TRUE;
-        re->Epot[re->repl] = enerd->term[F_EPOT];
-        /* temperatures of different states*/
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->beta[i] = 1.0 / (re->q[ereTEMP][i] * BOLTZ);
-        }
-    }
-    else
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->beta[i] = 1.0 / (re->temp * BOLTZ); /* we have a single temperature */
-        }
-    }
-    if (re->type == ereLAMBDA || re->type == ereTL)
-    {
-        bDLambda = TRUE;
-        /* lambda differences. */
-        /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian
-           minus the energy of the jth simulation in the jth Hamiltonian */
-        for (i = 0; i < re->nrepl; i++)
-        {
-            for (j = 0; j < re->nrepl; j++)
-            {
-                re->de[i][j] = 0;
-            }
-        }
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->de[i][re->repl] = (enerd->enerpart_lambda[static_cast<int>(re->q[ereLAMBDA][i]) + 1]
-                                   - enerd->enerpart_lambda[0]);
-        }
-    }
-
-    /* now actually do the communication */
-    if (bVol)
-    {
-        gmx_sum_sim(re->nrepl, re->Vol, ms);
-    }
-    if (bEpot)
-    {
-        gmx_sum_sim(re->nrepl, re->Epot, ms);
-    }
-    if (bDLambda)
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            gmx_sum_sim(re->nrepl, re->de[i], ms);
-        }
-    }
-
-    /* make a duplicate set of indices for shuffling */
-    for (i = 0; i < re->nrepl; i++)
-    {
-        pind[i] = re->ind[i];
-    }
-
-    rng.restart(step, 0);
-
-    /* PLUMED */
-    int plumed_test_exchange_pattern=0;
-    if(plumed_test_exchange_pattern && plumed_hrex) gmx_fatal(FARGS,"hrex not compatible with ad hoc exchange patterns");
-    /* END PLUMED */
-
-    if (bMultiEx)
-    {
-        /* multiple random switch exchange */
-        int nself = 0;
-
-
-        for (i = 0; i < re->nex + nself; i++)
-        {
-            // For now this is superfluous, but just in case we ever add more
-            // calls in different branches it is safer to always reset the distribution.
-            uniformNreplDist.reset();
-
-            /* randomly select a pair  */
-            /* in theory, could reduce this by identifying only which switches had a nonneglibible
-               probability of occurring (log p > -100) and only operate on those switches */
-            /* find out which state it is from, and what label that state currently has. Likely
-               more work that useful. */
-            i0 = uniformNreplDist(rng);
-            i1 = uniformNreplDist(rng);
-            if (i0 == i1)
-            {
-                nself++;
-                continue; /* self-exchange, back up and do it again */
-            }
-
-            a  = re->ind[i0]; /* what are the indices of these states? */
-            b  = re->ind[i1];
-            ap = pind[i0];
-            bp = pind[i1];
-
-            bPrint = FALSE; /* too noisy */
-            /* calculate the energy difference */
-            /* if the code changes to flip the STATES, rather than the configurations,
-               use the commented version of the code */
-            /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */
-            delta = calc_delta(fplog, bPrint, re, ap, bp, a, b);
-
-            /* we actually only use the first space in the prob and bEx array,
-               since there are actually many switches between pairs. */
-
-            if (delta <= 0)
-            {
-                /* accepted */
-                prob[0] = 1;
-                bEx[0]  = TRUE;
-            }
-            else
-            {
-                if (delta > c_probabilityCutoff)
-                {
-                    prob[0] = 0;
-                }
-                else
-                {
-                    prob[0] = exp(-delta);
-                }
-                // roll a number to determine if accepted. For now it is superfluous to
-                // reset, but just in case we ever add more calls in different branches
-                // it is safer to always reset the distribution.
-                uniformRealDist.reset();
-                bEx[0] = uniformRealDist(rng) < prob[0];
-            }
-            re->prob_sum[0] += prob[0];
-
-            if (bEx[0])
-            {
-                /* swap the states */
-                tmp      = pind[i0];
-                pind[i0] = pind[i1];
-                pind[i1] = tmp;
-            }
-        }
-        re->nattempt[0]++; /* keep track of total permutation trials here */
-        print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap);
-    }
-    else
-    {
-        /* standard nearest neighbor replica exchange */
-
-        m = (step / re->nst) % 2;
-        /* PLUMED */
-        if(plumedswitch){
-          int partner=re->repl;
-          plumed_cmd(plumedmain,"getExchangesFlag",&plumed_test_exchange_pattern);
-          if(plumed_test_exchange_pattern>0){
-            int *list;
-            snew(list,re->nrepl);
-            plumed_cmd(plumedmain,"setNumberOfReplicas",&(re->nrepl));
-            plumed_cmd(plumedmain,"getExchangesList",list);
-            for(i=0; i<re->nrepl; i++) re->ind[i]=list[i];
-            sfree(list);
-          }
-
-          for(i=1; i<re->nrepl; i++) {
-            if (i % 2 != m) continue;
-            a = re->ind[i-1];
-            b = re->ind[i];
-            if(re->repl==a) partner=b;
-            if(re->repl==b) partner=a;
-          }
-          plumed_cmd(plumedmain,"GREX setPartner",&partner);
-          plumed_cmd(plumedmain,"GREX calculate",NULL);
-          plumed_cmd(plumedmain,"GREX shareAllDeltaBias",NULL);
-        }
-        /* END PLUMED */
-        for (i = 1; i < re->nrepl; i++)
-        {
-            a = re->ind[i - 1];
-            b = re->ind[i];
-
-            bPrint = (re->repl == a || re->repl == b);
-            if (i % 2 == m)
-            {
-                delta = calc_delta(fplog, bPrint, re, a, b, a, b);
-                /* PLUMED */
-                if(plumedswitch){
-                  real adb,bdb,dplumed;
-                  char buf[300];
-                  sprintf(buf,"GREX getDeltaBias %d",a); plumed_cmd(plumedmain,buf,&adb);
-                  sprintf(buf,"GREX getDeltaBias %d",b); plumed_cmd(plumedmain,buf,&bdb);
-                  dplumed=adb*re->beta[a]+bdb*re->beta[b];
-                  delta+=dplumed;
-                  if (bPrint)
-                    fprintf(fplog,"dplumed = %10.3e  dE_Term = %10.3e (kT)\n",dplumed,delta);
-                }
-                /* END PLUMED */
-                if (delta <= 0)
-                {
-                    /* accepted */
-                    prob[i] = 1;
-                    bEx[i]  = TRUE;
-                }
-                else
-                {
-                    if (delta > c_probabilityCutoff)
-                    {
-                        prob[i] = 0;
-                    }
-                    else
-                    {
-                        prob[i] = exp(-delta);
-                    }
-                    // roll a number to determine if accepted. For now it is superfluous to
-                    // reset, but just in case we ever add more calls in different branches
-                    // it is safer to always reset the distribution.
-                    uniformRealDist.reset();
-                    bEx[i] = uniformRealDist(rng) < prob[i];
-                }
-                re->prob_sum[i] += prob[i];
-
-                if (bEx[i])
-                {
-                  /* PLUMED */
-                  if(!plumed_test_exchange_pattern) {
-                    /* standard neighbour swapping */
-                    /* swap these two */
-                    tmp         = pind[i - 1];
-                    pind[i - 1] = pind[i];
-                    pind[i]     = tmp;
-                    re->nexchange[i]++; /* statistics for back compatibility */
-                  } else {
-                    /* alternative swapping patterns */
-                    tmp       = pind[a];
-                    pind[a]   = pind[b];
-                    pind[b]   = tmp;
-                    re->nexchange[i]++;  /* statistics for back compatibility */
-                  }
-                  /* END PLUMED */
-                }
-            }
-            else
-            {
-                prob[i] = -1;
-                bEx[i]  = FALSE;
-            }
-        }
-        /* print some statistics */
-        print_ind(fplog, "ex", re->nrepl, re->ind, bEx);
-        print_prob(fplog, "pr", re->nrepl, prob);
-        fprintf(fplog, "\n");
-        re->nattempt[m]++;
-    }
-
-    /* PLUMED */
-    if(plumed_test_exchange_pattern>0) {
-      for (i = 0; i < re->nrepl; i++)
-      {
-          re->ind[i] = i;
-      }
-    }
-    /* END PLUMED */
-
-    /* record which moves were made and accepted */
-    for (i = 0; i < re->nrepl; i++)
-    {
-        re->nmoves[re->ind[i]][pind[i]] += 1;
-        re->nmoves[pind[i]][re->ind[i]] += 1;
-    }
-    fflush(fplog); /* make sure we can see what the last exchange was */
-}
-
-static void cyclic_decomposition(const int* destinations, int** cyclic, gmx_bool* incycle, const int nrepl, int* nswap)
-{
-
-    int i, j, c, p;
-    int maxlen = 1;
-    for (i = 0; i < nrepl; i++)
-    {
-        incycle[i] = FALSE;
-    }
-    for (i = 0; i < nrepl; i++) /* one cycle for each replica */
-    {
-        if (incycle[i])
-        {
-            cyclic[i][0] = -1;
-            continue;
-        }
-        cyclic[i][0] = i;
-        incycle[i]   = TRUE;
-        c            = 1;
-        p            = i;
-        for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */
-        {
-            p = destinations[p]; /* start permuting */
-            if (p == i)
-            {
-                cyclic[i][c] = -1;
-                if (c > maxlen)
-                {
-                    maxlen = c;
-                }
-                break; /* we've reached the original element, the cycle is complete, and we marked the end. */
-            }
-            else
-            {
-                cyclic[i][c] = p; /* each permutation gives a new member of the cycle */
-                incycle[p]   = TRUE;
-                c++;
-            }
-        }
-    }
-    *nswap = maxlen - 1;
-
-    if (debug)
-    {
-        for (i = 0; i < nrepl; i++)
-        {
-            fprintf(debug, "Cycle %d:", i);
-            for (j = 0; j < nrepl; j++)
-            {
-                if (cyclic[i][j] < 0)
-                {
-                    break;
-                }
-                fprintf(debug, "%2d", cyclic[i][j]);
-            }
-            fprintf(debug, "\n");
-        }
-        fflush(debug);
-    }
-}
-
-static void compute_exchange_order(int** cyclic, int** order, const int nrepl, const int maxswap)
-{
-    int i, j;
-
-    for (j = 0; j < maxswap; j++)
-    {
-        for (i = 0; i < nrepl; i++)
-        {
-            if (cyclic[i][j + 1] >= 0)
-            {
-                order[cyclic[i][j + 1]][j] = cyclic[i][j];
-                order[cyclic[i][j]][j]     = cyclic[i][j + 1];
-            }
-        }
-        for (i = 0; i < nrepl; i++)
-        {
-            if (order[i][j] < 0)
-            {
-                order[i][j] = i; /* if it's not exchanging, it should stay this round*/
-            }
-        }
-    }
-
-    if (debug)
-    {
-        fprintf(debug, "Replica Exchange Order\n");
-        for (i = 0; i < nrepl; i++)
-        {
-            fprintf(debug, "Replica %d:", i);
-            for (j = 0; j < maxswap; j++)
-            {
-                if (order[i][j] < 0)
-                {
-                    break;
-                }
-                fprintf(debug, "%2d", order[i][j]);
-            }
-            fprintf(debug, "\n");
-        }
-        fflush(debug);
-    }
-}
-
-static void prepare_to_do_exchange(struct gmx_repl_ex* re, const int replica_id, int* maxswap, gmx_bool* bThisReplicaExchanged)
-{
-    int i, j;
-    /* Hold the cyclic decomposition of the (multiple) replica
-     * exchange. */
-    gmx_bool bAnyReplicaExchanged = FALSE;
-    *bThisReplicaExchanged        = FALSE;
-
-    for (i = 0; i < re->nrepl; i++)
-    {
-        if (re->destinations[i] != re->ind[i])
-        {
-            /* only mark as exchanged if the index has been shuffled */
-            bAnyReplicaExchanged = TRUE;
-            break;
-        }
-    }
-    if (bAnyReplicaExchanged)
-    {
-        /* reinitialize the placeholder arrays */
-        for (i = 0; i < re->nrepl; i++)
-        {
-            for (j = 0; j < re->nrepl; j++)
-            {
-                re->cyclic[i][j] = -1;
-                re->order[i][j]  = -1;
-            }
-        }
-
-        /* Identify the cyclic decomposition of the permutation (very
-         * fast if neighbor replica exchange). */
-        cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap);
-
-        /* Now translate the decomposition into a replica exchange
-         * order at each step. */
-        compute_exchange_order(re->cyclic, re->order, re->nrepl, *maxswap);
-
-        /* Did this replica do any exchange at any point? */
-        for (j = 0; j < *maxswap; j++)
-        {
-            if (replica_id != re->order[replica_id][j])
-            {
-                *bThisReplicaExchanged = TRUE;
-                break;
-            }
-        }
-    }
-}
-
-gmx_bool replica_exchange(FILE*                 fplog,
-                          const t_commrec*      cr,
-                          const gmx_multisim_t* ms,
-                          struct gmx_repl_ex*   re,
-                          t_state*              state,
-                          const gmx_enerdata_t* enerd,
-                          t_state*              state_local,
-                          int64_t               step,
-                          real                  time)
-{
-    int j;
-    int replica_id = 0;
-    int exchange_partner;
-    int maxswap = 0;
-    /* Number of rounds of exchanges needed to deal with any multiple
-     * exchanges. */
-    /* Where each replica ends up after the exchange attempt(s). */
-    /* The order in which multiple exchanges will occur. */
-    gmx_bool bThisReplicaExchanged = FALSE;
-
-    /* PLUMED */
-    if(plumedswitch)plumed_cmd(plumedmain,"GREX prepare",NULL);
-    /* END PLUMED */
-
-    if (MASTER(cr))
-    {
-        replica_id = re->repl;
-        test_for_replica_exchange(fplog, ms, re, enerd, det(state_local->box), step, time);
-        prepare_to_do_exchange(re, replica_id, &maxswap, &bThisReplicaExchanged);
-    }
-    /* Do intra-simulation broadcast so all processors belonging to
-     * each simulation know whether they need to participate in
-     * collecting the state. Otherwise, they might as well get on with
-     * the next thing to do. */
-    if (DOMAINDECOMP(cr))
-    {
-#if GMX_MPI
-        MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mygroup);
-#endif
-    }
-
-    if (bThisReplicaExchanged)
-    {
-        /* Exchange the states */
-        /* Collect the global state on the master node */
-        if (DOMAINDECOMP(cr))
-        {
-            dd_collect_state(cr->dd, state_local, state);
-        }
-        else
-        {
-            copy_state_serial(state_local, state);
-        }
-
-        if (MASTER(cr))
-        {
-            /* There will be only one swap cycle with standard replica
-             * exchange, but there may be multiple swap cycles if we
-             * allow multiple swaps. */
-
-            for (j = 0; j < maxswap; j++)
-            {
-                exchange_partner = re->order[replica_id][j];
-
-                if (exchange_partner != replica_id)
-                {
-                    /* Exchange the global states between the master nodes */
-                    if (debug)
-                    {
-                        fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner);
-                    }
-                    exchange_state(ms, exchange_partner, state);
-                }
-            }
-            /* For temperature-type replica exchange, we need to scale
-             * the velocities. */
-            if (re->type == ereTEMP || re->type == ereTL)
-            {
-                scale_velocities(state->v, std::sqrt(re->q[ereTEMP][replica_id]
-                                                     / re->q[ereTEMP][re->destinations[replica_id]]));
-            }
-        }
-
-        /* With domain decomposition the global state is distributed later */
-        if (!DOMAINDECOMP(cr))
-        {
-            /* Copy the global state to the local state data structure */
-            copy_state_serial(state, state_local);
-        }
-    }
-
-    return bThisReplicaExchanged;
-}
-
-void print_replica_exchange_statistics(FILE* fplog, struct gmx_repl_ex* re)
-{
-    int i;
-
-    fprintf(fplog, "\nReplica exchange statistics\n");
-
-    if (re->nex == 0)
-    {
-        fprintf(fplog, "Repl  %d attempts, %d odd, %d even\n", re->nattempt[0] + re->nattempt[1],
-                re->nattempt[1], re->nattempt[0]);
-
-        fprintf(fplog, "Repl  average probabilities:\n");
-        for (i = 1; i < re->nrepl; i++)
-        {
-            if (re->nattempt[i % 2] == 0)
-            {
-                re->prob[i] = 0;
-            }
-            else
-            {
-                re->prob[i] = re->prob_sum[i] / re->nattempt[i % 2];
-            }
-        }
-        print_ind(fplog, "", re->nrepl, re->ind, nullptr);
-        print_prob(fplog, "", re->nrepl, re->prob);
-
-        fprintf(fplog, "Repl  number of exchanges:\n");
-        print_ind(fplog, "", re->nrepl, re->ind, nullptr);
-        print_count(fplog, "", re->nrepl, re->nexchange);
-
-        fprintf(fplog, "Repl  average number of exchanges:\n");
-        for (i = 1; i < re->nrepl; i++)
-        {
-            if (re->nattempt[i % 2] == 0)
-            {
-                re->prob[i] = 0;
-            }
-            else
-            {
-                re->prob[i] = (static_cast<real>(re->nexchange[i])) / re->nattempt[i % 2];
-            }
-        }
-        print_ind(fplog, "", re->nrepl, re->ind, nullptr);
-        print_prob(fplog, "", re->nrepl, re->prob);
-
-        fprintf(fplog, "\n");
-    }
-    /* print the transition matrix */
-    print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt);
-}
-
-/* PLUMED HREX */
-int replica_exchange_get_repl(const gmx_repl_ex_t re){
-  return re->repl;
-};
-
-int replica_exchange_get_nrepl(const gmx_repl_ex_t re){
-  return re->nrepl;
-};
-/* END PLUMED HREX */
-//! \endcond
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.cpp.preplumed b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.cpp.preplumed
deleted file mode 100644
index 9ff4b3817d..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.cpp.preplumed
+++ /dev/null
@@ -1,1391 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011-2019, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-
-/*! \internal \file
- *
- * \brief Implements the replica exchange routines.
- *
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \author Mark Abraham <mark.j.abraham@gmail.com>
- * \ingroup module_mdrun
- */
-#include "gmxpre.h"
-
-#include "replicaexchange.h"
-
-#include "config.h"
-
-#include <cmath>
-
-#include <random>
-
-#include "gromacs/domdec/collect.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdrunutility/multisim.h"
-#include "gromacs/mdtypes/commrec.h"
-#include "gromacs/mdtypes/enerdata.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/state.h"
-#include "gromacs/random/threefry.h"
-#include "gromacs/random/uniformintdistribution.h"
-#include "gromacs/random/uniformrealdistribution.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/pleasecite.h"
-#include "gromacs/utility/smalloc.h"
-
-//! Helps cut off probability values.
-constexpr int c_probabilityCutoff = 100;
-
-/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */
-
-//! Rank in the multisimulation
-#define MSRANK(ms, nodeid) (nodeid)
-
-//! Enum for replica exchange flavours
-enum
-{
-    ereTEMP,
-    ereLAMBDA,
-    ereENDSINGLE,
-    ereTL,
-    ereNR
-};
-/*! \brief Strings describing replica exchange flavours.
- *
- *  end_single_marker merely notes the end of single variable replica
- *  exchange. All types higher than it are multiple replica exchange
- *  methods.
- *
- * Eventually, should add 'pressure', 'temperature and pressure',
- *  'lambda_and_pressure', 'temperature_lambda_pressure'?; Let's wait
- *  until we feel better about the pressure control methods giving
- *  exact ensembles.  Right now, we assume constant pressure */
-static const char* erename[ereNR] = { "temperature", "lambda", "end_single_marker",
-                                      "temperature and lambda" };
-
-//! Working data for replica exchange.
-struct gmx_repl_ex
-{
-    //! Replica ID
-    int repl;
-    //! Total number of replica
-    int nrepl;
-    //! Temperature
-    real temp;
-    //! Replica exchange type from ere enum
-    int type;
-    //! Quantity, e.g. temperature or lambda; first index is ere, second index is replica ID
-    real** q;
-    //! Use constant pressure and temperature
-    gmx_bool bNPT;
-    //! Replica pressures
-    real* pres;
-    //! Replica indices
-    int* ind;
-    //! Used for keeping track of all the replica swaps
-    int* allswaps;
-    //! Replica exchange interval (number of steps)
-    int nst;
-    //! Number of exchanges per interval
-    int nex;
-    //! Random seed
-    int seed;
-    //! Number of even and odd replica change attempts
-    int nattempt[2];
-    //! Sum of probabilities
-    real* prob_sum;
-    //! Number of moves between replicas i and j
-    int** nmoves;
-    //! i-th element of the array is the number of exchanges between replica i-1 and i
-    int* nexchange;
-
-    /*! \brief Helper arrays for replica exchange; allocated here
-     * so they don't have to be allocated each time */
-    //! \{
-    int*      destinations;
-    int**     cyclic;
-    int**     order;
-    int*      tmpswap;
-    gmx_bool* incycle;
-    gmx_bool* bEx;
-    //! \}
-
-    //! Helper arrays to hold the quantities that are exchanged.
-    //! \{
-    real*  prob;
-    real*  Epot;
-    real*  beta;
-    real*  Vol;
-    real** de;
-    //! \}
-};
-
-// TODO We should add Doxygen here some time.
-//! \cond
-
-static gmx_bool repl_quantity(const gmx_multisim_t* ms, struct gmx_repl_ex* re, int ere, real q)
-{
-    real*    qall;
-    gmx_bool bDiff;
-    int      s;
-
-    snew(qall, ms->nsim);
-    qall[re->repl] = q;
-    gmx_sum_sim(ms->nsim, qall, ms);
-
-    bDiff = FALSE;
-    for (s = 1; s < ms->nsim; s++)
-    {
-        if (qall[s] != qall[0])
-        {
-            bDiff = TRUE;
-        }
-    }
-
-    if (bDiff)
-    {
-        /* Set the replica exchange type and quantities */
-        re->type = ere;
-
-        snew(re->q[ere], re->nrepl);
-        for (s = 0; s < ms->nsim; s++)
-        {
-            re->q[ere][s] = qall[s];
-        }
-    }
-    sfree(qall);
-    return bDiff;
-}
-
-gmx_repl_ex_t init_replica_exchange(FILE*                            fplog,
-                                    const gmx_multisim_t*            ms,
-                                    int                              numAtomsInSystem,
-                                    const t_inputrec*                ir,
-                                    const ReplicaExchangeParameters& replExParams)
-{
-    real                pres;
-    int                 i, j;
-    struct gmx_repl_ex* re;
-    gmx_bool            bTemp;
-    gmx_bool            bLambda = FALSE;
-
-    fprintf(fplog, "\nInitializing Replica Exchange\n");
-
-    if (!isMultiSim(ms) || ms->nsim == 1)
-    {
-        gmx_fatal(FARGS,
-                  "Nothing to exchange with only one replica, maybe you forgot to set the "
-                  "-multidir option of mdrun?");
-    }
-    if (replExParams.numExchanges < 0)
-    {
-        gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive");
-    }
-
-    if (!EI_DYNAMICS(ir->eI))
-    {
-        gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations");
-        /* Note that PAR(cr) is defined by cr->nnodes > 1, which is
-         * distinct from isMultiSim(ms). A multi-simulation only runs
-         * with real MPI parallelism, but this does not imply PAR(cr)
-         * is true!
-         *
-         * Since we are using a dynamical integrator, the only
-         * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are
-         * synonymous. The only way for cr->nnodes > 1 to be true is
-         * if we are using DD. */
-    }
-
-    snew(re, 1);
-
-    re->repl  = ms->sim;
-    re->nrepl = ms->nsim;
-    snew(re->q, ereENDSINGLE);
-
-    fprintf(fplog, "Repl  There are %d replicas:\n", re->nrepl);
-
-    /* We only check that the number of atoms in the systms match.
-     * This, of course, do not guarantee that the systems are the same,
-     * but it does guarantee that we can perform replica exchange.
-     */
-    check_multi_int(fplog, ms, numAtomsInSystem, "the number of atoms", FALSE);
-    check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE);
-    check_multi_int64(fplog, ms, ir->init_step + ir->nsteps, "init_step+nsteps", FALSE);
-    const int nst = replExParams.exchangeInterval;
-    check_multi_int64(fplog, ms, (ir->init_step + nst - 1) / nst,
-                      "first exchange step: init_step/-replex", FALSE);
-    check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE);
-    check_multi_int(fplog, ms, ir->opts.ngtc, "the number of temperature coupling groups", FALSE);
-    check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE);
-    check_multi_int(fplog, ms, ir->efep, "free energy", FALSE);
-    check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE);
-
-    re->temp = ir->opts.ref_t[0];
-    for (i = 1; (i < ir->opts.ngtc); i++)
-    {
-        if (ir->opts.ref_t[i] != re->temp)
-        {
-            fprintf(fplog,
-                    "\nWARNING: The temperatures of the different temperature coupling groups are "
-                    "not identical\n\n");
-            fprintf(stderr,
-                    "\nWARNING: The temperatures of the different temperature coupling groups are "
-                    "not identical\n\n");
-        }
-    }
-
-    re->type = -1;
-    bTemp    = repl_quantity(ms, re, ereTEMP, re->temp);
-    if (ir->efep != efepNO)
-    {
-        bLambda = repl_quantity(ms, re, ereLAMBDA, static_cast<real>(ir->fepvals->init_fep_state));
-    }
-    if (re->type == -1) /* nothing was assigned */
-    {
-        gmx_fatal(FARGS,
-                  "The properties of the %d systems are all the same, there is nothing to exchange",
-                  re->nrepl);
-    }
-    if (bLambda && bTemp)
-    {
-        re->type = ereTL;
-    }
-
-    if (bTemp)
-    {
-        please_cite(fplog, "Sugita1999a");
-        if (ir->epc != epcNO)
-        {
-            re->bNPT = TRUE;
-            fprintf(fplog, "Repl  Using Constant Pressure REMD.\n");
-            please_cite(fplog, "Okabe2001a");
-        }
-        if (ir->etc == etcBERENDSEN)
-        {
-            gmx_fatal(FARGS,
-                      "REMD with the %s thermostat does not produce correct potential energy "
-                      "distributions, consider using the %s thermostat instead",
-                      ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE));
-        }
-    }
-    if (bLambda)
-    {
-        if (ir->fepvals->delta_lambda != 0) /* check this? */
-        {
-            gmx_fatal(FARGS, "delta_lambda is not zero");
-        }
-    }
-    if (re->bNPT)
-    {
-        snew(re->pres, re->nrepl);
-        if (ir->epct == epctSURFACETENSION)
-        {
-            pres = ir->ref_p[ZZ][ZZ];
-        }
-        else
-        {
-            pres = 0;
-            j    = 0;
-            for (i = 0; i < DIM; i++)
-            {
-                if (ir->compress[i][i] != 0)
-                {
-                    pres += ir->ref_p[i][i];
-                    j++;
-                }
-            }
-            pres /= j;
-        }
-        re->pres[re->repl] = pres;
-        gmx_sum_sim(re->nrepl, re->pres, ms);
-    }
-
-    /* Make an index for increasing replica order */
-    /* only makes sense if one or the other is varying, not both!
-       if both are varying, we trust the order the person gave. */
-    snew(re->ind, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        re->ind[i] = i;
-    }
-
-    if (re->type < ereENDSINGLE)
-    {
-
-        for (i = 0; i < re->nrepl; i++)
-        {
-            for (j = i + 1; j < re->nrepl; j++)
-            {
-                if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]])
-                {
-                    /* Unordered replicas are supposed to work, but there
-                     * is still an issues somewhere.
-                     * Note that at this point still re->ind[i]=i.
-                     */
-                    gmx_fatal(FARGS,
-                              "Replicas with indices %d < %d have %ss %g > %g, please order your "
-                              "replicas on increasing %s",
-                              i, j, erename[re->type], re->q[re->type][i], re->q[re->type][j],
-                              erename[re->type]);
-                }
-                else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]])
-                {
-                    gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]);
-                }
-            }
-        }
-    }
-
-    /* keep track of all the swaps, starting with the initial placement. */
-    snew(re->allswaps, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        re->allswaps[i] = re->ind[i];
-    }
-
-    switch (re->type)
-    {
-        case ereTEMP:
-            fprintf(fplog, "\nReplica exchange in temperature\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]);
-            }
-            fprintf(fplog, "\n");
-            break;
-        case ereLAMBDA:
-            fprintf(fplog, "\nReplica exchange in lambda\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %3d", static_cast<int>(re->q[re->type][re->ind[i]]));
-            }
-            fprintf(fplog, "\n");
-            break;
-        case ereTL:
-            fprintf(fplog, "\nReplica exchange in temperature and lambda state\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]);
-            }
-            fprintf(fplog, "\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %5d", static_cast<int>(re->q[ereLAMBDA][re->ind[i]]));
-            }
-            fprintf(fplog, "\n");
-            break;
-        default: gmx_incons("Unknown replica exchange quantity");
-    }
-    if (re->bNPT)
-    {
-        fprintf(fplog, "\nRepl  p");
-        for (i = 0; i < re->nrepl; i++)
-        {
-            fprintf(fplog, " %5.2f", re->pres[re->ind[i]]);
-        }
-
-        for (i = 0; i < re->nrepl; i++)
-        {
-            if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i - 1]]))
-            {
-                fprintf(fplog,
-                        "\nWARNING: The reference pressures decrease with increasing "
-                        "temperatures\n\n");
-                fprintf(stderr,
-                        "\nWARNING: The reference pressures decrease with increasing "
-                        "temperatures\n\n");
-            }
-        }
-    }
-    re->nst = nst;
-    if (replExParams.randomSeed == -1)
-    {
-        if (isMasterSim(ms))
-        {
-            re->seed = static_cast<int>(gmx::makeRandomSeed());
-        }
-        else
-        {
-            re->seed = 0;
-        }
-        gmx_sumi_sim(1, &(re->seed), ms);
-    }
-    else
-    {
-        re->seed = replExParams.randomSeed;
-    }
-    fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst);
-    fprintf(fplog, "\nReplica random seed: %d\n", re->seed);
-
-    re->nattempt[0] = 0;
-    re->nattempt[1] = 0;
-
-    snew(re->prob_sum, re->nrepl);
-    snew(re->nexchange, re->nrepl);
-    snew(re->nmoves, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        snew(re->nmoves[i], re->nrepl);
-    }
-    fprintf(fplog, "Replica exchange information below: ex and x = exchange, pr = probability\n");
-
-    /* generate space for the helper functions so we don't have to snew each time */
-
-    snew(re->destinations, re->nrepl);
-    snew(re->incycle, re->nrepl);
-    snew(re->tmpswap, re->nrepl);
-    snew(re->cyclic, re->nrepl);
-    snew(re->order, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        snew(re->cyclic[i], re->nrepl + 1);
-        snew(re->order[i], re->nrepl);
-    }
-    /* allocate space for the functions storing the data for the replicas */
-    /* not all of these arrays needed in all cases, but they don't take
-       up much space, since the max size is nrepl**2 */
-    snew(re->prob, re->nrepl);
-    snew(re->bEx, re->nrepl);
-    snew(re->beta, re->nrepl);
-    snew(re->Vol, re->nrepl);
-    snew(re->Epot, re->nrepl);
-    snew(re->de, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        snew(re->de[i], re->nrepl);
-    }
-    re->nex = replExParams.numExchanges;
-    return re;
-}
-
-static void exchange_reals(const gmx_multisim_t gmx_unused* ms, int gmx_unused b, real* v, int n)
-{
-    real* buf;
-    int   i;
-
-    if (v)
-    {
-        snew(buf, n);
-#if GMX_MPI
-        /*
-           MPI_Sendrecv(v,  n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
-           buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
-           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v, n * sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf, n * sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, ms->mpi_comm_masters,
-                     MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            v[i] = buf[i];
-        }
-        sfree(buf);
-    }
-}
-
-
-static void exchange_doubles(const gmx_multisim_t gmx_unused* ms, int gmx_unused b, double* v, int n)
-{
-    double* buf;
-    int     i;
-
-    if (v)
-    {
-        snew(buf, n);
-#if GMX_MPI
-        /*
-           MPI_Sendrecv(v,  n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
-           buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
-           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v, n * sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf, n * sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, ms->mpi_comm_masters,
-                     MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            v[i] = buf[i];
-        }
-        sfree(buf);
-    }
-}
-
-static void exchange_rvecs(const gmx_multisim_t gmx_unused* ms, int gmx_unused b, rvec* v, int n)
-{
-    rvec* buf;
-    int   i;
-
-    if (v)
-    {
-        snew(buf, n);
-#if GMX_MPI
-        /*
-           MPI_Sendrecv(v[0],  n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
-           buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
-           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v[0], n * sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf[0], n * sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, ms->mpi_comm_masters,
-                     MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            copy_rvec(buf[i], v[i]);
-        }
-        sfree(buf);
-    }
-}
-
-static void exchange_state(const gmx_multisim_t* ms, int b, t_state* state)
-{
-    /* When t_state changes, this code should be updated. */
-    int ngtc, nnhpres;
-    ngtc    = state->ngtc * state->nhchainlength;
-    nnhpres = state->nnhpres * state->nhchainlength;
-    exchange_rvecs(ms, b, state->box, DIM);
-    exchange_rvecs(ms, b, state->box_rel, DIM);
-    exchange_rvecs(ms, b, state->boxv, DIM);
-    exchange_reals(ms, b, &(state->veta), 1);
-    exchange_reals(ms, b, &(state->vol0), 1);
-    exchange_rvecs(ms, b, state->svir_prev, DIM);
-    exchange_rvecs(ms, b, state->fvir_prev, DIM);
-    exchange_rvecs(ms, b, state->pres_prev, DIM);
-    exchange_doubles(ms, b, state->nosehoover_xi.data(), ngtc);
-    exchange_doubles(ms, b, state->nosehoover_vxi.data(), ngtc);
-    exchange_doubles(ms, b, state->nhpres_xi.data(), nnhpres);
-    exchange_doubles(ms, b, state->nhpres_vxi.data(), nnhpres);
-    exchange_doubles(ms, b, state->therm_integral.data(), state->ngtc);
-    exchange_doubles(ms, b, &state->baros_integral, 1);
-    exchange_rvecs(ms, b, state->x.rvec_array(), state->natoms);
-    exchange_rvecs(ms, b, state->v.rvec_array(), state->natoms);
-}
-
-static void copy_state_serial(const t_state* src, t_state* dest)
-{
-    if (dest != src)
-    {
-        /* Currently the local state is always a pointer to the global
-         * in serial, so we should never end up here.
-         * TODO: Implement a (trivial) t_state copy once converted to C++.
-         */
-        GMX_RELEASE_ASSERT(false, "State copying is currently not implemented in replica exchange");
-    }
-}
-
-static void scale_velocities(gmx::ArrayRef<gmx::RVec> velocities, real fac)
-{
-    for (auto& v : velocities)
-    {
-        v *= fac;
-    }
-}
-
-static void print_transition_matrix(FILE* fplog, int n, int** nmoves, const int* nattempt)
-{
-    int   i, j, ntot;
-    float Tprint;
-
-    ntot = nattempt[0] + nattempt[1];
-    fprintf(fplog, "\n");
-    fprintf(fplog, "Repl");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "    "); /* put the title closer to the center */
-    }
-    fprintf(fplog, "Empirical Transition Matrix\n");
-
-    fprintf(fplog, "Repl");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "%8d", (i + 1));
-    }
-    fprintf(fplog, "\n");
-
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "Repl");
-        for (j = 0; j < n; j++)
-        {
-            Tprint = 0.0;
-            if (nmoves[i][j] > 0)
-            {
-                Tprint = nmoves[i][j] / (2.0 * ntot);
-            }
-            fprintf(fplog, "%8.4f", Tprint);
-        }
-        fprintf(fplog, "%3d\n", i);
-    }
-}
-
-static void print_ind(FILE* fplog, const char* leg, int n, int* ind, const gmx_bool* bEx)
-{
-    int i;
-
-    fprintf(fplog, "Repl %2s %2d", leg, ind[0]);
-    for (i = 1; i < n; i++)
-    {
-        fprintf(fplog, " %c %2d", (bEx != nullptr && bEx[i]) ? 'x' : ' ', ind[i]);
-    }
-    fprintf(fplog, "\n");
-}
-
-static void print_allswitchind(FILE* fplog, int n, int* pind, int* allswaps, int* tmpswap)
-{
-    int i;
-
-    for (i = 0; i < n; i++)
-    {
-        tmpswap[i] = allswaps[i];
-    }
-    for (i = 0; i < n; i++)
-    {
-        allswaps[i] = tmpswap[pind[i]];
-    }
-
-    fprintf(fplog, "\nAccepted Exchanges:   ");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "%d ", pind[i]);
-    }
-    fprintf(fplog, "\n");
-
-    /* the "Order After Exchange" is the state label corresponding to the configuration that
-       started in state listed in order, i.e.
-
-       3 0 1 2
-
-       means that the:
-       configuration starting in simulation 3 is now in simulation 0,
-       configuration starting in simulation 0 is now in simulation 1,
-       configuration starting in simulation 1 is now in simulation 2,
-       configuration starting in simulation 2 is now in simulation 3
-     */
-    fprintf(fplog, "Order After Exchange: ");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "%d ", allswaps[i]);
-    }
-    fprintf(fplog, "\n\n");
-}
-
-static void print_prob(FILE* fplog, const char* leg, int n, real* prob)
-{
-    int  i;
-    char buf[8];
-
-    fprintf(fplog, "Repl %2s ", leg);
-    for (i = 1; i < n; i++)
-    {
-        if (prob[i] >= 0)
-        {
-            sprintf(buf, "%4.2f", prob[i]);
-            fprintf(fplog, "  %3s", buf[0] == '1' ? "1.0" : buf + 1);
-        }
-        else
-        {
-            fprintf(fplog, "     ");
-        }
-    }
-    fprintf(fplog, "\n");
-}
-
-static void print_count(FILE* fplog, const char* leg, int n, int* count)
-{
-    int i;
-
-    fprintf(fplog, "Repl %2s ", leg);
-    for (i = 1; i < n; i++)
-    {
-        fprintf(fplog, " %4d", count[i]);
-    }
-    fprintf(fplog, "\n");
-}
-
-static real calc_delta(FILE* fplog, gmx_bool bPrint, struct gmx_repl_ex* re, int a, int b, int ap, int bp)
-{
-
-    real   ediff, dpV, delta = 0;
-    real*  Epot = re->Epot;
-    real*  Vol  = re->Vol;
-    real** de   = re->de;
-    real*  beta = re->beta;
-
-    /* Two cases; we are permuted and not.  In all cases, setting ap = a and bp = b will reduce
-       to the non permuted case */
-
-    switch (re->type)
-    {
-        case ereTEMP:
-            /*
-             * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439
-             */
-            ediff = Epot[b] - Epot[a];
-            delta = -(beta[bp] - beta[ap]) * ediff;
-            break;
-        case ereLAMBDA:
-            /* two cases:  when we are permuted, and not.  */
-            /* non-permuted:
-               ediff =  E_new - E_old
-                     =  [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)]
-                     =  [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)]
-                     =  de[b][a] + de[a][b] */
-
-            /* permuted:
-               ediff =  E_new - E_old
-                     =  [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)]
-                     =  [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)]
-                     =  [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)]
-                     =  [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)]
-                     =  (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b])    */
-            /* but, in the current code implementation, we flip configurations, not indices . . .
-               So let's examine that.
-                     =  [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)]
-                     =  [H_b(x_ap) - H_a(x_ap)]  + [H_a(x_bp) - H_b(x_pb)]
-                     = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp]
-                     So, if we exchange b<=> bp and a<=> ap, we return to the same result.
-                     So the simple solution is to flip the
-                     position of perturbed and original indices in the tests.
-             */
-
-            ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]);
-            delta = ediff * beta[a]; /* assume all same temperature in this case */
-            break;
-        case ereTL:
-            /* not permuted:  */
-            /* delta =  reduced E_new - reduced E_old
-                     =  [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)]
-                     =  [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)]
-                     =  [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] +
-                        [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)]
-                     =  [beta_b dH_b(x_a) + [beta_a dH_a(x_b) +
-                        beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b))
-                     =  beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */
-            /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */
-            /* permuted (big breath!) */
-            /*   delta =  reduced E_new - reduced E_old
-                     =  [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)]
-                     =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
-                     =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
-                        - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a)
-                        - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
-                     =  [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] +
-                        [(beta_ap H_ap(x_b)  - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))]
-             + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
-                     =  [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] +
-                        [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))]
-             + beta_pb (H_a(x_a) - H_b(x_b))  - beta_ap (H_a(x_a) - H_b(x_b))
-                     =  ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b]  - beta_bp de[bp][b])
-             + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b))  */
-            delta = beta[bp] * (de[bp][a] - de[bp][b]) + beta[ap] * (de[ap][b] - de[ap][a])
-                    - (beta[bp] - beta[ap]) * (Epot[b] - Epot[a]);
-            break;
-        default: gmx_incons("Unknown replica exchange quantity");
-    }
-    if (bPrint)
-    {
-        fprintf(fplog, "Repl %d <-> %d  dE_term = %10.3e (kT)\n", a, b, delta);
-    }
-    if (re->bNPT)
-    {
-        /* revist the calculation for 5.0.  Might be some improvements. */
-        dpV = (beta[ap] * re->pres[ap] - beta[bp] * re->pres[bp]) * (Vol[b] - Vol[a]) / PRESFAC;
-        if (bPrint)
-        {
-            fprintf(fplog, "  dpV = %10.3e  d = %10.3e\n", dpV, delta + dpV);
-        }
-        delta += dpV;
-    }
-    return delta;
-}
-
-static void test_for_replica_exchange(FILE*                 fplog,
-                                      const gmx_multisim_t* ms,
-                                      struct gmx_repl_ex*   re,
-                                      const gmx_enerdata_t* enerd,
-                                      real                  vol,
-                                      int64_t               step,
-                                      real                  time)
-{
-    int                                m, i, j, a, b, ap, bp, i0, i1, tmp;
-    real                               delta = 0;
-    gmx_bool                           bPrint, bMultiEx;
-    gmx_bool*                          bEx      = re->bEx;
-    real*                              prob     = re->prob;
-    int*                               pind     = re->destinations; /* permuted index */
-    gmx_bool                           bEpot    = FALSE;
-    gmx_bool                           bDLambda = FALSE;
-    gmx_bool                           bVol     = FALSE;
-    gmx::ThreeFry2x64<64>              rng(re->seed, gmx::RandomDomain::ReplicaExchange);
-    gmx::UniformRealDistribution<real> uniformRealDist;
-    gmx::UniformIntDistribution<int>   uniformNreplDist(0, re->nrepl - 1);
-
-    bMultiEx = (re->nex > 1); /* multiple exchanges at each state */
-    fprintf(fplog, "Replica exchange at step %" PRId64 " time %.5f\n", step, time);
-
-    if (re->bNPT)
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->Vol[i] = 0;
-        }
-        bVol              = TRUE;
-        re->Vol[re->repl] = vol;
-    }
-    if ((re->type == ereTEMP || re->type == ereTL))
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->Epot[i] = 0;
-        }
-        bEpot              = TRUE;
-        re->Epot[re->repl] = enerd->term[F_EPOT];
-        /* temperatures of different states*/
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->beta[i] = 1.0 / (re->q[ereTEMP][i] * BOLTZ);
-        }
-    }
-    else
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->beta[i] = 1.0 / (re->temp * BOLTZ); /* we have a single temperature */
-        }
-    }
-    if (re->type == ereLAMBDA || re->type == ereTL)
-    {
-        bDLambda = TRUE;
-        /* lambda differences. */
-        /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian
-           minus the energy of the jth simulation in the jth Hamiltonian */
-        for (i = 0; i < re->nrepl; i++)
-        {
-            for (j = 0; j < re->nrepl; j++)
-            {
-                re->de[i][j] = 0;
-            }
-        }
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->de[i][re->repl] = (enerd->enerpart_lambda[static_cast<int>(re->q[ereLAMBDA][i]) + 1]
-                                   - enerd->enerpart_lambda[0]);
-        }
-    }
-
-    /* now actually do the communication */
-    if (bVol)
-    {
-        gmx_sum_sim(re->nrepl, re->Vol, ms);
-    }
-    if (bEpot)
-    {
-        gmx_sum_sim(re->nrepl, re->Epot, ms);
-    }
-    if (bDLambda)
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            gmx_sum_sim(re->nrepl, re->de[i], ms);
-        }
-    }
-
-    /* make a duplicate set of indices for shuffling */
-    for (i = 0; i < re->nrepl; i++)
-    {
-        pind[i] = re->ind[i];
-    }
-
-    rng.restart(step, 0);
-
-    if (bMultiEx)
-    {
-        /* multiple random switch exchange */
-        int nself = 0;
-
-
-        for (i = 0; i < re->nex + nself; i++)
-        {
-            // For now this is superfluous, but just in case we ever add more
-            // calls in different branches it is safer to always reset the distribution.
-            uniformNreplDist.reset();
-
-            /* randomly select a pair  */
-            /* in theory, could reduce this by identifying only which switches had a nonneglibible
-               probability of occurring (log p > -100) and only operate on those switches */
-            /* find out which state it is from, and what label that state currently has. Likely
-               more work that useful. */
-            i0 = uniformNreplDist(rng);
-            i1 = uniformNreplDist(rng);
-            if (i0 == i1)
-            {
-                nself++;
-                continue; /* self-exchange, back up and do it again */
-            }
-
-            a  = re->ind[i0]; /* what are the indices of these states? */
-            b  = re->ind[i1];
-            ap = pind[i0];
-            bp = pind[i1];
-
-            bPrint = FALSE; /* too noisy */
-            /* calculate the energy difference */
-            /* if the code changes to flip the STATES, rather than the configurations,
-               use the commented version of the code */
-            /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */
-            delta = calc_delta(fplog, bPrint, re, ap, bp, a, b);
-
-            /* we actually only use the first space in the prob and bEx array,
-               since there are actually many switches between pairs. */
-
-            if (delta <= 0)
-            {
-                /* accepted */
-                prob[0] = 1;
-                bEx[0]  = TRUE;
-            }
-            else
-            {
-                if (delta > c_probabilityCutoff)
-                {
-                    prob[0] = 0;
-                }
-                else
-                {
-                    prob[0] = exp(-delta);
-                }
-                // roll a number to determine if accepted. For now it is superfluous to
-                // reset, but just in case we ever add more calls in different branches
-                // it is safer to always reset the distribution.
-                uniformRealDist.reset();
-                bEx[0] = uniformRealDist(rng) < prob[0];
-            }
-            re->prob_sum[0] += prob[0];
-
-            if (bEx[0])
-            {
-                /* swap the states */
-                tmp      = pind[i0];
-                pind[i0] = pind[i1];
-                pind[i1] = tmp;
-            }
-        }
-        re->nattempt[0]++; /* keep track of total permutation trials here */
-        print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap);
-    }
-    else
-    {
-        /* standard nearest neighbor replica exchange */
-
-        m = (step / re->nst) % 2;
-        for (i = 1; i < re->nrepl; i++)
-        {
-            a = re->ind[i - 1];
-            b = re->ind[i];
-
-            bPrint = (re->repl == a || re->repl == b);
-            if (i % 2 == m)
-            {
-                delta = calc_delta(fplog, bPrint, re, a, b, a, b);
-                if (delta <= 0)
-                {
-                    /* accepted */
-                    prob[i] = 1;
-                    bEx[i]  = TRUE;
-                }
-                else
-                {
-                    if (delta > c_probabilityCutoff)
-                    {
-                        prob[i] = 0;
-                    }
-                    else
-                    {
-                        prob[i] = exp(-delta);
-                    }
-                    // roll a number to determine if accepted. For now it is superfluous to
-                    // reset, but just in case we ever add more calls in different branches
-                    // it is safer to always reset the distribution.
-                    uniformRealDist.reset();
-                    bEx[i] = uniformRealDist(rng) < prob[i];
-                }
-                re->prob_sum[i] += prob[i];
-
-                if (bEx[i])
-                {
-                    /* swap these two */
-                    tmp         = pind[i - 1];
-                    pind[i - 1] = pind[i];
-                    pind[i]     = tmp;
-                    re->nexchange[i]++; /* statistics for back compatibility */
-                }
-            }
-            else
-            {
-                prob[i] = -1;
-                bEx[i]  = FALSE;
-            }
-        }
-        /* print some statistics */
-        print_ind(fplog, "ex", re->nrepl, re->ind, bEx);
-        print_prob(fplog, "pr", re->nrepl, prob);
-        fprintf(fplog, "\n");
-        re->nattempt[m]++;
-    }
-
-    /* record which moves were made and accepted */
-    for (i = 0; i < re->nrepl; i++)
-    {
-        re->nmoves[re->ind[i]][pind[i]] += 1;
-        re->nmoves[pind[i]][re->ind[i]] += 1;
-    }
-    fflush(fplog); /* make sure we can see what the last exchange was */
-}
-
-static void cyclic_decomposition(const int* destinations, int** cyclic, gmx_bool* incycle, const int nrepl, int* nswap)
-{
-
-    int i, j, c, p;
-    int maxlen = 1;
-    for (i = 0; i < nrepl; i++)
-    {
-        incycle[i] = FALSE;
-    }
-    for (i = 0; i < nrepl; i++) /* one cycle for each replica */
-    {
-        if (incycle[i])
-        {
-            cyclic[i][0] = -1;
-            continue;
-        }
-        cyclic[i][0] = i;
-        incycle[i]   = TRUE;
-        c            = 1;
-        p            = i;
-        for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */
-        {
-            p = destinations[p]; /* start permuting */
-            if (p == i)
-            {
-                cyclic[i][c] = -1;
-                if (c > maxlen)
-                {
-                    maxlen = c;
-                }
-                break; /* we've reached the original element, the cycle is complete, and we marked the end. */
-            }
-            else
-            {
-                cyclic[i][c] = p; /* each permutation gives a new member of the cycle */
-                incycle[p]   = TRUE;
-                c++;
-            }
-        }
-    }
-    *nswap = maxlen - 1;
-
-    if (debug)
-    {
-        for (i = 0; i < nrepl; i++)
-        {
-            fprintf(debug, "Cycle %d:", i);
-            for (j = 0; j < nrepl; j++)
-            {
-                if (cyclic[i][j] < 0)
-                {
-                    break;
-                }
-                fprintf(debug, "%2d", cyclic[i][j]);
-            }
-            fprintf(debug, "\n");
-        }
-        fflush(debug);
-    }
-}
-
-static void compute_exchange_order(int** cyclic, int** order, const int nrepl, const int maxswap)
-{
-    int i, j;
-
-    for (j = 0; j < maxswap; j++)
-    {
-        for (i = 0; i < nrepl; i++)
-        {
-            if (cyclic[i][j + 1] >= 0)
-            {
-                order[cyclic[i][j + 1]][j] = cyclic[i][j];
-                order[cyclic[i][j]][j]     = cyclic[i][j + 1];
-            }
-        }
-        for (i = 0; i < nrepl; i++)
-        {
-            if (order[i][j] < 0)
-            {
-                order[i][j] = i; /* if it's not exchanging, it should stay this round*/
-            }
-        }
-    }
-
-    if (debug)
-    {
-        fprintf(debug, "Replica Exchange Order\n");
-        for (i = 0; i < nrepl; i++)
-        {
-            fprintf(debug, "Replica %d:", i);
-            for (j = 0; j < maxswap; j++)
-            {
-                if (order[i][j] < 0)
-                {
-                    break;
-                }
-                fprintf(debug, "%2d", order[i][j]);
-            }
-            fprintf(debug, "\n");
-        }
-        fflush(debug);
-    }
-}
-
-static void prepare_to_do_exchange(struct gmx_repl_ex* re, const int replica_id, int* maxswap, gmx_bool* bThisReplicaExchanged)
-{
-    int i, j;
-    /* Hold the cyclic decomposition of the (multiple) replica
-     * exchange. */
-    gmx_bool bAnyReplicaExchanged = FALSE;
-    *bThisReplicaExchanged        = FALSE;
-
-    for (i = 0; i < re->nrepl; i++)
-    {
-        if (re->destinations[i] != re->ind[i])
-        {
-            /* only mark as exchanged if the index has been shuffled */
-            bAnyReplicaExchanged = TRUE;
-            break;
-        }
-    }
-    if (bAnyReplicaExchanged)
-    {
-        /* reinitialize the placeholder arrays */
-        for (i = 0; i < re->nrepl; i++)
-        {
-            for (j = 0; j < re->nrepl; j++)
-            {
-                re->cyclic[i][j] = -1;
-                re->order[i][j]  = -1;
-            }
-        }
-
-        /* Identify the cyclic decomposition of the permutation (very
-         * fast if neighbor replica exchange). */
-        cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap);
-
-        /* Now translate the decomposition into a replica exchange
-         * order at each step. */
-        compute_exchange_order(re->cyclic, re->order, re->nrepl, *maxswap);
-
-        /* Did this replica do any exchange at any point? */
-        for (j = 0; j < *maxswap; j++)
-        {
-            if (replica_id != re->order[replica_id][j])
-            {
-                *bThisReplicaExchanged = TRUE;
-                break;
-            }
-        }
-    }
-}
-
-gmx_bool replica_exchange(FILE*                 fplog,
-                          const t_commrec*      cr,
-                          const gmx_multisim_t* ms,
-                          struct gmx_repl_ex*   re,
-                          t_state*              state,
-                          const gmx_enerdata_t* enerd,
-                          t_state*              state_local,
-                          int64_t               step,
-                          real                  time)
-{
-    int j;
-    int replica_id = 0;
-    int exchange_partner;
-    int maxswap = 0;
-    /* Number of rounds of exchanges needed to deal with any multiple
-     * exchanges. */
-    /* Where each replica ends up after the exchange attempt(s). */
-    /* The order in which multiple exchanges will occur. */
-    gmx_bool bThisReplicaExchanged = FALSE;
-
-    if (MASTER(cr))
-    {
-        replica_id = re->repl;
-        test_for_replica_exchange(fplog, ms, re, enerd, det(state_local->box), step, time);
-        prepare_to_do_exchange(re, replica_id, &maxswap, &bThisReplicaExchanged);
-    }
-    /* Do intra-simulation broadcast so all processors belonging to
-     * each simulation know whether they need to participate in
-     * collecting the state. Otherwise, they might as well get on with
-     * the next thing to do. */
-    if (DOMAINDECOMP(cr))
-    {
-#if GMX_MPI
-        MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr), cr->mpi_comm_mygroup);
-#endif
-    }
-
-    if (bThisReplicaExchanged)
-    {
-        /* Exchange the states */
-        /* Collect the global state on the master node */
-        if (DOMAINDECOMP(cr))
-        {
-            dd_collect_state(cr->dd, state_local, state);
-        }
-        else
-        {
-            copy_state_serial(state_local, state);
-        }
-
-        if (MASTER(cr))
-        {
-            /* There will be only one swap cycle with standard replica
-             * exchange, but there may be multiple swap cycles if we
-             * allow multiple swaps. */
-
-            for (j = 0; j < maxswap; j++)
-            {
-                exchange_partner = re->order[replica_id][j];
-
-                if (exchange_partner != replica_id)
-                {
-                    /* Exchange the global states between the master nodes */
-                    if (debug)
-                    {
-                        fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner);
-                    }
-                    exchange_state(ms, exchange_partner, state);
-                }
-            }
-            /* For temperature-type replica exchange, we need to scale
-             * the velocities. */
-            if (re->type == ereTEMP || re->type == ereTL)
-            {
-                scale_velocities(state->v, std::sqrt(re->q[ereTEMP][replica_id]
-                                                     / re->q[ereTEMP][re->destinations[replica_id]]));
-            }
-        }
-
-        /* With domain decomposition the global state is distributed later */
-        if (!DOMAINDECOMP(cr))
-        {
-            /* Copy the global state to the local state data structure */
-            copy_state_serial(state, state_local);
-        }
-    }
-
-    return bThisReplicaExchanged;
-}
-
-void print_replica_exchange_statistics(FILE* fplog, struct gmx_repl_ex* re)
-{
-    int i;
-
-    fprintf(fplog, "\nReplica exchange statistics\n");
-
-    if (re->nex == 0)
-    {
-        fprintf(fplog, "Repl  %d attempts, %d odd, %d even\n", re->nattempt[0] + re->nattempt[1],
-                re->nattempt[1], re->nattempt[0]);
-
-        fprintf(fplog, "Repl  average probabilities:\n");
-        for (i = 1; i < re->nrepl; i++)
-        {
-            if (re->nattempt[i % 2] == 0)
-            {
-                re->prob[i] = 0;
-            }
-            else
-            {
-                re->prob[i] = re->prob_sum[i] / re->nattempt[i % 2];
-            }
-        }
-        print_ind(fplog, "", re->nrepl, re->ind, nullptr);
-        print_prob(fplog, "", re->nrepl, re->prob);
-
-        fprintf(fplog, "Repl  number of exchanges:\n");
-        print_ind(fplog, "", re->nrepl, re->ind, nullptr);
-        print_count(fplog, "", re->nrepl, re->nexchange);
-
-        fprintf(fplog, "Repl  average number of exchanges:\n");
-        for (i = 1; i < re->nrepl; i++)
-        {
-            if (re->nattempt[i % 2] == 0)
-            {
-                re->prob[i] = 0;
-            }
-            else
-            {
-                re->prob[i] = (static_cast<real>(re->nexchange[i])) / re->nattempt[i % 2];
-            }
-        }
-        print_ind(fplog, "", re->nrepl, re->ind, nullptr);
-        print_prob(fplog, "", re->nrepl, re->prob);
-
-        fprintf(fplog, "\n");
-    }
-    /* print the transition matrix */
-    print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt);
-}
-
-//! \endcond
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.h b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.h
deleted file mode 100644
index 86008eff51..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \libinternal \file
- *
- * \brief Declares the routines for replica exchange.
- *
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \author Mark Abraham <mark.j.abraham@gmail.com>
- *
- * \ingroup module_mdrun
- */
-#ifndef GMX_MDRUN_REPLICAEXCHANGE_H
-#define GMX_MDRUN_REPLICAEXCHANGE_H
-
-#include <cstdio>
-
-#include "gromacs/utility/basedefinitions.h"
-#include "gromacs/utility/real.h"
-
-struct gmx_enerdata_t;
-struct gmx_multisim_t;
-struct t_commrec;
-struct t_inputrec;
-class t_state;
-
-/*! \libinternal
- * \brief The parameters for the replica exchange algorithm. */
-struct ReplicaExchangeParameters
-{
-    //! Interval in steps at which to attempt exchanges, 0 means no replica exchange.
-    int exchangeInterval = 0;
-    //! The number of exchanges to attempt at an exchange step.
-    int numExchanges = 0;
-    //! The random seed, -1 means generate a seed.
-    int randomSeed = -1;
-};
-
-//! Abstract type for replica exchange
-typedef struct gmx_repl_ex* gmx_repl_ex_t;
-
-/*! \brief Setup function.
- *
- * Should only be called on the master ranks */
-gmx_repl_ex_t init_replica_exchange(FILE*                            fplog,
-                                    const gmx_multisim_t*            ms,
-                                    int                              numAtomsInSystem,
-                                    const t_inputrec*                ir,
-                                    const ReplicaExchangeParameters& replExParams);
-
-/*! \brief Attempts replica exchange.
- *
- * Should be called on all ranks.  When running each replica in
- * parallel, this routine collects the state on the master rank before
- * exchange.  With domain decomposition, the global state after
- * exchange is stored in state and still needs to be redistributed
- * over the ranks.
- *
- * \returns TRUE if the state has been exchanged.
- */
-gmx_bool replica_exchange(FILE*                 fplog,
-                          const t_commrec*      cr,
-                          const gmx_multisim_t* ms,
-                          gmx_repl_ex_t         re,
-                          t_state*              state,
-                          const gmx_enerdata_t* enerd,
-                          t_state*              state_local,
-                          int64_t               step,
-                          real                  time);
-
-/*! \brief Prints replica exchange statistics to the log file.
- *
- * Should only be called on the master ranks */
-void print_replica_exchange_statistics(FILE* fplog, gmx_repl_ex_t re);
-
-/* PLUMED HREX */
-extern int replica_exchange_get_repl(const gmx_repl_ex_t re);
-extern int replica_exchange_get_nrepl(const gmx_repl_ex_t re);
-extern void pd_collect_state(const t_commrec *cr, t_state *state);
-extern void exchange_state(const gmx_multisim_t *ms, int b, t_state *state);
-extern void copy_state_serial(const t_state *src, t_state *dest);
-/* END PLUMED HREX */
-
-#endif
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.h.preplumed b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.h.preplumed
deleted file mode 100644
index f6bead1071..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/replicaexchange.h.preplumed
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \libinternal \file
- *
- * \brief Declares the routines for replica exchange.
- *
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \author Mark Abraham <mark.j.abraham@gmail.com>
- *
- * \ingroup module_mdrun
- */
-#ifndef GMX_MDRUN_REPLICAEXCHANGE_H
-#define GMX_MDRUN_REPLICAEXCHANGE_H
-
-#include <cstdio>
-
-#include "gromacs/utility/basedefinitions.h"
-#include "gromacs/utility/real.h"
-
-struct gmx_enerdata_t;
-struct gmx_multisim_t;
-struct t_commrec;
-struct t_inputrec;
-class t_state;
-
-/*! \libinternal
- * \brief The parameters for the replica exchange algorithm. */
-struct ReplicaExchangeParameters
-{
-    //! Interval in steps at which to attempt exchanges, 0 means no replica exchange.
-    int exchangeInterval = 0;
-    //! The number of exchanges to attempt at an exchange step.
-    int numExchanges = 0;
-    //! The random seed, -1 means generate a seed.
-    int randomSeed = -1;
-};
-
-//! Abstract type for replica exchange
-typedef struct gmx_repl_ex* gmx_repl_ex_t;
-
-/*! \brief Setup function.
- *
- * Should only be called on the master ranks */
-gmx_repl_ex_t init_replica_exchange(FILE*                            fplog,
-                                    const gmx_multisim_t*            ms,
-                                    int                              numAtomsInSystem,
-                                    const t_inputrec*                ir,
-                                    const ReplicaExchangeParameters& replExParams);
-
-/*! \brief Attempts replica exchange.
- *
- * Should be called on all ranks.  When running each replica in
- * parallel, this routine collects the state on the master rank before
- * exchange.  With domain decomposition, the global state after
- * exchange is stored in state and still needs to be redistributed
- * over the ranks.
- *
- * \returns TRUE if the state has been exchanged.
- */
-gmx_bool replica_exchange(FILE*                 fplog,
-                          const t_commrec*      cr,
-                          const gmx_multisim_t* ms,
-                          gmx_repl_ex_t         re,
-                          t_state*              state,
-                          const gmx_enerdata_t* enerd,
-                          t_state*              state_local,
-                          int64_t               step,
-                          real                  time);
-
-/*! \brief Prints replica exchange statistics to the log file.
- *
- * Should only be called on the master ranks */
-void print_replica_exchange_statistics(FILE* fplog, gmx_repl_ex_t re);
-
-#endif
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/runner.cpp b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/runner.cpp
deleted file mode 100644
index ed007d9525..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/runner.cpp
+++ /dev/null
@@ -1,2133 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \internal \file
- *
- * \brief Implements the MD runner routine calling all integrators.
- *
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \ingroup module_mdrun
- */
-#include "gmxpre.h"
-
-#include "runner.h"
-
-#include "config.h"
-
-#include <cassert>
-#include <cinttypes>
-#include <csignal>
-#include <cstdlib>
-#include <cstring>
-
-#include <algorithm>
-#include <memory>
-
-#include "gromacs/commandline/filenm.h"
-#include "gromacs/domdec/builder.h"
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/domdec/domdec_struct.h"
-#include "gromacs/domdec/gpuhaloexchange.h"
-#include "gromacs/domdec/localatomsetmanager.h"
-#include "gromacs/domdec/partition.h"
-#include "gromacs/ewald/ewald_utils.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/ewald/pme_gpu_program.h"
-#include "gromacs/ewald/pme_pp_comm_gpu.h"
-#include "gromacs/fileio/checkpoint.h"
-#include "gromacs/fileio/gmxfio.h"
-#include "gromacs/fileio/oenv.h"
-#include "gromacs/fileio/tpxio.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/gpu_utils/gpu_utils.h"
-#include "gromacs/hardware/cpuinfo.h"
-#include "gromacs/hardware/detecthardware.h"
-#include "gromacs/hardware/printhardware.h"
-#include "gromacs/imd/imd.h"
-#include "gromacs/listed_forces/disre.h"
-#include "gromacs/listed_forces/gpubonded.h"
-#include "gromacs/listed_forces/orires.h"
-#include "gromacs/math/functions.h"
-#include "gromacs/math/utilities.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/boxdeformation.h"
-#include "gromacs/mdlib/broadcaststructs.h"
-#include "gromacs/mdlib/calc_verletbuf.h"
-#include "gromacs/mdlib/dispersioncorrection.h"
-#include "gromacs/mdlib/enerdata_utils.h"
-#include "gromacs/mdlib/force.h"
-#include "gromacs/mdlib/forcerec.h"
-#include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/makeconstraints.h"
-#include "gromacs/mdlib/md_support.h"
-#include "gromacs/mdlib/mdatoms.h"
-#include "gromacs/mdlib/membed.h"
-#include "gromacs/mdlib/qmmm.h"
-#include "gromacs/mdlib/sighandler.h"
-#include "gromacs/mdlib/stophandler.h"
-#include "gromacs/mdlib/updategroups.h"
-#include "gromacs/mdrun/mdmodules.h"
-#include "gromacs/mdrun/simulationcontext.h"
-#include "gromacs/mdrunutility/handlerestart.h"
-#include "gromacs/mdrunutility/logging.h"
-#include "gromacs/mdrunutility/multisim.h"
-#include "gromacs/mdrunutility/printtime.h"
-#include "gromacs/mdrunutility/threadaffinity.h"
-#include "gromacs/mdtypes/commrec.h"
-#include "gromacs/mdtypes/enerdata.h"
-#include "gromacs/mdtypes/fcdata.h"
-#include "gromacs/mdtypes/group.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdrunoptions.h"
-#include "gromacs/mdtypes/observableshistory.h"
-#include "gromacs/mdtypes/simulation_workload.h"
-#include "gromacs/mdtypes/state.h"
-#include "gromacs/mdtypes/state_propagator_data_gpu.h"
-#include "gromacs/nbnxm/gpu_data_mgmt.h"
-#include "gromacs/nbnxm/nbnxm.h"
-#include "gromacs/nbnxm/pairlist_tuning.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/pulling/output.h"
-#include "gromacs/pulling/pull.h"
-#include "gromacs/pulling/pull_rotation.h"
-#include "gromacs/restraint/manager.h"
-#include "gromacs/restraint/restraintmdmodule.h"
-#include "gromacs/restraint/restraintpotential.h"
-#include "gromacs/swap/swapcoords.h"
-#include "gromacs/taskassignment/decidegpuusage.h"
-#include "gromacs/taskassignment/decidesimulationworkload.h"
-#include "gromacs/taskassignment/resourcedivision.h"
-#include "gromacs/taskassignment/taskassignment.h"
-#include "gromacs/taskassignment/usergpuids.h"
-#include "gromacs/timing/gpu_timing.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/timing/wallcyclereporting.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/trajectory/trajectoryframe.h"
-#include "gromacs/utility/basenetwork.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/filestream.h"
-#include "gromacs/utility/gmxassert.h"
-#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/keyvaluetree.h"
-#include "gromacs/utility/logger.h"
-#include "gromacs/utility/loggerbuilder.h"
-#include "gromacs/utility/mdmodulenotification.h"
-#include "gromacs/utility/physicalnodecommunicator.h"
-#include "gromacs/utility/pleasecite.h"
-#include "gromacs/utility/programcontext.h"
-#include "gromacs/utility/smalloc.h"
-#include "gromacs/utility/stringutil.h"
-
-#include "isimulator.h"
-#include "replicaexchange.h"
-#include "simulatorbuilder.h"
-
-/* PLUMED */
-#include "../../../Plumed.h"
-extern int    plumedswitch;
-extern plumed plumedmain; 
-/* END PLUMED */
-
-/* PLUMED HREX */
-extern int plumed_hrex;
-/* END PLUMED HREX */
-
-namespace gmx
-{
-
-
-/*! \brief Manage any development feature flag variables encountered
- *
- * The use of dev features indicated by environment variables is
- * logged in order to ensure that runs with such features enabled can
- * be identified from their log and standard output. Any cross
- * dependencies are also checked, and if unsatisfied, a fatal error
- * issued.
- *
- * Note that some development features overrides are applied already here:
- * the GPU communication flags are set to false in non-tMPI and non-CUDA builds.
- *
- * \param[in]  mdlog                Logger object.
- * \param[in]  useGpuForNonbonded   True if the nonbonded task is offloaded in this run.
- * \param[in]  pmeRunMode           The PME run mode for this run
- * \returns                         The object populated with development feature flags.
- */
-static DevelopmentFeatureFlags manageDevelopmentFeatures(const gmx::MDLogger& mdlog,
-                                                         const bool           useGpuForNonbonded,
-                                                         const PmeRunMode     pmeRunMode)
-{
-    DevelopmentFeatureFlags devFlags;
-
-    // Some builds of GCC 5 give false positive warnings that these
-    // getenv results are ignored when clearly they are used.
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-result"
-    devFlags.enableGpuBufferOps = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr)
-                                  && (GMX_GPU == GMX_GPU_CUDA) && useGpuForNonbonded;
-    devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr) || GMX_FAHCORE;
-    devFlags.enableGpuHaloExchange =
-            (getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
-    devFlags.enableGpuPmePPComm =
-            (getenv("GMX_GPU_PME_PP_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
-#pragma GCC diagnostic pop
-
-    if (devFlags.enableGpuBufferOps)
-    {
-        GMX_LOG(mdlog.warning)
-                .asParagraph()
-                .appendTextFormatted(
-                        "This run uses the 'GPU buffer ops' feature, enabled by the "
-                        "GMX_USE_GPU_BUFFER_OPS environment variable.");
-    }
-
-    if (devFlags.forceGpuUpdateDefault)
-    {
-        GMX_LOG(mdlog.warning)
-                .asParagraph()
-                .appendTextFormatted(
-                        "This run will default to '-update gpu' as requested by the "
-                        "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable. GPU update with domain "
-                        "decomposition lacks substantial testing and should be used with caution.");
-    }
-
-    if (devFlags.enableGpuHaloExchange)
-    {
-        if (useGpuForNonbonded)
-        {
-            if (!devFlags.enableGpuBufferOps)
-            {
-                GMX_LOG(mdlog.warning)
-                        .asParagraph()
-                        .appendTextFormatted(
-                                "Enabling GPU buffer operations required by GMX_GPU_DD_COMMS "
-                                "(equivalent with GMX_USE_GPU_BUFFER_OPS=1).");
-                devFlags.enableGpuBufferOps = true;
-            }
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendTextFormatted(
-                            "This run uses the 'GPU halo exchange' feature, enabled by the "
-                            "GMX_GPU_DD_COMMS environment variable.");
-        }
-        else
-        {
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendTextFormatted(
-                            "GMX_GPU_DD_COMMS environment variable detected, but the 'GPU "
-                            "halo exchange' feature will not be enabled as nonbonded interactions "
-                            "are not offloaded.");
-            devFlags.enableGpuHaloExchange = false;
-        }
-    }
-
-    if (devFlags.enableGpuPmePPComm)
-    {
-        if (pmeRunMode == PmeRunMode::GPU)
-        {
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendTextFormatted(
-                            "This run uses the 'GPU PME-PP communications' feature, enabled "
-                            "by the GMX_GPU_PME_PP_COMMS environment variable.");
-        }
-        else
-        {
-            std::string clarification;
-            if (pmeRunMode == PmeRunMode::Mixed)
-            {
-                clarification =
-                        "PME FFT and gather are not offloaded to the GPU (PME is running in mixed "
-                        "mode).";
-            }
-            else
-            {
-                clarification = "PME is not offloaded to the GPU.";
-            }
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendText(
-                            "GMX_GPU_PME_PP_COMMS environment variable detected, but the "
-                            "'GPU PME-PP communications' feature was not enabled as "
-                            + clarification);
-            devFlags.enableGpuPmePPComm = false;
-        }
-    }
-
-    return devFlags;
-}
-
-/*! \brief Barrier for safe simultaneous thread access to mdrunner data
- *
- * Used to ensure that the master thread does not modify mdrunner during copy
- * on the spawned threads. */
-static void threadMpiMdrunnerAccessBarrier()
-{
-#if GMX_THREAD_MPI
-    MPI_Barrier(MPI_COMM_WORLD);
-#endif
-}
-
-Mdrunner Mdrunner::cloneOnSpawnedThread() const
-{
-    auto newRunner = Mdrunner(std::make_unique<MDModules>());
-
-    // All runners in the same process share a restraint manager resource because it is
-    // part of the interface to the client code, which is associated only with the
-    // original thread. Handles to the same resources can be obtained by copy.
-    {
-        newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
-    }
-
-    // Copy members of master runner.
-    // \todo Replace with builder when Simulation context and/or runner phases are better defined.
-    // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375
-    newRunner.hw_opt    = hw_opt;
-    newRunner.filenames = filenames;
-
-    newRunner.oenv            = oenv;
-    newRunner.mdrunOptions    = mdrunOptions;
-    newRunner.domdecOptions   = domdecOptions;
-    newRunner.nbpu_opt        = nbpu_opt;
-    newRunner.pme_opt         = pme_opt;
-    newRunner.pme_fft_opt     = pme_fft_opt;
-    newRunner.bonded_opt      = bonded_opt;
-    newRunner.update_opt      = update_opt;
-    newRunner.nstlist_cmdline = nstlist_cmdline;
-    newRunner.replExParams    = replExParams;
-    newRunner.pforce          = pforce;
-    // Give the spawned thread the newly created valid communicator
-    // for the simulation.
-    newRunner.communicator        = MPI_COMM_WORLD;
-    newRunner.ms                  = ms;
-    newRunner.startingBehavior    = startingBehavior;
-    newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
-
-    threadMpiMdrunnerAccessBarrier();
-
-    return newRunner;
-}
-
-/*! \brief The callback used for running on spawned threads.
- *
- * Obtains the pointer to the master mdrunner object from the one
- * argument permitted to the thread-launch API call, copies it to make
- * a new runner for this thread, reinitializes necessary data, and
- * proceeds to the simulation. */
-static void mdrunner_start_fn(const void* arg)
-{
-    try
-    {
-        auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner*>(arg);
-        /* copy the arg list to make sure that it's thread-local. This
-           doesn't copy pointed-to items, of course; fnm, cr and fplog
-           are reset in the call below, all others should be const. */
-        gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
-        mdrunner.mdrunner();
-    }
-    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-}
-
-
-void Mdrunner::spawnThreads(int numThreadsToLaunch)
-{
-#if GMX_THREAD_MPI
-    /* now spawn new threads that start mdrunner_start_fn(), while
-       the main thread returns. Thread affinity is handled later. */
-    if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE, mdrunner_start_fn,
-                     static_cast<const void*>(this))
-        != TMPI_SUCCESS)
-    {
-        GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
-    }
-
-    // Give the master thread the newly created valid communicator for
-    // the simulation.
-    communicator = MPI_COMM_WORLD;
-    threadMpiMdrunnerAccessBarrier();
-#else
-    GMX_UNUSED_VALUE(numThreadsToLaunch);
-    GMX_UNUSED_VALUE(mdrunner_start_fn);
-#endif
-}
-
-} // namespace gmx
-
-/*! \brief Initialize variables for Verlet scheme simulation */
-static void prepare_verlet_scheme(FILE*               fplog,
-                                  t_commrec*          cr,
-                                  t_inputrec*         ir,
-                                  int                 nstlist_cmdline,
-                                  const gmx_mtop_t*   mtop,
-                                  const matrix        box,
-                                  bool                makeGpuPairList,
-                                  const gmx::CpuInfo& cpuinfo)
-{
-    /* For NVE simulations, we will retain the initial list buffer */
-    if (EI_DYNAMICS(ir->eI) && ir->verletbuf_tol > 0 && !(EI_MD(ir->eI) && ir->etc == etcNO))
-    {
-        /* Update the Verlet buffer size for the current run setup */
-
-        /* Here we assume SIMD-enabled kernels are being used. But as currently
-         * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
-         * and 4x2 gives a larger buffer than 4x4, this is ok.
-         */
-        ListSetupType listType =
-                (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
-        VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
-
-        const real rlist_new =
-                calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
-
-        if (rlist_new != ir->rlist)
-        {
-            if (fplog != nullptr)
-            {
-                fprintf(fplog,
-                        "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
-                        ir->rlist, rlist_new, listSetup.cluster_size_i, listSetup.cluster_size_j);
-            }
-            ir->rlist = rlist_new;
-        }
-    }
-
-    if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
-    {
-        gmx_fatal(FARGS, "Can not set nstlist without %s",
-                  !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
-    }
-
-    if (EI_DYNAMICS(ir->eI))
-    {
-        /* Set or try nstlist values */
-        increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
-    }
-}
-
-/*! \brief Override the nslist value in inputrec
- *
- * with value passed on the command line (if any)
- */
-static void override_nsteps_cmdline(const gmx::MDLogger& mdlog, int64_t nsteps_cmdline, t_inputrec* ir)
-{
-    assert(ir);
-
-    /* override with anything else than the default -2 */
-    if (nsteps_cmdline > -2)
-    {
-        char sbuf_steps[STEPSTRSIZE];
-        char sbuf_msg[STRLEN];
-
-        ir->nsteps = nsteps_cmdline;
-        if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
-        {
-            sprintf(sbuf_msg,
-                    "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
-                    gmx_step_str(nsteps_cmdline, sbuf_steps), fabs(nsteps_cmdline * ir->delta_t));
-        }
-        else
-        {
-            sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
-                    gmx_step_str(nsteps_cmdline, sbuf_steps));
-        }
-
-        GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
-    }
-    else if (nsteps_cmdline < -2)
-    {
-        gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64, nsteps_cmdline);
-    }
-    /* Do nothing if nsteps_cmdline == -2 */
-}
-
-namespace gmx
-{
-
-/*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
- *
- * If not, and if a warning may be issued, logs a warning about
- * falling back to CPU code. With thread-MPI, only the first
- * call to this function should have \c issueWarning true. */
-static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger& mdlog, const t_inputrec& ir, bool issueWarning)
-{
-    bool        gpuIsUseful = true;
-    std::string warning;
-
-    if (ir.opts.ngener - ir.nwall > 1)
-    {
-        /* The GPU code does not support more than one energy group.
-         * If the user requested GPUs explicitly, a fatal error is given later.
-         */
-        gpuIsUseful = false;
-        warning =
-                "Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
-                "For better performance, run on the GPU without energy groups and then do "
-                "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.";
-    }
-
-    if (EI_TPI(ir.eI))
-    {
-        gpuIsUseful = false;
-        warning     = "TPI is not implemented for GPUs.";
-    }
-
-    if (!gpuIsUseful && issueWarning)
-    {
-        GMX_LOG(mdlog.warning).asParagraph().appendText(warning);
-    }
-
-    return gpuIsUseful;
-}
-
-//! Initializes the logger for mdrun.
-static gmx::LoggerOwner buildLogger(FILE* fplog, const bool isSimulationMasterRank)
-{
-    gmx::LoggerBuilder builder;
-    if (fplog != nullptr)
-    {
-        builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
-    }
-    if (isSimulationMasterRank)
-    {
-        builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError());
-    }
-    return builder.build();
-}
-
-//! Make a TaskTarget from an mdrun argument string.
-static TaskTarget findTaskTarget(const char* optionString)
-{
-    TaskTarget returnValue = TaskTarget::Auto;
-
-    if (strncmp(optionString, "auto", 3) == 0)
-    {
-        returnValue = TaskTarget::Auto;
-    }
-    else if (strncmp(optionString, "cpu", 3) == 0)
-    {
-        returnValue = TaskTarget::Cpu;
-    }
-    else if (strncmp(optionString, "gpu", 3) == 0)
-    {
-        returnValue = TaskTarget::Gpu;
-    }
-    else
-    {
-        GMX_ASSERT(false, "Option string should have been checked for sanity already");
-    }
-
-    return returnValue;
-}
-
-//! Finish run, aggregate data to print performance info.
-static void finish_run(FILE*                     fplog,
-                       const gmx::MDLogger&      mdlog,
-                       const t_commrec*          cr,
-                       const t_inputrec*         inputrec,
-                       t_nrnb                    nrnb[],
-                       gmx_wallcycle_t           wcycle,
-                       gmx_walltime_accounting_t walltime_accounting,
-                       nonbonded_verlet_t*       nbv,
-                       const gmx_pme_t*          pme,
-                       gmx_bool                  bWriteStat)
-{
-    double delta_t = 0;
-    double nbfs = 0, mflop = 0;
-    double elapsed_time, elapsed_time_over_all_ranks, elapsed_time_over_all_threads,
-            elapsed_time_over_all_threads_over_all_ranks;
-    /* Control whether it is valid to print a report. Only the
-       simulation master may print, but it should not do so if the run
-       terminated e.g. before a scheduled reset step. This is
-       complicated by the fact that PME ranks are unaware of the
-       reason why they were sent a pmerecvqxFINISH. To avoid
-       communication deadlocks, we always do the communication for the
-       report, even if we've decided not to write the report, because
-       how long it takes to finish the run is not important when we've
-       decided not to report on the simulation performance.
-
-       Further, we only report performance for dynamical integrators,
-       because those are the only ones for which we plan to
-       consider doing any optimizations. */
-    bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
-
-    if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
-    {
-        GMX_LOG(mdlog.warning)
-                .asParagraph()
-                .appendText("Simulation ended prematurely, no performance report will be written.");
-        printReport = false;
-    }
-
-    t_nrnb*                 nrnb_tot;
-    std::unique_ptr<t_nrnb> nrnbTotalStorage;
-    if (cr->nnodes > 1)
-    {
-        nrnbTotalStorage = std::make_unique<t_nrnb>();
-        nrnb_tot         = nrnbTotalStorage.get();
-#if GMX_MPI
-        MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
-#endif
-    }
-    else
-    {
-        nrnb_tot = nrnb;
-    }
-
-    elapsed_time = walltime_accounting_get_time_since_reset(walltime_accounting);
-    elapsed_time_over_all_threads =
-            walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
-    if (cr->nnodes > 1)
-    {
-#if GMX_MPI
-        /* reduce elapsed_time over all MPI ranks in the current simulation */
-        MPI_Allreduce(&elapsed_time, &elapsed_time_over_all_ranks, 1, MPI_DOUBLE, MPI_SUM,
-                      cr->mpi_comm_mysim);
-        elapsed_time_over_all_ranks /= cr->nnodes;
-        /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
-         * current simulation. */
-        MPI_Allreduce(&elapsed_time_over_all_threads, &elapsed_time_over_all_threads_over_all_ranks,
-                      1, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
-#endif
-    }
-    else
-    {
-        elapsed_time_over_all_ranks                  = elapsed_time;
-        elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
-    }
-
-    if (printReport)
-    {
-        print_flop(fplog, nrnb_tot, &nbfs, &mflop);
-    }
-
-    if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
-    {
-        print_dd_statistics(cr, inputrec, fplog);
-    }
-
-    /* TODO Move the responsibility for any scaling by thread counts
-     * to the code that handled the thread region, so that there's a
-     * mechanism to keep cycle counting working during the transition
-     * to task parallelism. */
-    int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
-    int nthreads_pme = gmx_omp_nthreads_get(emntPME);
-    wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP),
-                                   nthreads_pp, nthreads_pme);
-    auto cycle_sum(wallcycle_sum(cr, wcycle));
-
-    if (printReport)
-    {
-        auto nbnxn_gpu_timings =
-                (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
-        gmx_wallclock_gpu_pme_t pme_gpu_timings = {};
-
-        if (pme_gpu_task_enabled(pme))
-        {
-            pme_gpu_get_timings(pme, &pme_gpu_timings);
-        }
-        wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
-                        elapsed_time_over_all_ranks, wcycle, cycle_sum, nbnxn_gpu_timings,
-                        &pme_gpu_timings);
-
-        if (EI_DYNAMICS(inputrec->eI))
-        {
-            delta_t = inputrec->delta_t;
-        }
-
-        if (fplog)
-        {
-            print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks,
-                       walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
-                       delta_t, nbfs, mflop);
-        }
-        if (bWriteStat)
-        {
-            print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks,
-                       walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
-                       delta_t, nbfs, mflop);
-        }
-    }
-}
-
-int Mdrunner::mdrunner()
-{
-    matrix                    box;
-    t_forcerec*               fr               = nullptr;
-    t_fcdata*                 fcd              = nullptr;
-    real                      ewaldcoeff_q     = 0;
-    real                      ewaldcoeff_lj    = 0;
-    int                       nChargePerturbed = -1, nTypePerturbed = 0;
-    gmx_wallcycle_t           wcycle;
-    gmx_walltime_accounting_t walltime_accounting = nullptr;
-    gmx_membed_t*             membed              = nullptr;
-    gmx_hw_info_t*            hwinfo              = nullptr;
-
-    /* CAUTION: threads may be started later on in this function, so
-       cr doesn't reflect the final parallel state right now */
-    gmx_mtop_t mtop;
-
-    /* TODO: inputrec should tell us whether we use an algorithm, not a file option */
-    const bool doEssentialDynamics = opt2bSet("-ei", filenames.size(), filenames.data());
-    const bool doMembed            = opt2bSet("-membed", filenames.size(), filenames.data());
-    const bool doRerun             = mdrunOptions.rerun;
-
-    // Handle task-assignment related user options.
-    EmulateGpuNonbonded emulateGpuNonbonded =
-            (getenv("GMX_EMULATE_GPU") != nullptr ? EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
-
-    std::vector<int> userGpuTaskAssignment;
-    try
-    {
-        userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
-    }
-    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-    auto nonbondedTarget = findTaskTarget(nbpu_opt);
-    auto pmeTarget       = findTaskTarget(pme_opt);
-    auto pmeFftTarget    = findTaskTarget(pme_fft_opt);
-    auto bondedTarget    = findTaskTarget(bonded_opt);
-    auto updateTarget    = findTaskTarget(update_opt);
-
-    FILE* fplog = nullptr;
-    // If we are appending, we don't write log output because we need
-    // to check that the old log file matches what the checkpoint file
-    // expects. Otherwise, we should start to write log output now if
-    // there is a file ready for it.
-    if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
-    {
-        fplog = gmx_fio_getfp(logFileHandle);
-    }
-    const bool       isSimulationMasterRank = findIsSimulationMasterRank(ms, communicator);
-    gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank));
-    gmx::MDLogger    mdlog(logOwner.logger());
-
-    // TODO The thread-MPI master rank makes a working
-    // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks
-    // after the threads have been launched. This works because no use
-    // is made of that communicator until after the execution paths
-    // have rejoined. But it is likely that we can improve the way
-    // this is expressed, e.g. by expressly running detection only the
-    // master rank for thread-MPI, rather than relying on the mutex
-    // and reference count.
-    PhysicalNodeCommunicator physicalNodeComm(communicator, gmx_physicalnode_id_hash());
-    hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
-
-    gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);
-
-    std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->gpu_info, hw_opt.gpuIdsAvailable);
-
-    // Print citation requests after all software/hardware printing
-    pleaseCiteGromacs(fplog);
-
-    // TODO Replace this by unique_ptr once t_inputrec is C++
-    t_inputrec               inputrecInstance;
-    t_inputrec*              inputrec = nullptr;
-    std::unique_ptr<t_state> globalState;
-
-    auto partialDeserializedTpr = std::make_unique<PartialDeserializedTprFile>();
-
-    if (isSimulationMasterRank)
-    {
-        /* Only the master rank has the global state */
-        globalState = std::make_unique<t_state>();
-
-        /* Read (nearly) all data required for the simulation
-         * and keep the partly serialized tpr contents to send to other ranks later
-         */
-        *partialDeserializedTpr = read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()),
-                                                 &inputrecInstance, globalState.get(), &mtop);
-        inputrec                = &inputrecInstance;
-    }
-
-    /* Check and update the hardware options for internal consistency */
-    checkAndUpdateHardwareOptions(mdlog, &hw_opt, isSimulationMasterRank, domdecOptions.numPmeRanks,
-                                  inputrec);
-
-    if (GMX_THREAD_MPI && isSimulationMasterRank)
-    {
-        bool useGpuForNonbonded = false;
-        bool useGpuForPme       = false;
-        try
-        {
-            GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy");
-
-            // If the user specified the number of ranks, then we must
-            // respect that, but in default mode, we need to allow for
-            // the number of GPUs to choose the number of ranks.
-            auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
-            useGpuForNonbonded         = decideWhetherToUseGpusForNonbondedWithThreadMpi(
-                    nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
-                    canUseGpuForNonbonded,
-                    gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
-                    hw_opt.nthreads_tmpi);
-            useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(
-                    useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
-                    *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
-        }
-        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-
-        /* Determine how many thread-MPI ranks to start.
-         *
-         * TODO Over-writing the user-supplied value here does
-         * prevent any possible subsequent checks from working
-         * correctly. */
-        hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo, &hw_opt, gpuIdsToUse, useGpuForNonbonded,
-                                                useGpuForPme, inputrec, &mtop, mdlog, doMembed);
-
-        // Now start the threads for thread MPI.
-        spawnThreads(hw_opt.nthreads_tmpi);
-        // The spawned threads enter mdrunner() and execution of
-        // master and spawned threads joins at the end of this block.
-        physicalNodeComm = PhysicalNodeCommunicator(communicator, gmx_physicalnode_id_hash());
-    }
-
-    GMX_RELEASE_ASSERT(communicator == MPI_COMM_WORLD, "Must have valid world communicator");
-    CommrecHandle crHandle = init_commrec(communicator, ms);
-    t_commrec*    cr       = crHandle.get();
-    GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec");
-
-    if (PAR(cr))
-    {
-        /* now broadcast everything to the non-master nodes/threads: */
-        if (!isSimulationMasterRank)
-        {
-            inputrec = &inputrecInstance;
-        }
-        init_parallel(cr, inputrec, &mtop, partialDeserializedTpr.get());
-    }
-    GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now");
-    partialDeserializedTpr.reset(nullptr);
-
-    // Now the number of ranks is known to all ranks, and each knows
-    // the inputrec read by the master rank. The ranks can now all run
-    // the task-deciding functions and will agree on the result
-    // without needing to communicate.
-    //
-    // TODO Should we do the communication in debug mode to support
-    // having an assertion?
-    const bool useDomainDecomposition = (PAR(cr) && !(EI_TPI(inputrec->eI) || inputrec->eI == eiNM));
-
-    // Note that these variables describe only their own node.
-    //
-    // Note that when bonded interactions run on a GPU they always run
-    // alongside a nonbonded task, so do not influence task assignment
-    // even though they affect the force calculation workload.
-    bool useGpuForNonbonded = false;
-    bool useGpuForPme       = false;
-    bool useGpuForBonded    = false;
-    bool useGpuForUpdate    = false;
-    bool gpusWereDetected   = hwinfo->ngpu_compatible_tot > 0;
-    try
-    {
-        // It's possible that there are different numbers of GPUs on
-        // different nodes, which is the user's responsibility to
-        // handle. If unsuitable, we will notice that during task
-        // assignment.
-        auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
-        useGpuForNonbonded         = decideWhetherToUseGpusForNonbonded(
-                nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded,
-                gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected);
-        useGpuForPme = decideWhetherToUseGpusForPme(
-                useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo, *inputrec, mtop,
-                cr->nnodes, domdecOptions.numPmeRanks, gpusWereDetected);
-        auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr)
-                                  && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
-        useGpuForBonded = decideWhetherToUseGpusForBonded(
-                useGpuForNonbonded, useGpuForPme, bondedTarget, canUseGpuForBonded,
-                EVDW_PME(inputrec->vdwtype), EEL_PME_EWALD(inputrec->coulombtype),
-                domdecOptions.numPmeRanks, gpusWereDetected);
-    }
-    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-
-    const PmeRunMode pmeRunMode = determinePmeRunMode(useGpuForPme, pmeFftTarget, *inputrec);
-
-    // Initialize development feature flags that enabled by environment variable
-    // and report those features that are enabled.
-    const DevelopmentFeatureFlags devFlags =
-            manageDevelopmentFeatures(mdlog, useGpuForNonbonded, pmeRunMode);
-
-    const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
-            false, inputrec, doRerun, mtop, ms, replExParams, nullptr, doEssentialDynamics, doMembed) && (plumedswitch==0);
-    const bool useModularSimulator = inputIsCompatibleWithModularSimulator
-                                     && !(getenv("GMX_DISABLE_MODULAR_SIMULATOR") != nullptr);
-
-    // Build restraints.
-    // TODO: hide restraint implementation details from Mdrunner.
-    // There is nothing unique about restraints at this point as far as the
-    // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
-    // factory functions from the SimulationContext on which to call mdModules_->add().
-    // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
-    for (auto&& restraint : restraintManager_->getRestraints())
-    {
-        auto module = RestraintMDModule::create(restraint, restraint->sites());
-        mdModules_->add(std::move(module));
-    }
-
-    // TODO: Error handling
-    mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
-    const auto& mdModulesNotifier = mdModules_->notifier().notifier_;
-
-    if (inputrec->internalParameters != nullptr)
-    {
-        mdModulesNotifier.notify(*inputrec->internalParameters);
-    }
-
-    if (fplog != nullptr)
-    {
-        pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
-        fprintf(fplog, "\n");
-    }
-
-    if (SIMMASTER(cr))
-    {
-        /* In rerun, set velocities to zero if present */
-        if (doRerun && ((globalState->flags & (1 << estV)) != 0))
-        {
-            // rerun does not use velocities
-            GMX_LOG(mdlog.info)
-                    .asParagraph()
-                    .appendText(
-                            "Rerun trajectory contains velocities. Rerun does only evaluate "
-                            "potential energy and forces. The velocities will be ignored.");
-            for (int i = 0; i < globalState->natoms; i++)
-            {
-                clear_rvec(globalState->v[i]);
-            }
-            globalState->flags &= ~(1 << estV);
-        }
-
-        /* now make sure the state is initialized and propagated */
-        set_state_entries(globalState.get(), inputrec, useModularSimulator);
-    }
-
-    /* NM and TPI parallelize over force/energy calculations, not atoms,
-     * so we need to initialize and broadcast the global state.
-     */
-    if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
-    {
-        if (!MASTER(cr))
-        {
-            globalState = std::make_unique<t_state>();
-        }
-        broadcastStateWithoutDynamics(cr, globalState.get());
-    }
-
-    /* A parallel command line option consistency check that we can
-       only do after any threads have started. */
-    if (!PAR(cr)
-        && (domdecOptions.numCells[XX] > 1 || domdecOptions.numCells[YY] > 1
-            || domdecOptions.numCells[ZZ] > 1 || domdecOptions.numPmeRanks > 0))
-    {
-        gmx_fatal(FARGS,
-                  "The -dd or -npme option request a parallel simulation, "
-#if !GMX_MPI
-                  "but %s was compiled without threads or MPI enabled",
-                  output_env_get_program_display_name(oenv));
-#elif GMX_THREAD_MPI
-                  "but the number of MPI-threads (option -ntmpi) is not set or is 1");
-#else
-                  "but %s was not started through mpirun/mpiexec or only one rank was requested "
-                  "through mpirun/mpiexec",
-                  output_env_get_program_display_name(oenv));
-#endif
-    }
-
-    if (doRerun && (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
-    {
-        gmx_fatal(FARGS,
-                  "The .mdp file specified an energy mininization or normal mode algorithm, and "
-                  "these are not compatible with mdrun -rerun");
-    }
-
-    if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
-    {
-        if (domdecOptions.numPmeRanks > 0)
-        {
-            gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
-                                 "PME-only ranks are requested, but the system does not use PME "
-                                 "for electrostatics or LJ");
-        }
-
-        domdecOptions.numPmeRanks = 0;
-    }
-
-    if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
-    {
-        /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
-         * improve performance with many threads per GPU, since our OpenMP
-         * scaling is bad, but it's difficult to automate the setup.
-         */
-        domdecOptions.numPmeRanks = 0;
-    }
-    if (useGpuForPme)
-    {
-        if (domdecOptions.numPmeRanks < 0)
-        {
-            domdecOptions.numPmeRanks = 0;
-            // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
-        }
-        else
-        {
-            GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1,
-                               "PME GPU decomposition is not supported");
-        }
-    }
-
-    /* NMR restraints must be initialized before load_checkpoint,
-     * since with time averaging the history is added to t_state.
-     * For proper consistency check we therefore need to extend
-     * t_state here.
-     * So the PME-only nodes (if present) will also initialize
-     * the distance restraints.
-     */
-    snew(fcd, 1);
-
-    /* This needs to be called before read_checkpoint to extend the state */
-    init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
-
-    init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
-
-    auto deform = prepareBoxDeformation(globalState->box, cr, *inputrec);
-
-#if GMX_FAHCORE
-    /* We have to remember the generation's first step before reading checkpoint.
-       This way, we can report to the F@H core both the generation's first step
-       and the restored first step, thus making it able to distinguish between
-       an interruption/resume and start of the n-th generation simulation.
-       Having this information, the F@H core can correctly calculate and report
-       the progress.
-     */
-    int gen_first_step = 0;
-    if (MASTER(cr))
-    {
-        gen_first_step = inputrec->init_step;
-    }
-#endif
-
-    ObservablesHistory observablesHistory = {};
-
-    if (startingBehavior != StartingBehavior::NewSimulation)
-    {
-        /* Check if checkpoint file exists before doing continuation.
-         * This way we can use identical input options for the first and subsequent runs...
-         */
-        if (mdrunOptions.numStepsCommandline > -2)
-        {
-            /* Temporarily set the number of steps to unlimited to avoid
-             * triggering the nsteps check in load_checkpoint().
-             * This hack will go away soon when the -nsteps option is removed.
-             */
-            inputrec->nsteps = -1;
-        }
-
-        load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr),
-                        logFileHandle, cr, domdecOptions.numCells, inputrec, globalState.get(),
-                        &observablesHistory, mdrunOptions.reproducible, mdModules_->notifier());
-
-        if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
-        {
-            // Now we can start normal logging to the truncated log file.
-            fplog = gmx_fio_getfp(logFileHandle);
-            prepareLogAppending(fplog);
-            logOwner = buildLogger(fplog, MASTER(cr));
-            mdlog    = logOwner.logger();
-        }
-    }
-
-#if GMX_FAHCORE
-    if (MASTER(cr))
-    {
-        fcRegisterSteps(inputrec->nsteps + inputrec->init_step, gen_first_step);
-    }
-#endif
-
-    if (mdrunOptions.numStepsCommandline > -2)
-    {
-        GMX_LOG(mdlog.info)
-                .asParagraph()
-                .appendText(
-                        "The -nsteps functionality is deprecated, and may be removed in a future "
-                        "version. "
-                        "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp "
-                        "file field.");
-    }
-    /* override nsteps with value set on the commandline */
-    override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec);
-
-    if (SIMMASTER(cr))
-    {
-        copy_mat(globalState->box, box);
-    }
-
-    if (PAR(cr))
-    {
-        gmx_bcast(sizeof(box), box, cr);
-    }
-
-    if (inputrec->cutoff_scheme != ecutsVERLET)
-    {
-        gmx_fatal(FARGS,
-                  "This group-scheme .tpr file can no longer be run by mdrun. Please update to the "
-                  "Verlet scheme, or use an earlier version of GROMACS if necessary.");
-    }
-    /* Update rlist and nstlist. */
-    prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
-                          useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes),
-                          *hwinfo->cpuInfo);
-
-    const bool prefer1DAnd1PulseDD = (devFlags.enableGpuHaloExchange && useGpuForNonbonded);
-    // This builder is necessary while we have multi-part construction
-    // of DD. Before DD is constructed, we use the existence of
-    // the builder object to indicate that further construction of DD
-    // is needed.
-    std::unique_ptr<DomainDecompositionBuilder> ddBuilder;
-    if (useDomainDecomposition)
-    {
-        ddBuilder = std::make_unique<DomainDecompositionBuilder>(
-                mdlog, cr, domdecOptions, mdrunOptions, prefer1DAnd1PulseDD, mtop, *inputrec, box,
-                positionsFromStatePointer(globalState.get()));
-    }
-    else
-    {
-        /* PME, if used, is done on all nodes with 1D decomposition */
-        cr->npmenodes = 0;
-        cr->duty      = (DUTY_PP | DUTY_PME);
-
-        if (inputrec->ePBC == epbcSCREW)
-        {
-            gmx_fatal(FARGS, "pbc=screw is only implemented with domain decomposition");
-        }
-    }
-
-    // Produce the task assignment for this rank.
-    GpuTaskAssignmentsBuilder gpuTaskAssignmentsBuilder;
-    GpuTaskAssignments        gpuTaskAssignments = gpuTaskAssignmentsBuilder.build(
-            gpuIdsToUse, userGpuTaskAssignment, *hwinfo, communicator, physicalNodeComm,
-            nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded,
-            useGpuForPme, thisRankHasDuty(cr, DUTY_PP),
-            // TODO cr->duty & DUTY_PME should imply that a PME
-            // algorithm is active, but currently does not.
-            EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
-
-    // Get the device handles for the modules, nullptr when no task is assigned.
-    gmx_device_info_t* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
-    gmx_device_info_t* pmeDeviceInfo       = gpuTaskAssignments.initPmeDevice();
-
-    // TODO Initialize GPU streams here.
-
-    // TODO Currently this is always built, yet DD partition code
-    // checks if it is built before using it. Probably it should
-    // become an MDModule that is made only when another module
-    // requires it (e.g. pull, CompEl, density fitting), so that we
-    // don't update the local atom sets unilaterally every step.
-    LocalAtomSetManager atomSets;
-    if (ddBuilder)
-    {
-        // TODO Pass the GPU streams to ddBuilder to use in buffer
-        // transfers (e.g. halo exchange)
-        cr->dd = ddBuilder->build(&atomSets);
-        // The builder's job is done, so destruct it
-        ddBuilder.reset(nullptr);
-        // Note that local state still does not exist yet.
-    }
-
-    // The GPU update is decided here because we need to know whether the constraints or
-    // SETTLEs can span accross the domain borders (i.e. whether or not update groups are
-    // defined). This is only known after DD is initialized, hence decision on using GPU
-    // update is done so late.
-    try
-    {
-        const bool useUpdateGroups = cr->dd ? ddUsesUpdateGroups(*cr->dd) : false;
-
-        useGpuForUpdate = decideWhetherToUseGpuForUpdate(
-                useDomainDecomposition, useUpdateGroups, pmeRunMode, domdecOptions.numPmeRanks > 0,
-                useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop,
-                doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
-                replExParams.exchangeInterval > 0, doRerun, devFlags, mdlog);
-    }
-    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-
-    const bool printHostName = (cr->nnodes > 1);
-    gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
-
-    // If the user chose a task assignment, give them some hints
-    // where appropriate.
-    if (!userGpuTaskAssignment.empty())
-    {
-        gpuTaskAssignments.logPerformanceHints(mdlog, ssize(gpuIdsToUse));
-    }
-
-    if (PAR(cr))
-    {
-        /* After possible communicator splitting in make_dd_communicators.
-         * we can set up the intra/inter node communication.
-         */
-        gmx_setup_nodecomm(fplog, cr);
-    }
-
-#if GMX_MPI
-    if (isMultiSim(ms))
-    {
-        GMX_LOG(mdlog.warning)
-                .asParagraph()
-                .appendTextFormatted(
-                        "This is simulation %d out of %d running as a composite GROMACS\n"
-                        "multi-simulation job. Setup for this simulation:\n",
-                        ms->sim, ms->nsim);
-    }
-    GMX_LOG(mdlog.warning)
-            .appendTextFormatted("Using %d MPI %s\n", cr->nnodes,
-#    if GMX_THREAD_MPI
-                                 cr->nnodes == 1 ? "thread" : "threads"
-#    else
-                                 cr->nnodes == 1 ? "process" : "processes"
-#    endif
-            );
-    fflush(stderr);
-#endif
-
-    // If mdrun -pin auto honors any affinity setting that already
-    // exists. If so, it is nice to provide feedback about whether
-    // that existing affinity setting was from OpenMP or something
-    // else, so we run this code both before and after we initialize
-    // the OpenMP support.
-    gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
-    /* Check and update the number of OpenMP threads requested */
-    checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
-                                            pmeRunMode, mtop, *inputrec);
-
-    gmx_omp_nthreads_init(mdlog, cr, hwinfo->nthreads_hw_avail, physicalNodeComm.size_,
-                          hw_opt.nthreads_omp, hw_opt.nthreads_omp_pme, !thisRankHasDuty(cr, DUTY_PP));
-
-    // Enable FP exception detection, but not in
-    // Release mode and not for compilers with known buggy FP
-    // exception support (clang with any optimization) or suspected
-    // buggy FP exception support (gcc 7.* with optimization).
-#if !defined NDEBUG                                                                         \
-        && !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
-             && defined __OPTIMIZE__)
-    const bool bEnableFPE = true;
-#else
-    const bool bEnableFPE = false;
-#endif
-    // FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
-    if (bEnableFPE)
-    {
-        gmx_feenableexcept();
-    }
-
-    /* Now that we know the setup is consistent, check for efficiency */
-    check_resource_division_efficiency(hwinfo, gpuTaskAssignments.thisRankHasAnyGpuTask(),
-                                       mdrunOptions.ntompOptionIsSet, cr, mdlog);
-
-    /* getting number of PP/PME threads on this MPI / tMPI rank.
-       PME: env variable should be read only on one node to make sure it is
-       identical everywhere;
-     */
-    const int numThreadsOnThisRank = thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded)
-                                                                  : gmx_omp_nthreads_get(emntPME);
-    checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo->hardwareTopology,
-                                  physicalNodeComm, mdlog);
-
-    // Enable Peer access between GPUs where available
-    // Only for DD, only master PP rank needs to perform setup, and only if thread MPI plus
-    // any of the GPU communication features are active.
-    if (DOMAINDECOMP(cr) && MASTER(cr) && thisRankHasDuty(cr, DUTY_PP) && GMX_THREAD_MPI
-        && (devFlags.enableGpuHaloExchange || devFlags.enableGpuPmePPComm))
-    {
-        setupGpuDevicePeerAccess(gpuIdsToUse, mdlog);
-    }
-
-    if (hw_opt.threadAffinity != ThreadAffinity::Off)
-    {
-        /* Before setting affinity, check whether the affinity has changed
-         * - which indicates that probably the OpenMP library has changed it
-         * since we first checked).
-         */
-        gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
-
-        int numThreadsOnThisNode, intraNodeThreadOffset;
-        analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
-                                 &intraNodeThreadOffset);
-
-        /* Set the CPU affinity */
-        gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology, numThreadsOnThisRank,
-                                numThreadsOnThisNode, intraNodeThreadOffset, nullptr);
-    }
-
-    if (mdrunOptions.timingOptions.resetStep > -1)
-    {
-        GMX_LOG(mdlog.info)
-                .asParagraph()
-                .appendText(
-                        "The -resetstep functionality is deprecated, and may be removed in a "
-                        "future version.");
-    }
-    wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
-
-    if (PAR(cr))
-    {
-        /* Master synchronizes its value of reset_counters with all nodes
-         * including PME only nodes */
-        int64_t reset_counters = wcycle_get_reset_counters(wcycle);
-        gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
-        wcycle_set_reset_counters(wcycle, reset_counters);
-    }
-
-    // Membrane embedding must be initialized before we call init_forcerec()
-    if (doMembed)
-    {
-        if (MASTER(cr))
-        {
-            fprintf(stderr, "Initializing membed");
-        }
-        /* Note that membed cannot work in parallel because mtop is
-         * changed here. Fix this if we ever want to make it run with
-         * multiple ranks. */
-        membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec,
-                             globalState.get(), cr, &mdrunOptions.checkpointOptions.period);
-    }
-
-    const bool                   thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
-    std::unique_ptr<MDAtoms>     mdAtoms;
-    std::unique_ptr<gmx_vsite_t> vsite;
-
-    t_nrnb nrnb;
-    if (thisRankHasDuty(cr, DUTY_PP))
-    {
-        mdModulesNotifier.notify(*cr);
-        mdModulesNotifier.notify(&atomSets);
-        mdModulesNotifier.notify(PeriodicBoundaryConditionType{ inputrec->ePBC });
-        mdModulesNotifier.notify(SimulationTimeStep{ inputrec->delta_t });
-        /* Initiate forcerecord */
-        fr                 = new t_forcerec;
-        fr->forceProviders = mdModules_->initForceProviders();
-        init_forcerec(fplog, mdlog, fr, fcd, inputrec, &mtop, cr, box,
-                      opt2fn("-table", filenames.size(), filenames.data()),
-                      opt2fn("-tablep", filenames.size(), filenames.data()),
-                      opt2fns("-tableb", filenames.size(), filenames.data()), *hwinfo,
-                      nonbondedDeviceInfo, useGpuForBonded,
-                      pmeRunMode == PmeRunMode::GPU && !thisRankHasDuty(cr, DUTY_PME), pforce, wcycle);
-
-        // TODO Move this to happen during domain decomposition setup,
-        // once stream and event handling works well with that.
-        // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
-        if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd))
-        {
-            GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps,
-                               "Must use GMX_USE_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
-            void* streamLocal =
-                    Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local);
-            void* streamNonLocal =
-                    Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal);
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendTextFormatted(
-                            "NOTE: This run uses the 'GPU halo exchange' feature, enabled by the "
-                            "GMX_GPU_DD_COMMS environment variable.");
-            cr->dd->gpuHaloExchange = std::make_unique<GpuHaloExchange>(
-                    cr->dd, cr->mpi_comm_mysim, streamLocal, streamNonLocal);
-        }
-
-        /* Initialize the mdAtoms structure.
-         * mdAtoms is not filled with atom data,
-         * as this can not be done now with domain decomposition.
-         */
-        mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
-        if (globalState && thisRankHasPmeGpuTask)
-        {
-            // The pinning of coordinates in the global state object works, because we only use
-            // PME on GPU without DD or on a separate PME rank, and because the local state pointer
-            // points to the global state object without DD.
-            // FIXME: MD and EM separately set up the local state - this should happen in the same
-            // function, which should also perform the pinning.
-            changePinningPolicy(&globalState->x, pme_get_pinning_policy());
-        }
-
-        /* Initialize the virtual site communication */
-        vsite = initVsite(mtop, cr);
-
-        calc_shifts(box, fr->shift_vec);
-
-        /* With periodic molecules the charge groups should be whole at start up
-         * and the virtual sites should not be far from their proper positions.
-         */
-        if (!inputrec->bContinuation && MASTER(cr) && !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
-        {
-            /* Make molecules whole at start of run */
-            if (fr->ePBC != epbcNONE)
-            {
-                do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
-            }
-            if (vsite)
-            {
-                /* Correct initial vsite positions are required
-                 * for the initial distribution in the domain decomposition
-                 * and for the initial shell prediction.
-                 */
-                constructVsitesGlobal(mtop, globalState->x);
-            }
-        }
-
-        if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
-        {
-            ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
-            ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
-        }
-    }
-    else
-    {
-        /* This is a PME only node */
-
-        GMX_ASSERT(globalState == nullptr,
-                   "We don't need the state on a PME only rank and expect it to be unitialized");
-
-        ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
-        ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
-    }
-
-    gmx_pme_t* sepPmeData = nullptr;
-    // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
-    GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr),
-               "Double-checking that only PME-only ranks have no forcerec");
-    gmx_pme_t*& pmedata = fr ? fr->pmedata : sepPmeData;
-
-    // TODO should live in ewald module once its testing is improved
-    //
-    // Later, this program could contain kernels that might be later
-    // re-used as auto-tuning progresses, or subsequent simulations
-    // are invoked.
-    PmeGpuProgramStorage pmeGpuProgram;
-    if (thisRankHasPmeGpuTask)
-    {
-        pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
-    }
-
-    /* Initiate PME if necessary,
-     * either on all nodes or on dedicated PME nodes only. */
-    if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
-    {
-        if (mdAtoms && mdAtoms->mdatoms())
-        {
-            nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
-            if (EVDW_PME(inputrec->vdwtype))
-            {
-                nTypePerturbed = mdAtoms->mdatoms()->nTypePerturbed;
-            }
-        }
-        if (cr->npmenodes > 0)
-        {
-            /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
-            gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
-            gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
-        }
-
-        if (thisRankHasDuty(cr, DUTY_PME))
-        {
-            try
-            {
-                pmedata = gmx_pme_init(cr, getNumPmeDomains(cr->dd), inputrec, nChargePerturbed != 0,
-                                       nTypePerturbed != 0, mdrunOptions.reproducible, ewaldcoeff_q,
-                                       ewaldcoeff_lj, gmx_omp_nthreads_get(emntPME), pmeRunMode,
-                                       nullptr, pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
-            }
-            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-        }
-    }
-
-
-    if (EI_DYNAMICS(inputrec->eI))
-    {
-        /* Turn on signal handling on all nodes */
-        /*
-         * (A user signal from the PME nodes (if any)
-         * is communicated to the PP nodes.
-         */
-        signal_handler_install();
-    }
-
-    pull_t* pull_work = nullptr;
-    if (thisRankHasDuty(cr, DUTY_PP))
-    {
-        /* Assumes uniform use of the number of OpenMP threads */
-        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
-
-        if (inputrec->bPull)
-        {
-            /* Initialize pull code */
-            pull_work = init_pull(fplog, inputrec->pull, inputrec, &mtop, cr, &atomSets,
-                                  inputrec->fepvals->init_lambda);
-            if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
-            {
-                initPullHistory(pull_work, &observablesHistory);
-            }
-            if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
-            {
-                init_pull_output_files(pull_work, filenames.size(), filenames.data(), oenv, startingBehavior);
-            }
-        }
-
-        std::unique_ptr<EnforcedRotation> enforcedRotation;
-        if (inputrec->bRot)
-        {
-            /* Initialize enforced rotation code */
-            enforcedRotation =
-                    init_rot(fplog, inputrec, filenames.size(), filenames.data(), cr, &atomSets,
-                             globalState.get(), &mtop, oenv, mdrunOptions, startingBehavior);
-        }
-
-        t_swap* swap = nullptr;
-        if (inputrec->eSwapCoords != eswapNO)
-        {
-            /* Initialize ion swapping code */
-            swap = init_swapcoords(fplog, inputrec,
-                                   opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
-                                   &mtop, globalState.get(), &observablesHistory, cr, &atomSets,
-                                   oenv, mdrunOptions, startingBehavior);
-        }
-
-        /* Let makeConstraints know whether we have essential dynamics constraints. */
-        auto constr = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics, fplog,
-                                      *mdAtoms->mdatoms(), cr, ms, &nrnb, wcycle, fr->bMolPBC);
-
-        /* Energy terms and groups */
-        gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),
-                             inputrec->fepvals->n_lambda);
-
-        // cos acceleration is only supported by md, but older tpr
-        // files might still combine it with other integrators
-        GMX_RELEASE_ASSERT(inputrec->cos_accel == 0.0 || inputrec->eI == eiMD,
-                           "cos_acceleration is only supported by integrator=md");
-
-        /* Kinetic energy data */
-        gmx_ekindata_t ekind;
-        init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel);
-
-        /* Set up interactive MD (IMD) */
-        auto imdSession =
-                makeImdSession(inputrec, cr, wcycle, &enerd, ms, &mtop, mdlog,
-                               MASTER(cr) ? globalState->x.rvec_array() : nullptr, filenames.size(),
-                               filenames.data(), oenv, mdrunOptions.imdOptions, startingBehavior);
-
-        if (DOMAINDECOMP(cr))
-        {
-            GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
-            /* This call is not included in init_domain_decomposition mainly
-             * because fr->cginfo_mb is set later.
-             */
-            dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
-                            domdecOptions.checkBondedInteractions, fr->cginfo_mb);
-        }
-
-        // TODO This is not the right place to manage the lifetime of
-        // this data structure, but currently it's the easiest way to
-        // make it work.
-        MdrunScheduleWorkload runScheduleWork;
-        // Also populates the simulation constant workload description.
-        runScheduleWork.simulationWork = createSimulationWorkload(
-                useGpuForNonbonded, pmeRunMode, useGpuForBonded, useGpuForUpdate,
-                devFlags.enableGpuBufferOps, devFlags.enableGpuHaloExchange,
-                devFlags.enableGpuPmePPComm, haveEwaldSurfaceContribution(*inputrec));
-
-        std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
-        if (gpusWereDetected
-            && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME))
-                || runScheduleWork.simulationWork.useGpuBufferOps))
-        {
-            const void* pmeStream = pme_gpu_get_device_stream(fr->pmedata);
-            const void* localStream =
-                    fr->nbv->gpu_nbv != nullptr
-                            ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local)
-                            : nullptr;
-            const void* nonLocalStream =
-                    fr->nbv->gpu_nbv != nullptr
-                            ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal)
-                            : nullptr;
-            const void*        deviceContext = pme_gpu_get_device_context(fr->pmedata);
-            const int          paddingSize   = pme_gpu_get_padding_size(fr->pmedata);
-            GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator)
-                                                      ? GpuApiCallBehavior::Async
-                                                      : GpuApiCallBehavior::Sync;
-
-            stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
-                    pmeStream, localStream, nonLocalStream, deviceContext, transferKind, paddingSize, wcycle);
-            fr->stateGpu = stateGpu.get();
-        }
-
-        GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
-        SimulatorBuilder simulatorBuilder;
-
-        /* PLUMED */
-        if(plumedswitch){
-          if(useModularSimulator) gmx_fatal(FARGS, "PLUMED is not yet compatible with GROMACS new modular simulator");
-          /* detect plumed API version */
-          int pversion=0;
-          plumed_cmd(plumedmain,"getApiVersion",&pversion);
-          if(pversion>5) {
-             int nth = gmx_omp_nthreads_get(emntDefault);
-             if(pversion>5) plumed_cmd(plumedmain,"setNumOMPthreads",&nth);
-          }
-        }
-        /* END PLUMED */
-
-        // build and run simulator object based on user-input
-        auto simulator = simulatorBuilder.build(
-                inputIsCompatibleWithModularSimulator, fplog, cr, ms, mdlog,
-                static_cast<int>(filenames.size()), filenames.data(), oenv, mdrunOptions,
-                startingBehavior, vsite.get(), constr.get(),
-                enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr, deform.get(),
-                mdModules_->outputProvider(), mdModules_->notifier(), inputrec, imdSession.get(),
-                pull_work, swap, &mtop, fcd, globalState.get(), &observablesHistory, mdAtoms.get(),
-                &nrnb, wcycle, fr, &enerd, &ekind, &runScheduleWork, replExParams, membed,
-                walltime_accounting, std::move(stopHandlerBuilder_), doRerun);
-        simulator->run();
-
-        if (fr->pmePpCommGpu)
-        {
-            // destroy object since it is no longer required. (This needs to be done while the GPU context still exists.)
-            fr->pmePpCommGpu.reset();
-        }
-
-        if (inputrec->bPull)
-        {
-            finish_pull(pull_work);
-        }
-        finish_swapcoords(swap);
-    }
-    else
-    {
-        GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
-        /* do PME only */
-        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
-        gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
-    }
-
-    wallcycle_stop(wcycle, ewcRUN);
-
-    /* Finish up, write some stuff
-     * if rerunMD, don't write last frame again
-     */
-    finish_run(fplog, mdlog, cr, inputrec, &nrnb, wcycle, walltime_accounting,
-               fr ? fr->nbv.get() : nullptr, pmedata, EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
-
-    // clean up cycle counter
-    wallcycle_destroy(wcycle);
-
-    // Free PME data
-    if (pmedata)
-    {
-        gmx_pme_destroy(pmedata);
-        pmedata = nullptr;
-    }
-
-    // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
-    // before we destroy the GPU context(s) in free_gpu_resources().
-    // Pinned buffers are associated with contexts in CUDA.
-    // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
-    mdAtoms.reset(nullptr);
-    globalState.reset(nullptr);
-    mdModules_.reset(nullptr); // destruct force providers here as they might also use the GPU
-
-    /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
-    free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info);
-    free_gpu(nonbondedDeviceInfo);
-    free_gpu(pmeDeviceInfo);
-    done_forcerec(fr, mtop.molblock.size());
-    sfree(fcd);
-
-    if (doMembed)
-    {
-        free_membed(membed);
-    }
-
-    /* Does what it says */
-    print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
-    walltime_accounting_destroy(walltime_accounting);
-
-    /* PLUMED */
-    if(plumedswitch){
-      plumed_finalize(plumedmain);
-    }
-    /* END PLUMED */
-
-    // Ensure log file content is written
-    if (logFileHandle)
-    {
-        gmx_fio_flush(logFileHandle);
-    }
-
-    /* Reset FPEs (important for unit tests) by disabling them. Assumes no
-     * exceptions were enabled before function was called. */
-    if (bEnableFPE)
-    {
-        gmx_fedisableexcept();
-    }
-
-    auto rc = static_cast<int>(gmx_get_stop_condition());
-
-#if GMX_THREAD_MPI
-    /* we need to join all threads. The sub-threads join when they
-       exit this function, but the master thread needs to be told to
-       wait for that. */
-    if (PAR(cr) && MASTER(cr))
-    {
-        tMPI_Finalize();
-    }
-#endif
-    return rc;
-}
-
-Mdrunner::~Mdrunner()
-{
-    // Clean up of the Manager.
-    // This will end up getting called on every thread-MPI rank, which is unnecessary,
-    // but okay as long as threads synchronize some time before adding or accessing
-    // a new set of restraints.
-    if (restraintManager_)
-    {
-        restraintManager_->clear();
-        GMX_ASSERT(restraintManager_->countRestraints() == 0,
-                   "restraints added during runner life time should be cleared at runner "
-                   "destruction.");
-    }
-};
-
-void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller, const std::string& name)
-{
-    GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
-    // Not sure if this should be logged through the md logger or something else,
-    // but it is helpful to have some sort of INFO level message sent somewhere.
-    //    std::cout << "Registering restraint named " << name << std::endl;
-
-    // When multiple restraints are used, it may be wasteful to register them separately.
-    // Maybe instead register an entire Restraint Manager as a force provider.
-    restraintManager_->addToSpec(std::move(puller), name);
-}
-
-Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules) : mdModules_(std::move(mdModules)) {}
-
-Mdrunner::Mdrunner(Mdrunner&&) noexcept = default;
-
-//NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
-Mdrunner& Mdrunner::operator=(Mdrunner&& /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
-
-class Mdrunner::BuilderImplementation
-{
-public:
-    BuilderImplementation() = delete;
-    BuilderImplementation(std::unique_ptr<MDModules> mdModules, compat::not_null<SimulationContext*> context);
-    ~BuilderImplementation();
-
-    BuilderImplementation& setExtraMdrunOptions(const MdrunOptions& options,
-                                                real                forceWarningThreshold,
-                                                StartingBehavior    startingBehavior);
-
-    void addDomdec(const DomdecOptions& options);
-
-    void addVerletList(int nstlist);
-
-    void addReplicaExchange(const ReplicaExchangeParameters& params);
-
-    void addNonBonded(const char* nbpu_opt);
-
-    void addPME(const char* pme_opt_, const char* pme_fft_opt_);
-
-    void addBondedTaskAssignment(const char* bonded_opt);
-
-    void addUpdateTaskAssignment(const char* update_opt);
-
-    void addHardwareOptions(const gmx_hw_opt_t& hardwareOptions);
-
-    void addFilenames(ArrayRef<const t_filenm> filenames);
-
-    void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
-
-    void addLogFile(t_fileio* logFileHandle);
-
-    void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
-
-    Mdrunner build();
-
-private:
-    // Default parameters copied from runner.h
-    // \todo Clarify source(s) of default parameters.
-
-    const char* nbpu_opt_    = nullptr;
-    const char* pme_opt_     = nullptr;
-    const char* pme_fft_opt_ = nullptr;
-    const char* bonded_opt_  = nullptr;
-    const char* update_opt_  = nullptr;
-
-    MdrunOptions mdrunOptions_;
-
-    DomdecOptions domdecOptions_;
-
-    ReplicaExchangeParameters replicaExchangeParameters_;
-
-    //! Command-line override for the duration of a neighbor list with the Verlet scheme.
-    int nstlist_ = 0;
-
-    //! Multisim communicator handle.
-    gmx_multisim_t* multiSimulation_;
-
-    //! mdrun communicator
-    MPI_Comm communicator_ = MPI_COMM_NULL;
-
-    //! Print a warning if any force is larger than this (in kJ/mol nm).
-    real forceWarningThreshold_ = -1;
-
-    //! Whether the simulation will start afresh, or restart with/without appending.
-    StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
-
-    //! The modules that comprise the functionality of mdrun.
-    std::unique_ptr<MDModules> mdModules_;
-
-    //! \brief Parallelism information.
-    gmx_hw_opt_t hardwareOptions_;
-
-    //! filename options for simulation.
-    ArrayRef<const t_filenm> filenames_;
-
-    /*! \brief Handle to output environment.
-     *
-     * \todo gmx_output_env_t needs lifetime management.
-     */
-    gmx_output_env_t* outputEnvironment_ = nullptr;
-
-    /*! \brief Non-owning handle to MD log file.
-     *
-     * \todo Context should own output facilities for client.
-     * \todo Improve log file handle management.
-     * \internal
-     * Code managing the FILE* relies on the ability to set it to
-     * nullptr to check whether the filehandle is valid.
-     */
-    t_fileio* logFileHandle_ = nullptr;
-
-    /*!
-     * \brief Builder for simulation stop signal handler.
-     */
-    std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
-};
-
-Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules,
-                                                       compat::not_null<SimulationContext*> context) :
-    mdModules_(std::move(mdModules))
-{
-    communicator_    = context->communicator_;
-    multiSimulation_ = context->multiSimulation_.get();
-}
-
-Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
-
-Mdrunner::BuilderImplementation&
-Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions&    options,
-                                                      const real             forceWarningThreshold,
-                                                      const StartingBehavior startingBehavior)
-{
-    mdrunOptions_          = options;
-    forceWarningThreshold_ = forceWarningThreshold;
-    startingBehavior_      = startingBehavior;
-    return *this;
-}
-
-void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions& options)
-{
-    domdecOptions_ = options;
-}
-
-void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
-{
-    nstlist_ = nstlist;
-}
-
-void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters& params)
-{
-    replicaExchangeParameters_ = params;
-}
-
-Mdrunner Mdrunner::BuilderImplementation::build()
-{
-    auto newRunner = Mdrunner(std::move(mdModules_));
-
-    newRunner.mdrunOptions     = mdrunOptions_;
-    newRunner.pforce           = forceWarningThreshold_;
-    newRunner.startingBehavior = startingBehavior_;
-    newRunner.domdecOptions    = domdecOptions_;
-
-    // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
-    newRunner.hw_opt = hardwareOptions_;
-
-    // No invariant to check. This parameter exists to optionally override other behavior.
-    newRunner.nstlist_cmdline = nstlist_;
-
-    newRunner.replExParams = replicaExchangeParameters_;
-
-    newRunner.filenames = filenames_;
-
-    newRunner.communicator = communicator_;
-
-    // nullptr is a valid value for the multisim handle
-    newRunner.ms = multiSimulation_;
-
-    // \todo Clarify ownership and lifetime management for gmx_output_env_t
-    // \todo Update sanity checking when output environment has clearly specified invariants.
-    // Initialization and default values for oenv are not well specified in the current version.
-    if (outputEnvironment_)
-    {
-        newRunner.oenv = outputEnvironment_;
-    }
-    else
-    {
-        GMX_THROW(gmx::APIError(
-                "MdrunnerBuilder::addOutputEnvironment() is required before build()"));
-    }
-
-    newRunner.logFileHandle = logFileHandle_;
-
-    if (nbpu_opt_)
-    {
-        newRunner.nbpu_opt = nbpu_opt_;
-    }
-    else
-    {
-        GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
-    }
-
-    if (pme_opt_ && pme_fft_opt_)
-    {
-        newRunner.pme_opt     = pme_opt_;
-        newRunner.pme_fft_opt = pme_fft_opt_;
-    }
-    else
-    {
-        GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
-    }
-
-    if (bonded_opt_)
-    {
-        newRunner.bonded_opt = bonded_opt_;
-    }
-    else
-    {
-        GMX_THROW(gmx::APIError(
-                "MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
-    }
-
-    if (update_opt_)
-    {
-        newRunner.update_opt = update_opt_;
-    }
-    else
-    {
-        GMX_THROW(gmx::APIError(
-                "MdrunnerBuilder::addUpdateTaskAssignment() is required before build()  "));
-    }
-
-
-    newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
-
-    if (stopHandlerBuilder_)
-    {
-        newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
-    }
-    else
-    {
-        newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
-    }
-
-    return newRunner;
-}
-
-void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
-{
-    nbpu_opt_ = nbpu_opt;
-}
-
-void Mdrunner::BuilderImplementation::addPME(const char* pme_opt, const char* pme_fft_opt)
-{
-    pme_opt_     = pme_opt;
-    pme_fft_opt_ = pme_fft_opt;
-}
-
-void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
-{
-    bonded_opt_ = bonded_opt;
-}
-
-void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt)
-{
-    update_opt_ = update_opt;
-}
-
-void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions)
-{
-    hardwareOptions_ = hardwareOptions;
-}
-
-void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
-{
-    filenames_ = filenames;
-}
-
-void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
-{
-    outputEnvironment_ = outputEnvironment;
-}
-
-void Mdrunner::BuilderImplementation::addLogFile(t_fileio* logFileHandle)
-{
-    logFileHandle_ = logFileHandle;
-}
-
-void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
-{
-    stopHandlerBuilder_ = std::move(builder);
-}
-
-MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
-                                 compat::not_null<SimulationContext*> context) :
-    impl_{ std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules), context) }
-{
-}
-
-MdrunnerBuilder::~MdrunnerBuilder() = default;
-
-MdrunnerBuilder& MdrunnerBuilder::addSimulationMethod(const MdrunOptions&    options,
-                                                      real                   forceWarningThreshold,
-                                                      const StartingBehavior startingBehavior)
-{
-    impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addDomainDecomposition(const DomdecOptions& options)
-{
-    impl_->addDomdec(options);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addNeighborList(int nstlist)
-{
-    impl_->addVerletList(nstlist);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters& params)
-{
-    impl_->addReplicaExchange(params);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
-{
-    impl_->addNonBonded(nbpu_opt);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addElectrostatics(const char* pme_opt, const char* pme_fft_opt)
-{
-    // The builder method may become more general in the future, but in this version,
-    // parameters for PME electrostatics are both required and the only parameters
-    // available.
-    if (pme_opt && pme_fft_opt)
-    {
-        impl_->addPME(pme_opt, pme_fft_opt);
-    }
-    else
-    {
-        GMX_THROW(
-                gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
-    }
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
-{
-    impl_->addBondedTaskAssignment(bonded_opt);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt)
-{
-    impl_->addUpdateTaskAssignment(update_opt);
-    return *this;
-}
-
-Mdrunner MdrunnerBuilder::build()
-{
-    return impl_->build();
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions)
-{
-    impl_->addHardwareOptions(hardwareOptions);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
-{
-    impl_->addFilenames(filenames);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
-{
-    impl_->addOutputEnvironment(outputEnvironment);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addLogFile(t_fileio* logFileHandle)
-{
-    impl_->addLogFile(logFileHandle);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
-{
-    impl_->addStopHandlerBuilder(std::move(builder));
-    return *this;
-}
-
-MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder&&) noexcept = default;
-
-MdrunnerBuilder& MdrunnerBuilder::operator=(MdrunnerBuilder&&) noexcept = default;
-
-} // namespace gmx
diff --git a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/runner.cpp.preplumed b/patches/gromacs-2020.7.diff/src/gromacs/mdrun/runner.cpp.preplumed
deleted file mode 100644
index c2b3c088d7..0000000000
--- a/patches/gromacs-2020.7.diff/src/gromacs/mdrun/runner.cpp.preplumed
+++ /dev/null
@@ -1,2104 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \internal \file
- *
- * \brief Implements the MD runner routine calling all integrators.
- *
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \ingroup module_mdrun
- */
-#include "gmxpre.h"
-
-#include "runner.h"
-
-#include "config.h"
-
-#include <cassert>
-#include <cinttypes>
-#include <csignal>
-#include <cstdlib>
-#include <cstring>
-
-#include <algorithm>
-#include <memory>
-
-#include "gromacs/commandline/filenm.h"
-#include "gromacs/domdec/builder.h"
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/domdec/domdec_struct.h"
-#include "gromacs/domdec/gpuhaloexchange.h"
-#include "gromacs/domdec/localatomsetmanager.h"
-#include "gromacs/domdec/partition.h"
-#include "gromacs/ewald/ewald_utils.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/ewald/pme_gpu_program.h"
-#include "gromacs/ewald/pme_pp_comm_gpu.h"
-#include "gromacs/fileio/checkpoint.h"
-#include "gromacs/fileio/gmxfio.h"
-#include "gromacs/fileio/oenv.h"
-#include "gromacs/fileio/tpxio.h"
-#include "gromacs/gmxlib/network.h"
-#include "gromacs/gmxlib/nrnb.h"
-#include "gromacs/gpu_utils/gpu_utils.h"
-#include "gromacs/hardware/cpuinfo.h"
-#include "gromacs/hardware/detecthardware.h"
-#include "gromacs/hardware/printhardware.h"
-#include "gromacs/imd/imd.h"
-#include "gromacs/listed_forces/disre.h"
-#include "gromacs/listed_forces/gpubonded.h"
-#include "gromacs/listed_forces/orires.h"
-#include "gromacs/math/functions.h"
-#include "gromacs/math/utilities.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/boxdeformation.h"
-#include "gromacs/mdlib/broadcaststructs.h"
-#include "gromacs/mdlib/calc_verletbuf.h"
-#include "gromacs/mdlib/dispersioncorrection.h"
-#include "gromacs/mdlib/enerdata_utils.h"
-#include "gromacs/mdlib/force.h"
-#include "gromacs/mdlib/forcerec.h"
-#include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/makeconstraints.h"
-#include "gromacs/mdlib/md_support.h"
-#include "gromacs/mdlib/mdatoms.h"
-#include "gromacs/mdlib/membed.h"
-#include "gromacs/mdlib/qmmm.h"
-#include "gromacs/mdlib/sighandler.h"
-#include "gromacs/mdlib/stophandler.h"
-#include "gromacs/mdlib/updategroups.h"
-#include "gromacs/mdrun/mdmodules.h"
-#include "gromacs/mdrun/simulationcontext.h"
-#include "gromacs/mdrunutility/handlerestart.h"
-#include "gromacs/mdrunutility/logging.h"
-#include "gromacs/mdrunutility/multisim.h"
-#include "gromacs/mdrunutility/printtime.h"
-#include "gromacs/mdrunutility/threadaffinity.h"
-#include "gromacs/mdtypes/commrec.h"
-#include "gromacs/mdtypes/enerdata.h"
-#include "gromacs/mdtypes/fcdata.h"
-#include "gromacs/mdtypes/group.h"
-#include "gromacs/mdtypes/inputrec.h"
-#include "gromacs/mdtypes/md_enums.h"
-#include "gromacs/mdtypes/mdrunoptions.h"
-#include "gromacs/mdtypes/observableshistory.h"
-#include "gromacs/mdtypes/simulation_workload.h"
-#include "gromacs/mdtypes/state.h"
-#include "gromacs/mdtypes/state_propagator_data_gpu.h"
-#include "gromacs/nbnxm/gpu_data_mgmt.h"
-#include "gromacs/nbnxm/nbnxm.h"
-#include "gromacs/nbnxm/pairlist_tuning.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/pulling/output.h"
-#include "gromacs/pulling/pull.h"
-#include "gromacs/pulling/pull_rotation.h"
-#include "gromacs/restraint/manager.h"
-#include "gromacs/restraint/restraintmdmodule.h"
-#include "gromacs/restraint/restraintpotential.h"
-#include "gromacs/swap/swapcoords.h"
-#include "gromacs/taskassignment/decidegpuusage.h"
-#include "gromacs/taskassignment/decidesimulationworkload.h"
-#include "gromacs/taskassignment/resourcedivision.h"
-#include "gromacs/taskassignment/taskassignment.h"
-#include "gromacs/taskassignment/usergpuids.h"
-#include "gromacs/timing/gpu_timing.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/timing/wallcyclereporting.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/trajectory/trajectoryframe.h"
-#include "gromacs/utility/basenetwork.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/filestream.h"
-#include "gromacs/utility/gmxassert.h"
-#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/keyvaluetree.h"
-#include "gromacs/utility/logger.h"
-#include "gromacs/utility/loggerbuilder.h"
-#include "gromacs/utility/mdmodulenotification.h"
-#include "gromacs/utility/physicalnodecommunicator.h"
-#include "gromacs/utility/pleasecite.h"
-#include "gromacs/utility/programcontext.h"
-#include "gromacs/utility/smalloc.h"
-#include "gromacs/utility/stringutil.h"
-
-#include "isimulator.h"
-#include "replicaexchange.h"
-#include "simulatorbuilder.h"
-
-namespace gmx
-{
-
-
-/*! \brief Manage any development feature flag variables encountered
- *
- * The use of dev features indicated by environment variables is
- * logged in order to ensure that runs with such features enabled can
- * be identified from their log and standard output. Any cross
- * dependencies are also checked, and if unsatisfied, a fatal error
- * issued.
- *
- * Note that some development features overrides are applied already here:
- * the GPU communication flags are set to false in non-tMPI and non-CUDA builds.
- *
- * \param[in]  mdlog                Logger object.
- * \param[in]  useGpuForNonbonded   True if the nonbonded task is offloaded in this run.
- * \param[in]  pmeRunMode           The PME run mode for this run
- * \returns                         The object populated with development feature flags.
- */
-static DevelopmentFeatureFlags manageDevelopmentFeatures(const gmx::MDLogger& mdlog,
-                                                         const bool           useGpuForNonbonded,
-                                                         const PmeRunMode     pmeRunMode)
-{
-    DevelopmentFeatureFlags devFlags;
-
-    // Some builds of GCC 5 give false positive warnings that these
-    // getenv results are ignored when clearly they are used.
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-result"
-    devFlags.enableGpuBufferOps = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr)
-                                  && (GMX_GPU == GMX_GPU_CUDA) && useGpuForNonbonded;
-    devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr) || GMX_FAHCORE;
-    devFlags.enableGpuHaloExchange =
-            (getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
-    devFlags.enableGpuPmePPComm =
-            (getenv("GMX_GPU_PME_PP_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
-#pragma GCC diagnostic pop
-
-    if (devFlags.enableGpuBufferOps)
-    {
-        GMX_LOG(mdlog.warning)
-                .asParagraph()
-                .appendTextFormatted(
-                        "This run uses the 'GPU buffer ops' feature, enabled by the "
-                        "GMX_USE_GPU_BUFFER_OPS environment variable.");
-    }
-
-    if (devFlags.forceGpuUpdateDefault)
-    {
-        GMX_LOG(mdlog.warning)
-                .asParagraph()
-                .appendTextFormatted(
-                        "This run will default to '-update gpu' as requested by the "
-                        "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable. GPU update with domain "
-                        "decomposition lacks substantial testing and should be used with caution.");
-    }
-
-    if (devFlags.enableGpuHaloExchange)
-    {
-        if (useGpuForNonbonded)
-        {
-            if (!devFlags.enableGpuBufferOps)
-            {
-                GMX_LOG(mdlog.warning)
-                        .asParagraph()
-                        .appendTextFormatted(
-                                "Enabling GPU buffer operations required by GMX_GPU_DD_COMMS "
-                                "(equivalent with GMX_USE_GPU_BUFFER_OPS=1).");
-                devFlags.enableGpuBufferOps = true;
-            }
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendTextFormatted(
-                            "This run uses the 'GPU halo exchange' feature, enabled by the "
-                            "GMX_GPU_DD_COMMS environment variable.");
-        }
-        else
-        {
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendTextFormatted(
-                            "GMX_GPU_DD_COMMS environment variable detected, but the 'GPU "
-                            "halo exchange' feature will not be enabled as nonbonded interactions "
-                            "are not offloaded.");
-            devFlags.enableGpuHaloExchange = false;
-        }
-    }
-
-    if (devFlags.enableGpuPmePPComm)
-    {
-        if (pmeRunMode == PmeRunMode::GPU)
-        {
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendTextFormatted(
-                            "This run uses the 'GPU PME-PP communications' feature, enabled "
-                            "by the GMX_GPU_PME_PP_COMMS environment variable.");
-        }
-        else
-        {
-            std::string clarification;
-            if (pmeRunMode == PmeRunMode::Mixed)
-            {
-                clarification =
-                        "PME FFT and gather are not offloaded to the GPU (PME is running in mixed "
-                        "mode).";
-            }
-            else
-            {
-                clarification = "PME is not offloaded to the GPU.";
-            }
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendText(
-                            "GMX_GPU_PME_PP_COMMS environment variable detected, but the "
-                            "'GPU PME-PP communications' feature was not enabled as "
-                            + clarification);
-            devFlags.enableGpuPmePPComm = false;
-        }
-    }
-
-    return devFlags;
-}
-
-/*! \brief Barrier for safe simultaneous thread access to mdrunner data
- *
- * Used to ensure that the master thread does not modify mdrunner during copy
- * on the spawned threads. */
-static void threadMpiMdrunnerAccessBarrier()
-{
-#if GMX_THREAD_MPI
-    MPI_Barrier(MPI_COMM_WORLD);
-#endif
-}
-
-Mdrunner Mdrunner::cloneOnSpawnedThread() const
-{
-    auto newRunner = Mdrunner(std::make_unique<MDModules>());
-
-    // All runners in the same process share a restraint manager resource because it is
-    // part of the interface to the client code, which is associated only with the
-    // original thread. Handles to the same resources can be obtained by copy.
-    {
-        newRunner.restraintManager_ = std::make_unique<RestraintManager>(*restraintManager_);
-    }
-
-    // Copy members of master runner.
-    // \todo Replace with builder when Simulation context and/or runner phases are better defined.
-    // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375
-    newRunner.hw_opt    = hw_opt;
-    newRunner.filenames = filenames;
-
-    newRunner.oenv            = oenv;
-    newRunner.mdrunOptions    = mdrunOptions;
-    newRunner.domdecOptions   = domdecOptions;
-    newRunner.nbpu_opt        = nbpu_opt;
-    newRunner.pme_opt         = pme_opt;
-    newRunner.pme_fft_opt     = pme_fft_opt;
-    newRunner.bonded_opt      = bonded_opt;
-    newRunner.update_opt      = update_opt;
-    newRunner.nstlist_cmdline = nstlist_cmdline;
-    newRunner.replExParams    = replExParams;
-    newRunner.pforce          = pforce;
-    // Give the spawned thread the newly created valid communicator
-    // for the simulation.
-    newRunner.communicator        = MPI_COMM_WORLD;
-    newRunner.ms                  = ms;
-    newRunner.startingBehavior    = startingBehavior;
-    newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
-
-    threadMpiMdrunnerAccessBarrier();
-
-    return newRunner;
-}
-
-/*! \brief The callback used for running on spawned threads.
- *
- * Obtains the pointer to the master mdrunner object from the one
- * argument permitted to the thread-launch API call, copies it to make
- * a new runner for this thread, reinitializes necessary data, and
- * proceeds to the simulation. */
-static void mdrunner_start_fn(const void* arg)
-{
-    try
-    {
-        auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner*>(arg);
-        /* copy the arg list to make sure that it's thread-local. This
-           doesn't copy pointed-to items, of course; fnm, cr and fplog
-           are reset in the call below, all others should be const. */
-        gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread();
-        mdrunner.mdrunner();
-    }
-    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-}
-
-
-void Mdrunner::spawnThreads(int numThreadsToLaunch)
-{
-#if GMX_THREAD_MPI
-    /* now spawn new threads that start mdrunner_start_fn(), while
-       the main thread returns. Thread affinity is handled later. */
-    if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE, mdrunner_start_fn,
-                     static_cast<const void*>(this))
-        != TMPI_SUCCESS)
-    {
-        GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
-    }
-
-    // Give the master thread the newly created valid communicator for
-    // the simulation.
-    communicator = MPI_COMM_WORLD;
-    threadMpiMdrunnerAccessBarrier();
-#else
-    GMX_UNUSED_VALUE(numThreadsToLaunch);
-    GMX_UNUSED_VALUE(mdrunner_start_fn);
-#endif
-}
-
-} // namespace gmx
-
-/*! \brief Initialize variables for Verlet scheme simulation */
-static void prepare_verlet_scheme(FILE*               fplog,
-                                  t_commrec*          cr,
-                                  t_inputrec*         ir,
-                                  int                 nstlist_cmdline,
-                                  const gmx_mtop_t*   mtop,
-                                  const matrix        box,
-                                  bool                makeGpuPairList,
-                                  const gmx::CpuInfo& cpuinfo)
-{
-    /* For NVE simulations, we will retain the initial list buffer */
-    if (EI_DYNAMICS(ir->eI) && ir->verletbuf_tol > 0 && !(EI_MD(ir->eI) && ir->etc == etcNO))
-    {
-        /* Update the Verlet buffer size for the current run setup */
-
-        /* Here we assume SIMD-enabled kernels are being used. But as currently
-         * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
-         * and 4x2 gives a larger buffer than 4x4, this is ok.
-         */
-        ListSetupType listType =
-                (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported);
-        VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
-
-        const real rlist_new =
-                calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
-
-        if (rlist_new != ir->rlist)
-        {
-            if (fplog != nullptr)
-            {
-                fprintf(fplog,
-                        "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
-                        ir->rlist, rlist_new, listSetup.cluster_size_i, listSetup.cluster_size_j);
-            }
-            ir->rlist = rlist_new;
-        }
-    }
-
-    if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
-    {
-        gmx_fatal(FARGS, "Can not set nstlist without %s",
-                  !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
-    }
-
-    if (EI_DYNAMICS(ir->eI))
-    {
-        /* Set or try nstlist values */
-        increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
-    }
-}
-
-/*! \brief Override the nslist value in inputrec
- *
- * with value passed on the command line (if any)
- */
-static void override_nsteps_cmdline(const gmx::MDLogger& mdlog, int64_t nsteps_cmdline, t_inputrec* ir)
-{
-    assert(ir);
-
-    /* override with anything else than the default -2 */
-    if (nsteps_cmdline > -2)
-    {
-        char sbuf_steps[STEPSTRSIZE];
-        char sbuf_msg[STRLEN];
-
-        ir->nsteps = nsteps_cmdline;
-        if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
-        {
-            sprintf(sbuf_msg,
-                    "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
-                    gmx_step_str(nsteps_cmdline, sbuf_steps), fabs(nsteps_cmdline * ir->delta_t));
-        }
-        else
-        {
-            sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
-                    gmx_step_str(nsteps_cmdline, sbuf_steps));
-        }
-
-        GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg);
-    }
-    else if (nsteps_cmdline < -2)
-    {
-        gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64, nsteps_cmdline);
-    }
-    /* Do nothing if nsteps_cmdline == -2 */
-}
-
-namespace gmx
-{
-
-/*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings.
- *
- * If not, and if a warning may be issued, logs a warning about
- * falling back to CPU code. With thread-MPI, only the first
- * call to this function should have \c issueWarning true. */
-static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger& mdlog, const t_inputrec& ir, bool issueWarning)
-{
-    bool        gpuIsUseful = true;
-    std::string warning;
-
-    if (ir.opts.ngener - ir.nwall > 1)
-    {
-        /* The GPU code does not support more than one energy group.
-         * If the user requested GPUs explicitly, a fatal error is given later.
-         */
-        gpuIsUseful = false;
-        warning =
-                "Multiple energy groups is not implemented for GPUs, falling back to the CPU. "
-                "For better performance, run on the GPU without energy groups and then do "
-                "gmx mdrun -rerun option on the trajectory with an energy group .tpr file.";
-    }
-
-    if (EI_TPI(ir.eI))
-    {
-        gpuIsUseful = false;
-        warning     = "TPI is not implemented for GPUs.";
-    }
-
-    if (!gpuIsUseful && issueWarning)
-    {
-        GMX_LOG(mdlog.warning).asParagraph().appendText(warning);
-    }
-
-    return gpuIsUseful;
-}
-
-//! Initializes the logger for mdrun.
-static gmx::LoggerOwner buildLogger(FILE* fplog, const bool isSimulationMasterRank)
-{
-    gmx::LoggerBuilder builder;
-    if (fplog != nullptr)
-    {
-        builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog);
-    }
-    if (isSimulationMasterRank)
-    {
-        builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError());
-    }
-    return builder.build();
-}
-
-//! Make a TaskTarget from an mdrun argument string.
-static TaskTarget findTaskTarget(const char* optionString)
-{
-    TaskTarget returnValue = TaskTarget::Auto;
-
-    if (strncmp(optionString, "auto", 3) == 0)
-    {
-        returnValue = TaskTarget::Auto;
-    }
-    else if (strncmp(optionString, "cpu", 3) == 0)
-    {
-        returnValue = TaskTarget::Cpu;
-    }
-    else if (strncmp(optionString, "gpu", 3) == 0)
-    {
-        returnValue = TaskTarget::Gpu;
-    }
-    else
-    {
-        GMX_ASSERT(false, "Option string should have been checked for sanity already");
-    }
-
-    return returnValue;
-}
-
-//! Finish run, aggregate data to print performance info.
-static void finish_run(FILE*                     fplog,
-                       const gmx::MDLogger&      mdlog,
-                       const t_commrec*          cr,
-                       const t_inputrec*         inputrec,
-                       t_nrnb                    nrnb[],
-                       gmx_wallcycle_t           wcycle,
-                       gmx_walltime_accounting_t walltime_accounting,
-                       nonbonded_verlet_t*       nbv,
-                       const gmx_pme_t*          pme,
-                       gmx_bool                  bWriteStat)
-{
-    double delta_t = 0;
-    double nbfs = 0, mflop = 0;
-    double elapsed_time, elapsed_time_over_all_ranks, elapsed_time_over_all_threads,
-            elapsed_time_over_all_threads_over_all_ranks;
-    /* Control whether it is valid to print a report. Only the
-       simulation master may print, but it should not do so if the run
-       terminated e.g. before a scheduled reset step. This is
-       complicated by the fact that PME ranks are unaware of the
-       reason why they were sent a pmerecvqxFINISH. To avoid
-       communication deadlocks, we always do the communication for the
-       report, even if we've decided not to write the report, because
-       how long it takes to finish the run is not important when we've
-       decided not to report on the simulation performance.
-
-       Further, we only report performance for dynamical integrators,
-       because those are the only ones for which we plan to
-       consider doing any optimizations. */
-    bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
-
-    if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
-    {
-        GMX_LOG(mdlog.warning)
-                .asParagraph()
-                .appendText("Simulation ended prematurely, no performance report will be written.");
-        printReport = false;
-    }
-
-    t_nrnb*                 nrnb_tot;
-    std::unique_ptr<t_nrnb> nrnbTotalStorage;
-    if (cr->nnodes > 1)
-    {
-        nrnbTotalStorage = std::make_unique<t_nrnb>();
-        nrnb_tot         = nrnbTotalStorage.get();
-#if GMX_MPI
-        MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
-#endif
-    }
-    else
-    {
-        nrnb_tot = nrnb;
-    }
-
-    elapsed_time = walltime_accounting_get_time_since_reset(walltime_accounting);
-    elapsed_time_over_all_threads =
-            walltime_accounting_get_time_since_reset_over_all_threads(walltime_accounting);
-    if (cr->nnodes > 1)
-    {
-#if GMX_MPI
-        /* reduce elapsed_time over all MPI ranks in the current simulation */
-        MPI_Allreduce(&elapsed_time, &elapsed_time_over_all_ranks, 1, MPI_DOUBLE, MPI_SUM,
-                      cr->mpi_comm_mysim);
-        elapsed_time_over_all_ranks /= cr->nnodes;
-        /* Reduce elapsed_time_over_all_threads over all MPI ranks in the
-         * current simulation. */
-        MPI_Allreduce(&elapsed_time_over_all_threads, &elapsed_time_over_all_threads_over_all_ranks,
-                      1, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
-#endif
-    }
-    else
-    {
-        elapsed_time_over_all_ranks                  = elapsed_time;
-        elapsed_time_over_all_threads_over_all_ranks = elapsed_time_over_all_threads;
-    }
-
-    if (printReport)
-    {
-        print_flop(fplog, nrnb_tot, &nbfs, &mflop);
-    }
-
-    if (thisRankHasDuty(cr, DUTY_PP) && DOMAINDECOMP(cr))
-    {
-        print_dd_statistics(cr, inputrec, fplog);
-    }
-
-    /* TODO Move the responsibility for any scaling by thread counts
-     * to the code that handled the thread region, so that there's a
-     * mechanism to keep cycle counting working during the transition
-     * to task parallelism. */
-    int nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
-    int nthreads_pme = gmx_omp_nthreads_get(emntPME);
-    wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP),
-                                   nthreads_pp, nthreads_pme);
-    auto cycle_sum(wallcycle_sum(cr, wcycle));
-
-    if (printReport)
-    {
-        auto nbnxn_gpu_timings =
-                (nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
-        gmx_wallclock_gpu_pme_t pme_gpu_timings = {};
-
-        if (pme_gpu_task_enabled(pme))
-        {
-            pme_gpu_get_timings(pme, &pme_gpu_timings);
-        }
-        wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
-                        elapsed_time_over_all_ranks, wcycle, cycle_sum, nbnxn_gpu_timings,
-                        &pme_gpu_timings);
-
-        if (EI_DYNAMICS(inputrec->eI))
-        {
-            delta_t = inputrec->delta_t;
-        }
-
-        if (fplog)
-        {
-            print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks,
-                       walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
-                       delta_t, nbfs, mflop);
-        }
-        if (bWriteStat)
-        {
-            print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks,
-                       walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
-                       delta_t, nbfs, mflop);
-        }
-    }
-}
-
-int Mdrunner::mdrunner()
-{
-    matrix                    box;
-    t_forcerec*               fr               = nullptr;
-    t_fcdata*                 fcd              = nullptr;
-    real                      ewaldcoeff_q     = 0;
-    real                      ewaldcoeff_lj    = 0;
-    int                       nChargePerturbed = -1, nTypePerturbed = 0;
-    gmx_wallcycle_t           wcycle;
-    gmx_walltime_accounting_t walltime_accounting = nullptr;
-    gmx_membed_t*             membed              = nullptr;
-    gmx_hw_info_t*            hwinfo              = nullptr;
-
-    /* CAUTION: threads may be started later on in this function, so
-       cr doesn't reflect the final parallel state right now */
-    gmx_mtop_t mtop;
-
-    /* TODO: inputrec should tell us whether we use an algorithm, not a file option */
-    const bool doEssentialDynamics = opt2bSet("-ei", filenames.size(), filenames.data());
-    const bool doMembed            = opt2bSet("-membed", filenames.size(), filenames.data());
-    const bool doRerun             = mdrunOptions.rerun;
-
-    // Handle task-assignment related user options.
-    EmulateGpuNonbonded emulateGpuNonbonded =
-            (getenv("GMX_EMULATE_GPU") != nullptr ? EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
-
-    std::vector<int> userGpuTaskAssignment;
-    try
-    {
-        userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
-    }
-    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-    auto nonbondedTarget = findTaskTarget(nbpu_opt);
-    auto pmeTarget       = findTaskTarget(pme_opt);
-    auto pmeFftTarget    = findTaskTarget(pme_fft_opt);
-    auto bondedTarget    = findTaskTarget(bonded_opt);
-    auto updateTarget    = findTaskTarget(update_opt);
-
-    FILE* fplog = nullptr;
-    // If we are appending, we don't write log output because we need
-    // to check that the old log file matches what the checkpoint file
-    // expects. Otherwise, we should start to write log output now if
-    // there is a file ready for it.
-    if (logFileHandle != nullptr && startingBehavior != StartingBehavior::RestartWithAppending)
-    {
-        fplog = gmx_fio_getfp(logFileHandle);
-    }
-    const bool       isSimulationMasterRank = findIsSimulationMasterRank(ms, communicator);
-    gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank));
-    gmx::MDLogger    mdlog(logOwner.logger());
-
-    // TODO The thread-MPI master rank makes a working
-    // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks
-    // after the threads have been launched. This works because no use
-    // is made of that communicator until after the execution paths
-    // have rejoined. But it is likely that we can improve the way
-    // this is expressed, e.g. by expressly running detection only the
-    // master rank for thread-MPI, rather than relying on the mutex
-    // and reference count.
-    PhysicalNodeCommunicator physicalNodeComm(communicator, gmx_physicalnode_id_hash());
-    hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
-
-    gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);
-
-    std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->gpu_info, hw_opt.gpuIdsAvailable);
-
-    // Print citation requests after all software/hardware printing
-    pleaseCiteGromacs(fplog);
-
-    // TODO Replace this by unique_ptr once t_inputrec is C++
-    t_inputrec               inputrecInstance;
-    t_inputrec*              inputrec = nullptr;
-    std::unique_ptr<t_state> globalState;
-
-    auto partialDeserializedTpr = std::make_unique<PartialDeserializedTprFile>();
-
-    if (isSimulationMasterRank)
-    {
-        /* Only the master rank has the global state */
-        globalState = std::make_unique<t_state>();
-
-        /* Read (nearly) all data required for the simulation
-         * and keep the partly serialized tpr contents to send to other ranks later
-         */
-        *partialDeserializedTpr = read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()),
-                                                 &inputrecInstance, globalState.get(), &mtop);
-        inputrec                = &inputrecInstance;
-    }
-
-    /* Check and update the hardware options for internal consistency */
-    checkAndUpdateHardwareOptions(mdlog, &hw_opt, isSimulationMasterRank, domdecOptions.numPmeRanks,
-                                  inputrec);
-
-    if (GMX_THREAD_MPI && isSimulationMasterRank)
-    {
-        bool useGpuForNonbonded = false;
-        bool useGpuForPme       = false;
-        try
-        {
-            GMX_RELEASE_ASSERT(inputrec != nullptr, "Keep the compiler happy");
-
-            // If the user specified the number of ranks, then we must
-            // respect that, but in default mode, we need to allow for
-            // the number of GPUs to choose the number of ranks.
-            auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
-            useGpuForNonbonded         = decideWhetherToUseGpusForNonbondedWithThreadMpi(
-                    nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
-                    canUseGpuForNonbonded,
-                    gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
-                    hw_opt.nthreads_tmpi);
-            useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(
-                    useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
-                    *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
-        }
-        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-
-        /* Determine how many thread-MPI ranks to start.
-         *
-         * TODO Over-writing the user-supplied value here does
-         * prevent any possible subsequent checks from working
-         * correctly. */
-        hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo, &hw_opt, gpuIdsToUse, useGpuForNonbonded,
-                                                useGpuForPme, inputrec, &mtop, mdlog, doMembed);
-
-        // Now start the threads for thread MPI.
-        spawnThreads(hw_opt.nthreads_tmpi);
-        // The spawned threads enter mdrunner() and execution of
-        // master and spawned threads joins at the end of this block.
-        physicalNodeComm = PhysicalNodeCommunicator(communicator, gmx_physicalnode_id_hash());
-    }
-
-    GMX_RELEASE_ASSERT(communicator == MPI_COMM_WORLD, "Must have valid world communicator");
-    CommrecHandle crHandle = init_commrec(communicator, ms);
-    t_commrec*    cr       = crHandle.get();
-    GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec");
-
-    if (PAR(cr))
-    {
-        /* now broadcast everything to the non-master nodes/threads: */
-        if (!isSimulationMasterRank)
-        {
-            inputrec = &inputrecInstance;
-        }
-        init_parallel(cr, inputrec, &mtop, partialDeserializedTpr.get());
-    }
-    GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now");
-    partialDeserializedTpr.reset(nullptr);
-
-    // Now the number of ranks is known to all ranks, and each knows
-    // the inputrec read by the master rank. The ranks can now all run
-    // the task-deciding functions and will agree on the result
-    // without needing to communicate.
-    //
-    // TODO Should we do the communication in debug mode to support
-    // having an assertion?
-    const bool useDomainDecomposition = (PAR(cr) && !(EI_TPI(inputrec->eI) || inputrec->eI == eiNM));
-
-    // Note that these variables describe only their own node.
-    //
-    // Note that when bonded interactions run on a GPU they always run
-    // alongside a nonbonded task, so do not influence task assignment
-    // even though they affect the force calculation workload.
-    bool useGpuForNonbonded = false;
-    bool useGpuForPme       = false;
-    bool useGpuForBonded    = false;
-    bool useGpuForUpdate    = false;
-    bool gpusWereDetected   = hwinfo->ngpu_compatible_tot > 0;
-    try
-    {
-        // It's possible that there are different numbers of GPUs on
-        // different nodes, which is the user's responsibility to
-        // handle. If unsuitable, we will notice that during task
-        // assignment.
-        auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
-        useGpuForNonbonded         = decideWhetherToUseGpusForNonbonded(
-                nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded,
-                gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected);
-        useGpuForPme = decideWhetherToUseGpusForPme(
-                useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo, *inputrec, mtop,
-                cr->nnodes, domdecOptions.numPmeRanks, gpusWereDetected);
-        auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr)
-                                  && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
-        useGpuForBonded = decideWhetherToUseGpusForBonded(
-                useGpuForNonbonded, useGpuForPme, bondedTarget, canUseGpuForBonded,
-                EVDW_PME(inputrec->vdwtype), EEL_PME_EWALD(inputrec->coulombtype),
-                domdecOptions.numPmeRanks, gpusWereDetected);
-    }
-    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-
-    const PmeRunMode pmeRunMode = determinePmeRunMode(useGpuForPme, pmeFftTarget, *inputrec);
-
-    // Initialize development feature flags that enabled by environment variable
-    // and report those features that are enabled.
-    const DevelopmentFeatureFlags devFlags =
-            manageDevelopmentFeatures(mdlog, useGpuForNonbonded, pmeRunMode);
-
-    const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
-            false, inputrec, doRerun, mtop, ms, replExParams, nullptr, doEssentialDynamics, doMembed);
-    const bool useModularSimulator = inputIsCompatibleWithModularSimulator
-                                     && !(getenv("GMX_DISABLE_MODULAR_SIMULATOR") != nullptr);
-
-    // Build restraints.
-    // TODO: hide restraint implementation details from Mdrunner.
-    // There is nothing unique about restraints at this point as far as the
-    // Mdrunner is concerned. The Mdrunner should just be getting a sequence of
-    // factory functions from the SimulationContext on which to call mdModules_->add().
-    // TODO: capture all restraints into a single RestraintModule, passed to the runner builder.
-    for (auto&& restraint : restraintManager_->getRestraints())
-    {
-        auto module = RestraintMDModule::create(restraint, restraint->sites());
-        mdModules_->add(std::move(module));
-    }
-
-    // TODO: Error handling
-    mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
-    const auto& mdModulesNotifier = mdModules_->notifier().notifier_;
-
-    if (inputrec->internalParameters != nullptr)
-    {
-        mdModulesNotifier.notify(*inputrec->internalParameters);
-    }
-
-    if (fplog != nullptr)
-    {
-        pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
-        fprintf(fplog, "\n");
-    }
-
-    if (SIMMASTER(cr))
-    {
-        /* In rerun, set velocities to zero if present */
-        if (doRerun && ((globalState->flags & (1 << estV)) != 0))
-        {
-            // rerun does not use velocities
-            GMX_LOG(mdlog.info)
-                    .asParagraph()
-                    .appendText(
-                            "Rerun trajectory contains velocities. Rerun does only evaluate "
-                            "potential energy and forces. The velocities will be ignored.");
-            for (int i = 0; i < globalState->natoms; i++)
-            {
-                clear_rvec(globalState->v[i]);
-            }
-            globalState->flags &= ~(1 << estV);
-        }
-
-        /* now make sure the state is initialized and propagated */
-        set_state_entries(globalState.get(), inputrec, useModularSimulator);
-    }
-
-    /* NM and TPI parallelize over force/energy calculations, not atoms,
-     * so we need to initialize and broadcast the global state.
-     */
-    if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
-    {
-        if (!MASTER(cr))
-        {
-            globalState = std::make_unique<t_state>();
-        }
-        broadcastStateWithoutDynamics(cr, globalState.get());
-    }
-
-    /* A parallel command line option consistency check that we can
-       only do after any threads have started. */
-    if (!PAR(cr)
-        && (domdecOptions.numCells[XX] > 1 || domdecOptions.numCells[YY] > 1
-            || domdecOptions.numCells[ZZ] > 1 || domdecOptions.numPmeRanks > 0))
-    {
-        gmx_fatal(FARGS,
-                  "The -dd or -npme option request a parallel simulation, "
-#if !GMX_MPI
-                  "but %s was compiled without threads or MPI enabled",
-                  output_env_get_program_display_name(oenv));
-#elif GMX_THREAD_MPI
-                  "but the number of MPI-threads (option -ntmpi) is not set or is 1");
-#else
-                  "but %s was not started through mpirun/mpiexec or only one rank was requested "
-                  "through mpirun/mpiexec",
-                  output_env_get_program_display_name(oenv));
-#endif
-    }
-
-    if (doRerun && (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
-    {
-        gmx_fatal(FARGS,
-                  "The .mdp file specified an energy mininization or normal mode algorithm, and "
-                  "these are not compatible with mdrun -rerun");
-    }
-
-    if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
-    {
-        if (domdecOptions.numPmeRanks > 0)
-        {
-            gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
-                                 "PME-only ranks are requested, but the system does not use PME "
-                                 "for electrostatics or LJ");
-        }
-
-        domdecOptions.numPmeRanks = 0;
-    }
-
-    if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
-    {
-        /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
-         * improve performance with many threads per GPU, since our OpenMP
-         * scaling is bad, but it's difficult to automate the setup.
-         */
-        domdecOptions.numPmeRanks = 0;
-    }
-    if (useGpuForPme)
-    {
-        if (domdecOptions.numPmeRanks < 0)
-        {
-            domdecOptions.numPmeRanks = 0;
-            // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
-        }
-        else
-        {
-            GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1,
-                               "PME GPU decomposition is not supported");
-        }
-    }
-
-    /* NMR restraints must be initialized before load_checkpoint,
-     * since with time averaging the history is added to t_state.
-     * For proper consistency check we therefore need to extend
-     * t_state here.
-     * So the PME-only nodes (if present) will also initialize
-     * the distance restraints.
-     */
-    snew(fcd, 1);
-
-    /* This needs to be called before read_checkpoint to extend the state */
-    init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
-
-    init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
-
-    auto deform = prepareBoxDeformation(globalState->box, cr, *inputrec);
-
-#if GMX_FAHCORE
-    /* We have to remember the generation's first step before reading checkpoint.
-       This way, we can report to the F@H core both the generation's first step
-       and the restored first step, thus making it able to distinguish between
-       an interruption/resume and start of the n-th generation simulation.
-       Having this information, the F@H core can correctly calculate and report
-       the progress.
-     */
-    int gen_first_step = 0;
-    if (MASTER(cr))
-    {
-        gen_first_step = inputrec->init_step;
-    }
-#endif
-
-    ObservablesHistory observablesHistory = {};
-
-    if (startingBehavior != StartingBehavior::NewSimulation)
-    {
-        /* Check if checkpoint file exists before doing continuation.
-         * This way we can use identical input options for the first and subsequent runs...
-         */
-        if (mdrunOptions.numStepsCommandline > -2)
-        {
-            /* Temporarily set the number of steps to unlimited to avoid
-             * triggering the nsteps check in load_checkpoint().
-             * This hack will go away soon when the -nsteps option is removed.
-             */
-            inputrec->nsteps = -1;
-        }
-
-        load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr),
-                        logFileHandle, cr, domdecOptions.numCells, inputrec, globalState.get(),
-                        &observablesHistory, mdrunOptions.reproducible, mdModules_->notifier());
-
-        if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
-        {
-            // Now we can start normal logging to the truncated log file.
-            fplog = gmx_fio_getfp(logFileHandle);
-            prepareLogAppending(fplog);
-            logOwner = buildLogger(fplog, MASTER(cr));
-            mdlog    = logOwner.logger();
-        }
-    }
-
-#if GMX_FAHCORE
-    if (MASTER(cr))
-    {
-        fcRegisterSteps(inputrec->nsteps + inputrec->init_step, gen_first_step);
-    }
-#endif
-
-    if (mdrunOptions.numStepsCommandline > -2)
-    {
-        GMX_LOG(mdlog.info)
-                .asParagraph()
-                .appendText(
-                        "The -nsteps functionality is deprecated, and may be removed in a future "
-                        "version. "
-                        "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp "
-                        "file field.");
-    }
-    /* override nsteps with value set on the commandline */
-    override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec);
-
-    if (SIMMASTER(cr))
-    {
-        copy_mat(globalState->box, box);
-    }
-
-    if (PAR(cr))
-    {
-        gmx_bcast(sizeof(box), box, cr);
-    }
-
-    if (inputrec->cutoff_scheme != ecutsVERLET)
-    {
-        gmx_fatal(FARGS,
-                  "This group-scheme .tpr file can no longer be run by mdrun. Please update to the "
-                  "Verlet scheme, or use an earlier version of GROMACS if necessary.");
-    }
-    /* Update rlist and nstlist. */
-    prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
-                          useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes),
-                          *hwinfo->cpuInfo);
-
-    const bool prefer1DAnd1PulseDD = (devFlags.enableGpuHaloExchange && useGpuForNonbonded);
-    // This builder is necessary while we have multi-part construction
-    // of DD. Before DD is constructed, we use the existence of
-    // the builder object to indicate that further construction of DD
-    // is needed.
-    std::unique_ptr<DomainDecompositionBuilder> ddBuilder;
-    if (useDomainDecomposition)
-    {
-        ddBuilder = std::make_unique<DomainDecompositionBuilder>(
-                mdlog, cr, domdecOptions, mdrunOptions, prefer1DAnd1PulseDD, mtop, *inputrec, box,
-                positionsFromStatePointer(globalState.get()));
-    }
-    else
-    {
-        /* PME, if used, is done on all nodes with 1D decomposition */
-        cr->npmenodes = 0;
-        cr->duty      = (DUTY_PP | DUTY_PME);
-
-        if (inputrec->ePBC == epbcSCREW)
-        {
-            gmx_fatal(FARGS, "pbc=screw is only implemented with domain decomposition");
-        }
-    }
-
-    // Produce the task assignment for this rank.
-    GpuTaskAssignmentsBuilder gpuTaskAssignmentsBuilder;
-    GpuTaskAssignments        gpuTaskAssignments = gpuTaskAssignmentsBuilder.build(
-            gpuIdsToUse, userGpuTaskAssignment, *hwinfo, communicator, physicalNodeComm,
-            nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded,
-            useGpuForPme, thisRankHasDuty(cr, DUTY_PP),
-            // TODO cr->duty & DUTY_PME should imply that a PME
-            // algorithm is active, but currently does not.
-            EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
-
-    // Get the device handles for the modules, nullptr when no task is assigned.
-    gmx_device_info_t* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
-    gmx_device_info_t* pmeDeviceInfo       = gpuTaskAssignments.initPmeDevice();
-
-    // TODO Initialize GPU streams here.
-
-    // TODO Currently this is always built, yet DD partition code
-    // checks if it is built before using it. Probably it should
-    // become an MDModule that is made only when another module
-    // requires it (e.g. pull, CompEl, density fitting), so that we
-    // don't update the local atom sets unilaterally every step.
-    LocalAtomSetManager atomSets;
-    if (ddBuilder)
-    {
-        // TODO Pass the GPU streams to ddBuilder to use in buffer
-        // transfers (e.g. halo exchange)
-        cr->dd = ddBuilder->build(&atomSets);
-        // The builder's job is done, so destruct it
-        ddBuilder.reset(nullptr);
-        // Note that local state still does not exist yet.
-    }
-
-    // The GPU update is decided here because we need to know whether the constraints or
-    // SETTLEs can span accross the domain borders (i.e. whether or not update groups are
-    // defined). This is only known after DD is initialized, hence decision on using GPU
-    // update is done so late.
-    try
-    {
-        const bool useUpdateGroups = cr->dd ? ddUsesUpdateGroups(*cr->dd) : false;
-
-        useGpuForUpdate = decideWhetherToUseGpuForUpdate(
-                useDomainDecomposition, useUpdateGroups, pmeRunMode, domdecOptions.numPmeRanks > 0,
-                useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop,
-                doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
-                replExParams.exchangeInterval > 0, doRerun, devFlags, mdlog);
-    }
-    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-
-    const bool printHostName = (cr->nnodes > 1);
-    gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
-
-    // If the user chose a task assignment, give them some hints
-    // where appropriate.
-    if (!userGpuTaskAssignment.empty())
-    {
-        gpuTaskAssignments.logPerformanceHints(mdlog, ssize(gpuIdsToUse));
-    }
-
-    if (PAR(cr))
-    {
-        /* After possible communicator splitting in make_dd_communicators.
-         * we can set up the intra/inter node communication.
-         */
-        gmx_setup_nodecomm(fplog, cr);
-    }
-
-#if GMX_MPI
-    if (isMultiSim(ms))
-    {
-        GMX_LOG(mdlog.warning)
-                .asParagraph()
-                .appendTextFormatted(
-                        "This is simulation %d out of %d running as a composite GROMACS\n"
-                        "multi-simulation job. Setup for this simulation:\n",
-                        ms->sim, ms->nsim);
-    }
-    GMX_LOG(mdlog.warning)
-            .appendTextFormatted("Using %d MPI %s\n", cr->nnodes,
-#    if GMX_THREAD_MPI
-                                 cr->nnodes == 1 ? "thread" : "threads"
-#    else
-                                 cr->nnodes == 1 ? "process" : "processes"
-#    endif
-            );
-    fflush(stderr);
-#endif
-
-    // If mdrun -pin auto honors any affinity setting that already
-    // exists. If so, it is nice to provide feedback about whether
-    // that existing affinity setting was from OpenMP or something
-    // else, so we run this code both before and after we initialize
-    // the OpenMP support.
-    gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
-    /* Check and update the number of OpenMP threads requested */
-    checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
-                                            pmeRunMode, mtop, *inputrec);
-
-    gmx_omp_nthreads_init(mdlog, cr, hwinfo->nthreads_hw_avail, physicalNodeComm.size_,
-                          hw_opt.nthreads_omp, hw_opt.nthreads_omp_pme, !thisRankHasDuty(cr, DUTY_PP));
-
-    // Enable FP exception detection, but not in
-    // Release mode and not for compilers with known buggy FP
-    // exception support (clang with any optimization) or suspected
-    // buggy FP exception support (gcc 7.* with optimization).
-#if !defined NDEBUG                                                                         \
-        && !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
-             && defined __OPTIMIZE__)
-    const bool bEnableFPE = true;
-#else
-    const bool bEnableFPE = false;
-#endif
-    // FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
-    if (bEnableFPE)
-    {
-        gmx_feenableexcept();
-    }
-
-    /* Now that we know the setup is consistent, check for efficiency */
-    check_resource_division_efficiency(hwinfo, gpuTaskAssignments.thisRankHasAnyGpuTask(),
-                                       mdrunOptions.ntompOptionIsSet, cr, mdlog);
-
-    /* getting number of PP/PME threads on this MPI / tMPI rank.
-       PME: env variable should be read only on one node to make sure it is
-       identical everywhere;
-     */
-    const int numThreadsOnThisRank = thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded)
-                                                                  : gmx_omp_nthreads_get(emntPME);
-    checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo->hardwareTopology,
-                                  physicalNodeComm, mdlog);
-
-    // Enable Peer access between GPUs where available
-    // Only for DD, only master PP rank needs to perform setup, and only if thread MPI plus
-    // any of the GPU communication features are active.
-    if (DOMAINDECOMP(cr) && MASTER(cr) && thisRankHasDuty(cr, DUTY_PP) && GMX_THREAD_MPI
-        && (devFlags.enableGpuHaloExchange || devFlags.enableGpuPmePPComm))
-    {
-        setupGpuDevicePeerAccess(gpuIdsToUse, mdlog);
-    }
-
-    if (hw_opt.threadAffinity != ThreadAffinity::Off)
-    {
-        /* Before setting affinity, check whether the affinity has changed
-         * - which indicates that probably the OpenMP library has changed it
-         * since we first checked).
-         */
-        gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
-
-        int numThreadsOnThisNode, intraNodeThreadOffset;
-        analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
-                                 &intraNodeThreadOffset);
-
-        /* Set the CPU affinity */
-        gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology, numThreadsOnThisRank,
-                                numThreadsOnThisNode, intraNodeThreadOffset, nullptr);
-    }
-
-    if (mdrunOptions.timingOptions.resetStep > -1)
-    {
-        GMX_LOG(mdlog.info)
-                .asParagraph()
-                .appendText(
-                        "The -resetstep functionality is deprecated, and may be removed in a "
-                        "future version.");
-    }
-    wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
-
-    if (PAR(cr))
-    {
-        /* Master synchronizes its value of reset_counters with all nodes
-         * including PME only nodes */
-        int64_t reset_counters = wcycle_get_reset_counters(wcycle);
-        gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
-        wcycle_set_reset_counters(wcycle, reset_counters);
-    }
-
-    // Membrane embedding must be initialized before we call init_forcerec()
-    if (doMembed)
-    {
-        if (MASTER(cr))
-        {
-            fprintf(stderr, "Initializing membed");
-        }
-        /* Note that membed cannot work in parallel because mtop is
-         * changed here. Fix this if we ever want to make it run with
-         * multiple ranks. */
-        membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec,
-                             globalState.get(), cr, &mdrunOptions.checkpointOptions.period);
-    }
-
-    const bool                   thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
-    std::unique_ptr<MDAtoms>     mdAtoms;
-    std::unique_ptr<gmx_vsite_t> vsite;
-
-    t_nrnb nrnb;
-    if (thisRankHasDuty(cr, DUTY_PP))
-    {
-        mdModulesNotifier.notify(*cr);
-        mdModulesNotifier.notify(&atomSets);
-        mdModulesNotifier.notify(PeriodicBoundaryConditionType{ inputrec->ePBC });
-        mdModulesNotifier.notify(SimulationTimeStep{ inputrec->delta_t });
-        /* Initiate forcerecord */
-        fr                 = new t_forcerec;
-        fr->forceProviders = mdModules_->initForceProviders();
-        init_forcerec(fplog, mdlog, fr, fcd, inputrec, &mtop, cr, box,
-                      opt2fn("-table", filenames.size(), filenames.data()),
-                      opt2fn("-tablep", filenames.size(), filenames.data()),
-                      opt2fns("-tableb", filenames.size(), filenames.data()), *hwinfo,
-                      nonbondedDeviceInfo, useGpuForBonded,
-                      pmeRunMode == PmeRunMode::GPU && !thisRankHasDuty(cr, DUTY_PME), pforce, wcycle);
-
-        // TODO Move this to happen during domain decomposition setup,
-        // once stream and event handling works well with that.
-        // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
-        if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd))
-        {
-            GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps,
-                               "Must use GMX_USE_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
-            void* streamLocal =
-                    Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local);
-            void* streamNonLocal =
-                    Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal);
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendTextFormatted(
-                            "NOTE: This run uses the 'GPU halo exchange' feature, enabled by the "
-                            "GMX_GPU_DD_COMMS environment variable.");
-            cr->dd->gpuHaloExchange = std::make_unique<GpuHaloExchange>(
-                    cr->dd, cr->mpi_comm_mysim, streamLocal, streamNonLocal);
-        }
-
-        /* Initialize the mdAtoms structure.
-         * mdAtoms is not filled with atom data,
-         * as this can not be done now with domain decomposition.
-         */
-        mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask);
-        if (globalState && thisRankHasPmeGpuTask)
-        {
-            // The pinning of coordinates in the global state object works, because we only use
-            // PME on GPU without DD or on a separate PME rank, and because the local state pointer
-            // points to the global state object without DD.
-            // FIXME: MD and EM separately set up the local state - this should happen in the same
-            // function, which should also perform the pinning.
-            changePinningPolicy(&globalState->x, pme_get_pinning_policy());
-        }
-
-        /* Initialize the virtual site communication */
-        vsite = initVsite(mtop, cr);
-
-        calc_shifts(box, fr->shift_vec);
-
-        /* With periodic molecules the charge groups should be whole at start up
-         * and the virtual sites should not be far from their proper positions.
-         */
-        if (!inputrec->bContinuation && MASTER(cr) && !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
-        {
-            /* Make molecules whole at start of run */
-            if (fr->ePBC != epbcNONE)
-            {
-                do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
-            }
-            if (vsite)
-            {
-                /* Correct initial vsite positions are required
-                 * for the initial distribution in the domain decomposition
-                 * and for the initial shell prediction.
-                 */
-                constructVsitesGlobal(mtop, globalState->x);
-            }
-        }
-
-        if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
-        {
-            ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
-            ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
-        }
-    }
-    else
-    {
-        /* This is a PME only node */
-
-        GMX_ASSERT(globalState == nullptr,
-                   "We don't need the state on a PME only rank and expect it to be unitialized");
-
-        ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
-        ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
-    }
-
-    gmx_pme_t* sepPmeData = nullptr;
-    // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
-    GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr),
-               "Double-checking that only PME-only ranks have no forcerec");
-    gmx_pme_t*& pmedata = fr ? fr->pmedata : sepPmeData;
-
-    // TODO should live in ewald module once its testing is improved
-    //
-    // Later, this program could contain kernels that might be later
-    // re-used as auto-tuning progresses, or subsequent simulations
-    // are invoked.
-    PmeGpuProgramStorage pmeGpuProgram;
-    if (thisRankHasPmeGpuTask)
-    {
-        pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
-    }
-
-    /* Initiate PME if necessary,
-     * either on all nodes or on dedicated PME nodes only. */
-    if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
-    {
-        if (mdAtoms && mdAtoms->mdatoms())
-        {
-            nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
-            if (EVDW_PME(inputrec->vdwtype))
-            {
-                nTypePerturbed = mdAtoms->mdatoms()->nTypePerturbed;
-            }
-        }
-        if (cr->npmenodes > 0)
-        {
-            /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
-            gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
-            gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
-        }
-
-        if (thisRankHasDuty(cr, DUTY_PME))
-        {
-            try
-            {
-                pmedata = gmx_pme_init(cr, getNumPmeDomains(cr->dd), inputrec, nChargePerturbed != 0,
-                                       nTypePerturbed != 0, mdrunOptions.reproducible, ewaldcoeff_q,
-                                       ewaldcoeff_lj, gmx_omp_nthreads_get(emntPME), pmeRunMode,
-                                       nullptr, pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
-            }
-            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
-        }
-    }
-
-
-    if (EI_DYNAMICS(inputrec->eI))
-    {
-        /* Turn on signal handling on all nodes */
-        /*
-         * (A user signal from the PME nodes (if any)
-         * is communicated to the PP nodes.
-         */
-        signal_handler_install();
-    }
-
-    pull_t* pull_work = nullptr;
-    if (thisRankHasDuty(cr, DUTY_PP))
-    {
-        /* Assumes uniform use of the number of OpenMP threads */
-        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
-
-        if (inputrec->bPull)
-        {
-            /* Initialize pull code */
-            pull_work = init_pull(fplog, inputrec->pull, inputrec, &mtop, cr, &atomSets,
-                                  inputrec->fepvals->init_lambda);
-            if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
-            {
-                initPullHistory(pull_work, &observablesHistory);
-            }
-            if (EI_DYNAMICS(inputrec->eI) && MASTER(cr))
-            {
-                init_pull_output_files(pull_work, filenames.size(), filenames.data(), oenv, startingBehavior);
-            }
-        }
-
-        std::unique_ptr<EnforcedRotation> enforcedRotation;
-        if (inputrec->bRot)
-        {
-            /* Initialize enforced rotation code */
-            enforcedRotation =
-                    init_rot(fplog, inputrec, filenames.size(), filenames.data(), cr, &atomSets,
-                             globalState.get(), &mtop, oenv, mdrunOptions, startingBehavior);
-        }
-
-        t_swap* swap = nullptr;
-        if (inputrec->eSwapCoords != eswapNO)
-        {
-            /* Initialize ion swapping code */
-            swap = init_swapcoords(fplog, inputrec,
-                                   opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
-                                   &mtop, globalState.get(), &observablesHistory, cr, &atomSets,
-                                   oenv, mdrunOptions, startingBehavior);
-        }
-
-        /* Let makeConstraints know whether we have essential dynamics constraints. */
-        auto constr = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics, fplog,
-                                      *mdAtoms->mdatoms(), cr, ms, &nrnb, wcycle, fr->bMolPBC);
-
-        /* Energy terms and groups */
-        gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),
-                             inputrec->fepvals->n_lambda);
-
-        // cos acceleration is only supported by md, but older tpr
-        // files might still combine it with other integrators
-        GMX_RELEASE_ASSERT(inputrec->cos_accel == 0.0 || inputrec->eI == eiMD,
-                           "cos_acceleration is only supported by integrator=md");
-
-        /* Kinetic energy data */
-        gmx_ekindata_t ekind;
-        init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel);
-
-        /* Set up interactive MD (IMD) */
-        auto imdSession =
-                makeImdSession(inputrec, cr, wcycle, &enerd, ms, &mtop, mdlog,
-                               MASTER(cr) ? globalState->x.rvec_array() : nullptr, filenames.size(),
-                               filenames.data(), oenv, mdrunOptions.imdOptions, startingBehavior);
-
-        if (DOMAINDECOMP(cr))
-        {
-            GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
-            /* This call is not included in init_domain_decomposition mainly
-             * because fr->cginfo_mb is set later.
-             */
-            dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
-                            domdecOptions.checkBondedInteractions, fr->cginfo_mb);
-        }
-
-        // TODO This is not the right place to manage the lifetime of
-        // this data structure, but currently it's the easiest way to
-        // make it work.
-        MdrunScheduleWorkload runScheduleWork;
-        // Also populates the simulation constant workload description.
-        runScheduleWork.simulationWork = createSimulationWorkload(
-                useGpuForNonbonded, pmeRunMode, useGpuForBonded, useGpuForUpdate,
-                devFlags.enableGpuBufferOps, devFlags.enableGpuHaloExchange,
-                devFlags.enableGpuPmePPComm, haveEwaldSurfaceContribution(*inputrec));
-
-        std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
-        if (gpusWereDetected
-            && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME))
-                || runScheduleWork.simulationWork.useGpuBufferOps))
-        {
-            const void* pmeStream = pme_gpu_get_device_stream(fr->pmedata);
-            const void* localStream =
-                    fr->nbv->gpu_nbv != nullptr
-                            ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local)
-                            : nullptr;
-            const void* nonLocalStream =
-                    fr->nbv->gpu_nbv != nullptr
-                            ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal)
-                            : nullptr;
-            const void*        deviceContext = pme_gpu_get_device_context(fr->pmedata);
-            const int          paddingSize   = pme_gpu_get_padding_size(fr->pmedata);
-            GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator)
-                                                      ? GpuApiCallBehavior::Async
-                                                      : GpuApiCallBehavior::Sync;
-
-            stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
-                    pmeStream, localStream, nonLocalStream, deviceContext, transferKind, paddingSize, wcycle);
-            fr->stateGpu = stateGpu.get();
-        }
-
-        GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
-        SimulatorBuilder simulatorBuilder;
-
-        // build and run simulator object based on user-input
-        auto simulator = simulatorBuilder.build(
-                inputIsCompatibleWithModularSimulator, fplog, cr, ms, mdlog,
-                static_cast<int>(filenames.size()), filenames.data(), oenv, mdrunOptions,
-                startingBehavior, vsite.get(), constr.get(),
-                enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr, deform.get(),
-                mdModules_->outputProvider(), mdModules_->notifier(), inputrec, imdSession.get(),
-                pull_work, swap, &mtop, fcd, globalState.get(), &observablesHistory, mdAtoms.get(),
-                &nrnb, wcycle, fr, &enerd, &ekind, &runScheduleWork, replExParams, membed,
-                walltime_accounting, std::move(stopHandlerBuilder_), doRerun);
-        simulator->run();
-
-        if (fr->pmePpCommGpu)
-        {
-            // destroy object since it is no longer required. (This needs to be done while the GPU context still exists.)
-            fr->pmePpCommGpu.reset();
-        }
-
-        if (inputrec->bPull)
-        {
-            finish_pull(pull_work);
-        }
-        finish_swapcoords(swap);
-    }
-    else
-    {
-        GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
-        /* do PME only */
-        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
-        gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
-    }
-
-    wallcycle_stop(wcycle, ewcRUN);
-
-    /* Finish up, write some stuff
-     * if rerunMD, don't write last frame again
-     */
-    finish_run(fplog, mdlog, cr, inputrec, &nrnb, wcycle, walltime_accounting,
-               fr ? fr->nbv.get() : nullptr, pmedata, EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
-
-    // clean up cycle counter
-    wallcycle_destroy(wcycle);
-
-    // Free PME data
-    if (pmedata)
-    {
-        gmx_pme_destroy(pmedata);
-        pmedata = nullptr;
-    }
-
-    // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
-    // before we destroy the GPU context(s) in free_gpu_resources().
-    // Pinned buffers are associated with contexts in CUDA.
-    // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
-    mdAtoms.reset(nullptr);
-    globalState.reset(nullptr);
-    mdModules_.reset(nullptr); // destruct force providers here as they might also use the GPU
-
-    /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
-    free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info);
-    free_gpu(nonbondedDeviceInfo);
-    free_gpu(pmeDeviceInfo);
-    done_forcerec(fr, mtop.molblock.size());
-    sfree(fcd);
-
-    if (doMembed)
-    {
-        free_membed(membed);
-    }
-
-    /* Does what it says */
-    print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
-    walltime_accounting_destroy(walltime_accounting);
-
-    // Ensure log file content is written
-    if (logFileHandle)
-    {
-        gmx_fio_flush(logFileHandle);
-    }
-
-    /* Reset FPEs (important for unit tests) by disabling them. Assumes no
-     * exceptions were enabled before function was called. */
-    if (bEnableFPE)
-    {
-        gmx_fedisableexcept();
-    }
-
-    auto rc = static_cast<int>(gmx_get_stop_condition());
-
-#if GMX_THREAD_MPI
-    /* we need to join all threads. The sub-threads join when they
-       exit this function, but the master thread needs to be told to
-       wait for that. */
-    if (PAR(cr) && MASTER(cr))
-    {
-        tMPI_Finalize();
-    }
-#endif
-    return rc;
-}
-
-Mdrunner::~Mdrunner()
-{
-    // Clean up of the Manager.
-    // This will end up getting called on every thread-MPI rank, which is unnecessary,
-    // but okay as long as threads synchronize some time before adding or accessing
-    // a new set of restraints.
-    if (restraintManager_)
-    {
-        restraintManager_->clear();
-        GMX_ASSERT(restraintManager_->countRestraints() == 0,
-                   "restraints added during runner life time should be cleared at runner "
-                   "destruction.");
-    }
-};
-
-void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller, const std::string& name)
-{
-    GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager.");
-    // Not sure if this should be logged through the md logger or something else,
-    // but it is helpful to have some sort of INFO level message sent somewhere.
-    //    std::cout << "Registering restraint named " << name << std::endl;
-
-    // When multiple restraints are used, it may be wasteful to register them separately.
-    // Maybe instead register an entire Restraint Manager as a force provider.
-    restraintManager_->addToSpec(std::move(puller), name);
-}
-
-Mdrunner::Mdrunner(std::unique_ptr<MDModules> mdModules) : mdModules_(std::move(mdModules)) {}
-
-Mdrunner::Mdrunner(Mdrunner&&) noexcept = default;
-
-//NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
-Mdrunner& Mdrunner::operator=(Mdrunner&& /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
-
-class Mdrunner::BuilderImplementation
-{
-public:
-    BuilderImplementation() = delete;
-    BuilderImplementation(std::unique_ptr<MDModules> mdModules, compat::not_null<SimulationContext*> context);
-    ~BuilderImplementation();
-
-    BuilderImplementation& setExtraMdrunOptions(const MdrunOptions& options,
-                                                real                forceWarningThreshold,
-                                                StartingBehavior    startingBehavior);
-
-    void addDomdec(const DomdecOptions& options);
-
-    void addVerletList(int nstlist);
-
-    void addReplicaExchange(const ReplicaExchangeParameters& params);
-
-    void addNonBonded(const char* nbpu_opt);
-
-    void addPME(const char* pme_opt_, const char* pme_fft_opt_);
-
-    void addBondedTaskAssignment(const char* bonded_opt);
-
-    void addUpdateTaskAssignment(const char* update_opt);
-
-    void addHardwareOptions(const gmx_hw_opt_t& hardwareOptions);
-
-    void addFilenames(ArrayRef<const t_filenm> filenames);
-
-    void addOutputEnvironment(gmx_output_env_t* outputEnvironment);
-
-    void addLogFile(t_fileio* logFileHandle);
-
-    void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder);
-
-    Mdrunner build();
-
-private:
-    // Default parameters copied from runner.h
-    // \todo Clarify source(s) of default parameters.
-
-    const char* nbpu_opt_    = nullptr;
-    const char* pme_opt_     = nullptr;
-    const char* pme_fft_opt_ = nullptr;
-    const char* bonded_opt_  = nullptr;
-    const char* update_opt_  = nullptr;
-
-    MdrunOptions mdrunOptions_;
-
-    DomdecOptions domdecOptions_;
-
-    ReplicaExchangeParameters replicaExchangeParameters_;
-
-    //! Command-line override for the duration of a neighbor list with the Verlet scheme.
-    int nstlist_ = 0;
-
-    //! Multisim communicator handle.
-    gmx_multisim_t* multiSimulation_;
-
-    //! mdrun communicator
-    MPI_Comm communicator_ = MPI_COMM_NULL;
-
-    //! Print a warning if any force is larger than this (in kJ/mol nm).
-    real forceWarningThreshold_ = -1;
-
-    //! Whether the simulation will start afresh, or restart with/without appending.
-    StartingBehavior startingBehavior_ = StartingBehavior::NewSimulation;
-
-    //! The modules that comprise the functionality of mdrun.
-    std::unique_ptr<MDModules> mdModules_;
-
-    //! \brief Parallelism information.
-    gmx_hw_opt_t hardwareOptions_;
-
-    //! filename options for simulation.
-    ArrayRef<const t_filenm> filenames_;
-
-    /*! \brief Handle to output environment.
-     *
-     * \todo gmx_output_env_t needs lifetime management.
-     */
-    gmx_output_env_t* outputEnvironment_ = nullptr;
-
-    /*! \brief Non-owning handle to MD log file.
-     *
-     * \todo Context should own output facilities for client.
-     * \todo Improve log file handle management.
-     * \internal
-     * Code managing the FILE* relies on the ability to set it to
-     * nullptr to check whether the filehandle is valid.
-     */
-    t_fileio* logFileHandle_ = nullptr;
-
-    /*!
-     * \brief Builder for simulation stop signal handler.
-     */
-    std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
-};
-
-Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules,
-                                                       compat::not_null<SimulationContext*> context) :
-    mdModules_(std::move(mdModules))
-{
-    communicator_    = context->communicator_;
-    multiSimulation_ = context->multiSimulation_.get();
-}
-
-Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
-
-Mdrunner::BuilderImplementation&
-Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions&    options,
-                                                      const real             forceWarningThreshold,
-                                                      const StartingBehavior startingBehavior)
-{
-    mdrunOptions_          = options;
-    forceWarningThreshold_ = forceWarningThreshold;
-    startingBehavior_      = startingBehavior;
-    return *this;
-}
-
-void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions& options)
-{
-    domdecOptions_ = options;
-}
-
-void Mdrunner::BuilderImplementation::addVerletList(int nstlist)
-{
-    nstlist_ = nstlist;
-}
-
-void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters& params)
-{
-    replicaExchangeParameters_ = params;
-}
-
-Mdrunner Mdrunner::BuilderImplementation::build()
-{
-    auto newRunner = Mdrunner(std::move(mdModules_));
-
-    newRunner.mdrunOptions     = mdrunOptions_;
-    newRunner.pforce           = forceWarningThreshold_;
-    newRunner.startingBehavior = startingBehavior_;
-    newRunner.domdecOptions    = domdecOptions_;
-
-    // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid
-    newRunner.hw_opt = hardwareOptions_;
-
-    // No invariant to check. This parameter exists to optionally override other behavior.
-    newRunner.nstlist_cmdline = nstlist_;
-
-    newRunner.replExParams = replicaExchangeParameters_;
-
-    newRunner.filenames = filenames_;
-
-    newRunner.communicator = communicator_;
-
-    // nullptr is a valid value for the multisim handle
-    newRunner.ms = multiSimulation_;
-
-    // \todo Clarify ownership and lifetime management for gmx_output_env_t
-    // \todo Update sanity checking when output environment has clearly specified invariants.
-    // Initialization and default values for oenv are not well specified in the current version.
-    if (outputEnvironment_)
-    {
-        newRunner.oenv = outputEnvironment_;
-    }
-    else
-    {
-        GMX_THROW(gmx::APIError(
-                "MdrunnerBuilder::addOutputEnvironment() is required before build()"));
-    }
-
-    newRunner.logFileHandle = logFileHandle_;
-
-    if (nbpu_opt_)
-    {
-        newRunner.nbpu_opt = nbpu_opt_;
-    }
-    else
-    {
-        GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()"));
-    }
-
-    if (pme_opt_ && pme_fft_opt_)
-    {
-        newRunner.pme_opt     = pme_opt_;
-        newRunner.pme_fft_opt = pme_fft_opt_;
-    }
-    else
-    {
-        GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()"));
-    }
-
-    if (bonded_opt_)
-    {
-        newRunner.bonded_opt = bonded_opt_;
-    }
-    else
-    {
-        GMX_THROW(gmx::APIError(
-                "MdrunnerBuilder::addBondedTaskAssignment() is required before build()"));
-    }
-
-    if (update_opt_)
-    {
-        newRunner.update_opt = update_opt_;
-    }
-    else
-    {
-        GMX_THROW(gmx::APIError(
-                "MdrunnerBuilder::addUpdateTaskAssignment() is required before build()  "));
-    }
-
-
-    newRunner.restraintManager_ = std::make_unique<gmx::RestraintManager>();
-
-    if (stopHandlerBuilder_)
-    {
-        newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_);
-    }
-    else
-    {
-        newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>();
-    }
-
-    return newRunner;
-}
-
-void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
-{
-    nbpu_opt_ = nbpu_opt;
-}
-
-void Mdrunner::BuilderImplementation::addPME(const char* pme_opt, const char* pme_fft_opt)
-{
-    pme_opt_     = pme_opt;
-    pme_fft_opt_ = pme_fft_opt;
-}
-
-void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt)
-{
-    bonded_opt_ = bonded_opt;
-}
-
-void Mdrunner::BuilderImplementation::addUpdateTaskAssignment(const char* update_opt)
-{
-    update_opt_ = update_opt;
-}
-
-void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions)
-{
-    hardwareOptions_ = hardwareOptions;
-}
-
-void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames)
-{
-    filenames_ = filenames;
-}
-
-void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
-{
-    outputEnvironment_ = outputEnvironment;
-}
-
-void Mdrunner::BuilderImplementation::addLogFile(t_fileio* logFileHandle)
-{
-    logFileHandle_ = logFileHandle;
-}
-
-void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
-{
-    stopHandlerBuilder_ = std::move(builder);
-}
-
-MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
-                                 compat::not_null<SimulationContext*> context) :
-    impl_{ std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules), context) }
-{
-}
-
-MdrunnerBuilder::~MdrunnerBuilder() = default;
-
-MdrunnerBuilder& MdrunnerBuilder::addSimulationMethod(const MdrunOptions&    options,
-                                                      real                   forceWarningThreshold,
-                                                      const StartingBehavior startingBehavior)
-{
-    impl_->setExtraMdrunOptions(options, forceWarningThreshold, startingBehavior);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addDomainDecomposition(const DomdecOptions& options)
-{
-    impl_->addDomdec(options);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addNeighborList(int nstlist)
-{
-    impl_->addVerletList(nstlist);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters& params)
-{
-    impl_->addReplicaExchange(params);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addNonBonded(const char* nbpu_opt)
-{
-    impl_->addNonBonded(nbpu_opt);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addElectrostatics(const char* pme_opt, const char* pme_fft_opt)
-{
-    // The builder method may become more general in the future, but in this version,
-    // parameters for PME electrostatics are both required and the only parameters
-    // available.
-    if (pme_opt && pme_fft_opt)
-    {
-        impl_->addPME(pme_opt, pme_fft_opt);
-    }
-    else
-    {
-        GMX_THROW(
-                gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers."));
-    }
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt)
-{
-    impl_->addBondedTaskAssignment(bonded_opt);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addUpdateTaskAssignment(const char* update_opt)
-{
-    impl_->addUpdateTaskAssignment(update_opt);
-    return *this;
-}
-
-Mdrunner MdrunnerBuilder::build()
-{
-    return impl_->build();
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t& hardwareOptions)
-{
-    impl_->addHardwareOptions(hardwareOptions);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames)
-{
-    impl_->addFilenames(filenames);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment)
-{
-    impl_->addOutputEnvironment(outputEnvironment);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addLogFile(t_fileio* logFileHandle)
-{
-    impl_->addLogFile(logFileHandle);
-    return *this;
-}
-
-MdrunnerBuilder& MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder)
-{
-    impl_->addStopHandlerBuilder(std::move(builder));
-    return *this;
-}
-
-MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder&&) noexcept = default;
-
-MdrunnerBuilder& MdrunnerBuilder::operator=(MdrunnerBuilder&&) noexcept = default;
-
-} // namespace gmx