From 791783d1fe841d41ad62e6c8a892316fb0347d79 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Wed, 26 Apr 2023 09:18:43 -0400
Subject: [PATCH 01/42] Improve build process and minor build-related code
 updates

---
 CMake/FileLists.cmake                   |   1 -
 CMake/GOMCCPUSetup.cmake                |  48 +++-
 CMake/GOMCCUDASetup.cmake               | 284 ++++++++++++++----------
 CMakeLists.txt                          |  74 +++---
 README.md                               |  30 +--
 lib/AlphaNum.cpp                        |   7 +
 lib/CircuitFinder.cpp                   |  11 +
 lib/Endian.h                            | 111 ---------
 metamake.sh                             |  67 ++++--
 src/GOMCEventsProfileDef.h              |   7 +
 src/GPU/CalculateForceCUDAKernel.cu     |   2 +-
 src/GPU/CalculateMinImageCUDAKernel.cuh |   6 +-
 src/Random123Wrapper.cpp                |   7 +
 src/Random123Wrapper.h                  |   7 +
 14 files changed, 358 insertions(+), 304 deletions(-)
 delete mode 100644 lib/Endian.h

diff --git a/CMake/FileLists.cmake b/CMake/FileLists.cmake
index bc5e09e49..b711d2777 100644
--- a/CMake/FileLists.cmake
+++ b/CMake/FileLists.cmake
@@ -191,7 +191,6 @@ set(libHeaders
    lib/AlphaNum.h
    lib/BasicTypes.h
    lib/BitLib.h
-   lib/Endian.h
    lib/GeomLib.h
    lib/Lambda.h
    lib/NumLib.h
diff --git a/CMake/GOMCCPUSetup.cmake b/CMake/GOMCCPUSetup.cmake
index 567963ca2..684410575 100644
--- a/CMake/GOMCCPUSetup.cmake
+++ b/CMake/GOMCCPUSetup.cmake
@@ -1,26 +1,33 @@
 #EnsemblePreprocessor defines NVT = 1, GEMC = 2, GCMC = 3, NPT = 4
-#NPT (Isothermal-Isobaric) Ensemble
 
-set(NPT_flags "-DENSEMBLE=4")
-set(NPT_name "GOMC_CPU_NPT")
-
-#Grand Canonical Monte Carlo
-set(GC_flags "-DENSEMBLE=3")
-set(GC_name "GOMC_CPU_GCMC")
+#NVT (Canonical) Ensemble
+set(NVT_flags "-DENSEMBLE=1")
+set(NVT_name "GOMC_CPU_NVT")
 
 #Gibbs Ensemble Monte Carlo
 set(GE_flags "-DENSEMBLE=2")
 set(GE_name "GOMC_CPU_GEMC")
 
-#NVT (Canonical) Ensemble
-set(NVT_flags "-DENSEMBLE=1")
-set(NVT_name "GOMC_CPU_NVT")
+#Grand Canonical Monte Carlo
+set(GC_flags "-DENSEMBLE=3")
+set(GC_name "GOMC_CPU_GCMC")
+
+#NPT (Isothermal-Isobaric) Ensemble
+set(NPT_flags "-DENSEMBLE=4")
+set(NPT_name "GOMC_CPU_NPT")
 
 set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_CXX_STANDARD_REQUIRED true)
 
 if(ENSEMBLE_NVT)
    add_executable(NVT ${sources} ${headers} ${libHeaders} ${libSources})
+   # Set Compiler and linker flags for each compiler
+   target_compile_options(NVT
+      PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
+             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>)
+   target_link_options(NVT
+      PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
+             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>)
    set_target_properties(NVT PROPERTIES 
       OUTPUT_NAME ${NVT_name}
       COMPILE_FLAGS "${NVT_flags}")
@@ -35,6 +42,13 @@ endif()
 
 if(ENSEMBLE_GEMC)
    add_executable(GEMC ${sources} ${headers} ${libHeaders} ${libSources})
+   # Set Compiler and linker flags for each compiler
+   target_compile_options(GEMC
+      PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
+             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>)
+   target_link_options(GEMC
+      PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
+             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>)
    set_target_properties(GEMC PROPERTIES 
       OUTPUT_NAME ${GE_name}
       COMPILE_FLAGS "${GE_flags}")
@@ -49,6 +63,13 @@ endif()
 
 if(ENSEMBLE_GCMC)
    add_executable(GCMC ${sources} ${headers} ${libHeaders} ${libSources})
+   # Set Compiler and linker flags for each compiler
+   target_compile_options(GCMC
+      PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
+             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>)
+   target_link_options(GCMC
+      PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
+             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>)
    set_target_properties(GCMC PROPERTIES 
       OUTPUT_NAME ${GC_name}
       COMPILE_FLAGS "${GC_flags}")
@@ -63,6 +84,13 @@ endif()
 
 if(ENSEMBLE_NPT)
    add_executable(NPT ${sources} ${headers} ${libHeaders} ${libSources})
+   # Set Compiler and linker flags for each compiler
+   target_compile_options(NPT
+      PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
+             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>)
+   target_link_options(NPT
+      PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
+             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>)
    set_target_properties(NPT PROPERTIES 
       OUTPUT_NAME ${NPT_name}
       COMPILE_FLAGS "${NPT_flags}")
diff --git a/CMake/GOMCCUDASetup.cmake b/CMake/GOMCCUDASetup.cmake
index 9099e3107..9eca33dcd 100644
--- a/CMake/GOMCCUDASetup.cmake
+++ b/CMake/GOMCCUDASetup.cmake
@@ -1,118 +1,166 @@
-# Find CUDA is enabled, set it up
-
-if (CMAKE_BUILD_TYPE STREQUAL "Debug")
-	message("-- Debug build type detected, passing : '-g -G --keep' to nvcc")
-	set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -g -G --keep")
-endif()
-
-
-set(GEN_COMP_flag "-DGOMC_CUDA -DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT ")
-
-if (GOMC_NVTX_ENABLED)
-	message("-- Enabling profiling with NVTX for GPU")
-	set(GEN_COMP_flag "${GEN_COMP_flag} -DGOMC_NVTX_ENABLED")
-endif()
-
-
-include_directories(src/GPU)
-
-set(GPU_NPT_flags "-DENSEMBLE=4 ${GEN_COMP_flag}")
-set(GPU_NPT_name "GOMC_GPU_NPT")
-set(GPU_GC_flags "-DENSEMBLE=3 ${GEN_COMP_flag}")
-set(GPU_GC_name "GOMC_GPU_GCMC")
-set(GPU_GE_flags "-DENSEMBLE=2 ${GEN_COMP_flag}")
-set(GPU_GE_name "GOMC_GPU_GEMC")
-set(GPU_NVT_flags "-DENSEMBLE=1 ${GEN_COMP_flag}")
-set(GPU_NVT_name "GOMC_GPU_NVT")
-
-set(CMAKE_CUDA_STANDARD 14)
-set(CMAKE_CUDA_STANDARD_REQUIRED true)
-set(CMAKE_CXX_STANDARD 14)
-set(CMAKE_CXX_STANDARD_REQUIRED true)
-
-# Set host compiler
-set(CCBIN "-ccbin=${CMAKE_CXX_COMPILER}")
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CCBIN} -Wno-deprecated-gpu-targets" )
-
-include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
-
-#####################################
-if(ENSEMBLE_GPU_NVT)
-    add_executable(GPU_NVT ${cudaSources} ${cudaHeaders}
-    ${sources} ${headers} ${libHeaders} ${libSources})
-    set_target_properties(GPU_NVT PROPERTIES
-        CUDA_SEPARABLE_COMPILATION ON
-        OUTPUT_NAME ${GPU_NVT_name}
-        CUDA_ARCHITECTURES "35;60;70;80"
-        COMPILE_FLAGS "${GPU_NVT_flags}")
-	if (CMAKE_BUILD_TYPE STREQUAL "Debug")
-		message("-- Debug build type detected, GPU_NVT setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
-    	set_property(TARGET GPU_NVT PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
-	endif()
-    if(WIN32)
-        target_link_libraries(GPU_NVT ws2_32)
-    endif()
-    if(MPI_FOUND)
-	    target_link_libraries(GPU_NVT ${MPI_LIBRARIES})
-    endif()
-endif()
-
-if(ENSEMBLE_GPU_GEMC)
-    add_executable(GPU_GEMC ${cudaSources} ${cudaHeaders} ${sources}
-    ${headers} ${libHeaders} ${libSources})
-    set_target_properties(GPU_GEMC PROPERTIES
-        CUDA_SEPARABLE_COMPILATION ON
-        OUTPUT_NAME ${GPU_GE_name}
-        CUDA_ARCHITECTURES "35;60;70;80"
-        COMPILE_FLAGS "${GPU_GE_flags}")
-	if (CMAKE_BUILD_TYPE STREQUAL "Debug")
-		message("-- Debug build type detected, GPU_GEMC setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
-    	set_property(TARGET GPU_GEMC PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
-	endif()
-    if(WIN32)
-        target_link_libraries(GPU_GEMC ws2_32)
-    endif()
-    if(MPI_FOUND)
-	    target_link_libraries(GPU_GEMC ${MPI_LIBRARIES})
-    endif()
-endif()
-
-if(ENSEMBLE_GPU_GCMC)
-    add_executable(GPU_GCMC ${cudaSources} ${cudaHeaders} ${sources}
-    ${headers} ${libHeaders} ${libSources})
-    set_target_properties(GPU_GCMC PROPERTIES
-        CUDA_SEPARABLE_COMPILATION ON
-        OUTPUT_NAME ${GPU_GC_name}
-        CUDA_ARCHITECTURES "35;60;70;80"
-        COMPILE_FLAGS "${GPU_GC_flags}")
-	if (CMAKE_BUILD_TYPE STREQUAL "Debug")
-		message("-- Debug build type detected, GPU_GCMC setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
-    	set_property(TARGET GPU_GCMC PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
-	endif()
-    if(WIN32)
-        target_link_libraries(GPU_GCMC ws2_32)
-    endif()
-    if(MPI_FOUND)
-	    target_link_libraries(GPU_GCMC ${MPI_LIBRARIES})
-    endif()
-endif()
-
-if(ENSEMBLE_GPU_NPT)
-    add_executable(GPU_NPT ${cudaSources} ${cudaHeaders} ${sources}
-    ${headers} ${libHeaders} ${libSources})
-    set_target_properties(GPU_NPT PROPERTIES
-        CUDA_SEPARABLE_COMPILATION ON
-        OUTPUT_NAME ${GPU_NPT_name}
-        CUDA_ARCHITECTURES "35;60;70;80"
-        COMPILE_FLAGS "${GPU_NPT_flags}")
-	if (CMAKE_BUILD_TYPE STREQUAL "Debug")
-		message("-- Debug build type detected, GPU_NPT setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
-    	set_property(TARGET GPU_NPT PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
-	endif()
-    if(WIN32)
-        target_link_libraries(GPU_NPT ws2_32)
-    endif()
-    if(MPI_FOUND)
-	    target_link_libraries(GPU_NPT ${MPI_LIBRARIES})
-    endif()
-endif()
+# Find CUDA is enabled, set it up
+
+set(CMAKE_CUDA_COMP_FLAGS -DGOMC_CUDA -DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT)
+
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+	message("-- Debug build type detected, passing '-g -G --keep' to nvcc")
+	set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -g -G --keep)
+endif()
+
+if(GOMC_OPT)
+	set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -O3)
+endif()
+
+if(GOMC_NVTX_ENABLED)
+	message("-- Enabling profiling with NVTX for GPU")
+	set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -DGOMC_NVTX_ENABLED)
+endif()
+
+# The "all" value of CMAKE_CUDA_ARCHITECTURES requires CMake 3.23 or newer;
+# older releases get an explicit list. Once CMake 3.23 is the minimum version,
+# just use set(CMAKE_CUDA_ARCHITECTURES all) and remove the if block.
+if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.23)
+    set(CMAKE_CUDA_ARCHITECTURES all)
+else()
+    set(CMAKE_CUDA_ARCHITECTURES "50;60;70;80")
+endif()
+
+include_directories(src/GPU)
+
+set(GPU_NVT_flags "-DENSEMBLE=1")
+set(GPU_NVT_name "GOMC_GPU_NVT")
+set(GPU_GE_flags "-DENSEMBLE=2")
+set(GPU_GE_name "GOMC_GPU_GEMC")
+set(GPU_GC_flags "-DENSEMBLE=3")
+set(GPU_GC_name "GOMC_GPU_GCMC")
+set(GPU_NPT_flags "-DENSEMBLE=4")
+set(GPU_NPT_name "GOMC_GPU_NPT")
+
+set(CMAKE_CUDA_STANDARD 14)
+set(CMAKE_CUDA_STANDARD_REQUIRED true)
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED true)
+
+# Turn off warning that CUDA code was not compiled with the -ipo flag
+if(GOMC_OPT)
+   set(CMAKE_INTEL_LINK_FLAGS ${CMAKE_INTEL_LINK_FLAGS} -diag-disable=11003)
+endif()
+
+# Only disable the warning on deprecated GPU targets when compiling, not linking
+set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_COMP_FLAGS})
+set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -Wno-deprecated-gpu-targets)
+
+include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+
+#####################################
+if(ENSEMBLE_GPU_NVT)
+    add_executable(GPU_NVT ${cudaSources} ${cudaHeaders}
+    ${sources} ${headers} ${libHeaders} ${libSources})
+    # Set compiler and linker flags for each compiler
+    target_compile_options(GPU_NVT
+       PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
+              $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
+              $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS}>)
+    target_link_options(GPU_NVT
+       PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
+              $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+              $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
+    set_target_properties(GPU_NVT PROPERTIES
+        CUDA_SEPARABLE_COMPILATION ON
+        OUTPUT_NAME ${GPU_NVT_name}
+        COMPILE_FLAGS "${GPU_NVT_flags}")
+	if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+		message("-- Debug build type detected, GPU_NVT setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
+    	set_property(TARGET GPU_NVT PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+	endif()
+    if(WIN32)
+        target_link_libraries(GPU_NVT ws2_32)
+    endif()
+    if(MPI_FOUND)
+	    target_link_libraries(GPU_NVT ${MPI_LIBRARIES})
+    endif()
+endif()
+
+if(ENSEMBLE_GPU_GEMC)
+    add_executable(GPU_GEMC ${cudaSources} ${cudaHeaders} ${sources}
+    ${headers} ${libHeaders} ${libSources})
+    # Set compiler and linker flags for each compiler
+    target_compile_options(GPU_GEMC
+       PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
+              $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
+              $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS}>)
+    target_link_options(GPU_GEMC
+       PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
+              $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+              $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
+    set_target_properties(GPU_GEMC PROPERTIES
+        CUDA_SEPARABLE_COMPILATION ON
+        OUTPUT_NAME ${GPU_GE_name}
+        COMPILE_FLAGS "${GPU_GE_flags}")
+	if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+		message("-- Debug build type detected, GPU_GEMC setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
+    	set_property(TARGET GPU_GEMC PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+	endif()
+    if(WIN32)
+        target_link_libraries(GPU_GEMC ws2_32)
+    endif()
+    if(MPI_FOUND)
+	    target_link_libraries(GPU_GEMC ${MPI_LIBRARIES})
+    endif()
+endif()
+
+if(ENSEMBLE_GPU_GCMC)
+    add_executable(GPU_GCMC ${cudaSources} ${cudaHeaders} ${sources}
+    ${headers} ${libHeaders} ${libSources})
+    # Set compiler and linker flags for each compiler
+    target_compile_options(GPU_GCMC
+       PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
+              $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
+              $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS}>)
+    target_link_options(GPU_GCMC
+       PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
+              $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+              $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
+    set_target_properties(GPU_GCMC PROPERTIES
+        CUDA_SEPARABLE_COMPILATION ON
+        OUTPUT_NAME ${GPU_GC_name}
+        COMPILE_FLAGS "${GPU_GC_flags}")
+	if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+		message("-- Debug build type detected, GPU_GCMC setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
+    	set_property(TARGET GPU_GCMC PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+	endif()
+    if(WIN32)
+        target_link_libraries(GPU_GCMC ws2_32)
+    endif()
+    if(MPI_FOUND)
+	    target_link_libraries(GPU_GCMC ${MPI_LIBRARIES})
+    endif()
+endif()
+
+if(ENSEMBLE_GPU_NPT)
+    add_executable(GPU_NPT ${cudaSources} ${cudaHeaders} ${sources}
+    ${headers} ${libHeaders} ${libSources})
+    # Set compiler and linker flags for each compiler
+    target_compile_options(GPU_NPT
+       PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
+              $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
+              $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS}>)
+    target_link_options(GPU_NPT
+       PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
+              $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+              $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
+    set_target_properties(GPU_NPT PROPERTIES
+        CUDA_SEPARABLE_COMPILATION ON
+        OUTPUT_NAME ${GPU_NPT_name}
+        COMPILE_FLAGS "${GPU_NPT_flags}")
+	if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+		message("-- Debug build type detected, GPU_NPT setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
+    	set_property(TARGET GPU_NPT PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+	endif()
+    if(WIN32)
+        target_link_libraries(GPU_NPT ws2_32)
+    endif()
+    if(MPI_FOUND)
+	    target_link_libraries(GPU_NPT ${MPI_LIBRARIES})
+    endif()
+endif()
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5305e8697..37d00553a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.8)
+cmake_minimum_required(VERSION 3.18)
 
 project(GOMC)
 
@@ -8,21 +8,49 @@ include_directories(src/cbmc)
 include_directories(src/moves)
 include_directories(src/GPU)
 
+#Versioning
+set(GOMC_VERSION_MAJOR 2)
+set(GOMC_VERSION_MINOR 75)
+
 #Out-of-source build
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
 
-# clang-tidy
-set(CMAKE_CXX_CLANG_TIDY clang-tidy -checks=-*,mpi-*,openmp-*)
-
-#Versioning
-set (GOMC_VERSION_MAJOR 2)
-set (GOMC_VERSION_MINOR 75)
+#clang-tidy
+if(GOMC_TIDY)
+   set(CMAKE_CXX_CLANG_TIDY clang-tidy -checks=-*,mpi-*,openmp-*)
+endif()
 
-IF(NOT CMAKE_BUILD_TYPE)
-   SET(CMAKE_BUILD_TYPE Release CACHE STRING
+if(NOT CMAKE_BUILD_TYPE)
+   set(CMAKE_BUILD_TYPE Release CACHE STRING
       "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel."
       FORCE)
-ENDIF(NOT CMAKE_BUILD_TYPE)
+endif()
+
+#Set compile and link flags. Need to do it this way so that we can pass
+#the flags to NVCC properly.
+if(GOMC_OPT)
+   set(CMAKE_INTEL_COMP_FLAGS -Ofast -ipo -xHost)
+   set(CMAKE_INTEL_LINK_FLAGS -Ofast -ipo -xHost)
+   set(CMAKE_GNU_COMP_FLAGS -flto -O3 -march=native)
+   set(CMAKE_GNU_LINK_FLAGS -flto -O3 -march=native)
+endif()
+
+if(GOMC_ASAN)
+   set(CMAKE_GNU_COMP_FLAGS ${CMAKE_GNU_COMP_FLAGS} --param=max-vartrack-size=100000000 -fsanitize=address -fno-omit-frame-pointer)
+   set(CMAKE_GNU_LINK_FLAGS ${CMAKE_GNU_LINK_FLAGS} --param=max-vartrack-size=100000000 -fsanitize=address -fno-omit-frame-pointer)
+endif()
+
+# Find OpenMP and append its C++ flags to the per-compiler flag lists
+# Don't use OpenMP if using address sanitizer
+if(NOT GOMC_ASAN)
+    find_package(OpenMP)
+    if(OpenMP_CXX_FOUND)
+       list(APPEND CMAKE_GNU_COMP_FLAGS ${OpenMP_CXX_FLAGS})
+       list(APPEND CMAKE_GNU_LINK_FLAGS ${OpenMP_CXX_FLAGS})
+       list(APPEND CMAKE_INTEL_COMP_FLAGS ${OpenMP_CXX_FLAGS})
+       list(APPEND CMAKE_INTEL_LINK_FLAGS ${OpenMP_CXX_FLAGS})
+    endif()
+endif()
 
 set(ENSEMBLE_NVT ON CACHE BOOL "Build NVT version")
 set(ENSEMBLE_GEMC ON CACHE BOOL "Build GEMC version")
@@ -37,14 +65,10 @@ include(${PROJECT_SOURCE_DIR}/CMake/GOMCMPI.cmake)
 
 include_directories("${PROJECT_BINARY_DIR}")
 
-if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
-  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -stdlib=libc++") 
-  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -stdlib=libc++")
-endif()
-
-if(CMAKE_COMPILER_IS_GNUCXX)
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") 
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
+# Additional flags for GNU and Intel compilers set elsewhere
+if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -stdlib=libc++") 
+    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -stdlib=libc++")
 elseif(MSVC)
     set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /D_DEBUG /MTd /Zi /Ob0 /Od /RTC1")
     set(CMAKE_CXX_FLAGS_DEBUG_INIT "${CMAKE_CXX_FLAGS_DEBUG_INIT} /D_DEBUG /MTd /Zi /Ob0 /Od /RTC1")
@@ -70,26 +94,20 @@ configure_file(
 )
 
 # Enable google test
-# for now we will disable testing for intel compiler
+# for now we will disable testing for Intel compiler
 if(GOMC_GTEST OR GOMC_GTEST_MPI)
   enable_testing()
   include(test/GoogleTest.cmake)
 endif()
 
-# Find if CUDA exists and what is the version number
+# Check if CUDA exists and, if so, set the host compiler and enable CUDA
 include(CheckLanguage)
 check_language(CUDA)
-if (CMAKE_CUDA_COMPILER)
+if(CMAKE_CUDA_COMPILER)
+    set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
     enable_language(CUDA)
     include(${PROJECT_SOURCE_DIR}/CMake/GOMCCUDASetup.cmake)
 endif()
 
 # Setup Serial version
 include(${PROJECT_SOURCE_DIR}/CMake/GOMCCPUSetup.cmake)
-
-# find OpenMP and set it up
-find_package(OpenMP)
-if (OPENMP_FOUND)
-	set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
-	set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-endif()
diff --git a/README.md b/README.md
index 8eb302785..5fcfe19c3 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,9 @@ Current Release: 2.75 (6/21/2022)
 
 We recommend the [GOMC Project Website](http://gomc.eng.wayne.edu/ "GOMC Website") and the [user manual](https://gomc-wsu.github.io/Manual/ "User Manual") for further information and examples.
 
-To cite GOMC project, please use cite the following papers:
-1.  [Y. Nejahi, M. Soroush Barhaghi,  G. Schwing, L. Schwiebert, J. Potoff. SoftwareX, 13, 100627 (2021)](https://www.sciencedirect.com/science/article/pii/S235271102030340X)
-2.  [Y. Nejahi, M. Soroush Barhaghi, J. Mick, B. Jackman, K. Rushaidat, Y. Li, L. Schwiebert, J. Potoff. SoftwareX, 9, 20-27 (2019)](https://www.sciencedirect.com/science/article/pii/S2352711018301171?via%3Dihub "SoftwareX")
+To cite the GOMC project, please cite the following papers:
+1.  [Y. Nejahi, M. Soroush Barhaghi,  G. Schwing, L. Schwiebert, J. Potoff. SoftwareX, 13, 100627 (2021). doi: 10.1016/j.softx.2020.100627.](https://www.sciencedirect.com/science/article/pii/S235271102030340X)
+2.  [Y. Nejahi, M. Soroush Barhaghi, J. Mick, B. Jackman, K. Rushaidat, Y. Li, L. Schwiebert, J. Potoff. SoftwareX, 9, 20-27 (2019). doi: 10.1016/j.softx.2018.11.005.](https://www.sciencedirect.com/science/article/pii/S2352711018301171?via%3Dihub "SoftwareX")
 
 ## Building GOMC on GNU/Linux, macOS, or Cygwin:
 
@@ -29,24 +29,24 @@ To cite GOMC project, please use cite the following papers:
       ```bash
       ./metamake.sh
       ```
-  5. Step 4 should generate all the executables in ```bin``` directory.
+  5. Step 4 will place all the executables in the ```bin``` directory.
 
-  `./metamake.sh` accepts flags which indicates which ensembles to compile. Default behavior with no flag will compile all CPU compilers and if CUDA available, all GPU ensembles. Multiple flags can be used by separating with a space. Current accepted flags are: `CPU` to compile all CPU ensembles, `GPU` to compile all GPU ensembles, or you can compile ensembles individually by using any of the following flags:
+  `./metamake.sh` accepts flags which indicate which ensembles to compile. Default behavior with no flags will compile all CPU ensembles and, if CUDA is available, all GPU ensembles. Multiple flags must be separated by spaces. Current accepted flags are: `CPU` to compile all CPU ensembles, `GPU` to compile all GPU ensembles, or you can compile ensembles individually by using any of the following flags:
   `NVT`, `NPT`, `GCMC`, `GEMC`, `GPU_NVT`, `GPU_NPT`, `GPU_GCMC`, `GPU_GEMC`.
 
-> NOTES: Building GOMC requires cmake, available at http://www.cmake.org and in most Linux package repositories (as cmake). If you wish to utilize NVIDIA graphic cards you will need to install NVIDIA toolkit before compiling. The metamake file will automatically detect the location of CUDA installation. (More info in Manual)
+> NOTES: Building GOMC requires CMake, available at http://www.cmake.org and in most Linux package repositories (as cmake). If you wish to utilize NVIDIA graphics cards you will need to install the NVIDIA toolkit before compiling. The metamake file will automatically detect the location of your CUDA installation. (More detailed info can be found in the [user manual](https://gomc-wsu.github.io/Manual/ "User Manual").)
 
 ## Building GOMC on Windows:
   1. Open the Windows-compatible CMake GUI.
   2. Set the Source Folder to the GOMC root folder.
-  3. Set the build Folder to your Build Folder.
-  4. Click configure, select your compiler/environment
+  3. Set the Build Folder to your build folder.
+  4. Click Configure, select your compiler/environment.
   5. Wait for CMake to finish the configuration.
-  6. Click configure again and click generate.
-  7. Download [CUB library](https://nvlabs.github.io/cub/download_cub.html)
-  8. Extract CUB library and copy the "cub" folder from CUB library into "lib" folder inside GOMC directory.
-  9. Open the CMake-generated project/solution etc. to the desired IDE (e.g Visual Studio).
-  10. Using the solution in the IDE of choice build GOMC per the IDE's standard release compilation/executable generation methods.
+  6. Click Configure again and click Generate.
+  7. If your version of CUDA is older than CUDA 11, download the [CUB library](https://nvlabs.github.io/cub/download_cub.html).
+  8. If your version of CUDA is older than CUDA 11, extract the CUB library and copy the "cub" folder from the CUB library into the "lib" folder inside the GOMC directory.
+  9. Open the CMake-generated project/solution etc. in the desired IDE (e.g., Visual Studio).
+  10. Using the solution in the IDE, build GOMC per the IDE's standard release compilation/executable generation methods.
 
 > NOTES: You can also use CMake from the Windows command line if its directory is added to the PATH environment variable.
 
@@ -54,8 +54,8 @@ To cite GOMC project, please use cite the following papers:
   You can set the number of the threads using the +pN argument, where N is the number of threads.
   For example:
   ```bash
-  ./GOMC_<CPU|GPU>_XXXX +p4 in.conf
+  ./GOMC_GPU_GEMC +p4 in.conf
   ```
 
-  Which will run 4 threads and reads input file "in.conf".
+  This will run a simulation with the Gibbs ensemble on the GPU using 4 threads and load configuration settings from the file "in.conf".
 
diff --git a/lib/AlphaNum.cpp b/lib/AlphaNum.cpp
index 782bb6255..b3f8bb056 100644
--- a/lib/AlphaNum.cpp
+++ b/lib/AlphaNum.cpp
@@ -1,3 +1,10 @@
+/*******************************************************************************
+GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
+Copyright (C) 2022 GOMC Group
+A copy of the MIT License can be found in License.txt
+along with this program, also can be found at
+<https://opensource.org/licenses/MIT>.
+********************************************************************************/
 #include "AlphaNum.h"
 
 AlphaNum::AlphaNum() {}
diff --git a/lib/CircuitFinder.cpp b/lib/CircuitFinder.cpp
index f3aee00fa..abfbb1771 100644
--- a/lib/CircuitFinder.cpp
+++ b/lib/CircuitFinder.cpp
@@ -1,3 +1,14 @@
+//===----------------------------------------------------------------------===//
+//
+// Written by Xing Mingjie (mingjie.xing@gmail.com).
+//
+// An implementation of the Johnson's circuit finding algorithm [1].
+//
+// [1] Donald B. Johnson, Finding all the elementary circuits of a directed
+//     graph, SIAM Journal on Computing, 1975.
+//
+//===----------------------------------------------------------------------===//
+
 #include "CircuitFinder.h"
 
 void CircuitFinder::addEdge(int src, int dest) {
diff --git a/lib/Endian.h b/lib/Endian.h
deleted file mode 100644
index c4c9fe043..000000000
--- a/lib/Endian.h
+++ /dev/null
@@ -1,111 +0,0 @@
-#pragma once
-
-/*
- * This file was created to handle endianness problem when writing to binary
- * files. The bug could occur when an integer is being written to a binary file
- * and read from a different system. If systems have different endianness, the
- * order of bytes could get reversed.
- *
- * NUM: 0x04030201
- * LITTLE_ENDIAN => 0x01020304
- * BIG_ENDIAN    => 0x04030201
- *
- * e.g. If the writing system (little endian) have 0x04030201 in its variable
- * and wants to write it to file, it would be in the order of 0x01, 0x02, 0x03
- * 0x04. And when the reading system (big endian) reads with the same order and
- * store it in an integer, our original 0x04030201 becomes 0x01020304.
- *
- * To prevent this from happening, we are always going to assume that the file
- * format is going to be little endian, and GOMC will perform conversion to
- * little or big endian based on the type of system.
- *
- * This header file will include tools to detect the endianness of the system
- * and functions to convert the little endian to big endian and big endian to
- * little endian.
- *
- * For developers outside of this header file, two functions should only be
- * used:
- *
- * // converts host integer to file integer
- * uint64_t htof64(uint64_t host_integer)
- * // converts host integer to file integer
- * uint64_t ftoh64(uint64_t file_integer)
- *
- * Before writing to file, make sure you use htof64() and after reading use
- * ftoh64()!
- *
- */
-
-#include <stdlib.h>
-#include <stdint.h>
-
-#define bswap_64(x)                                                            \
-  ((((x)&0xff00000000000000ull) >> 56) | (((x)&0x00ff000000000000ull) >> 40) | \
-   (((x)&0x0000ff0000000000ull) >> 24) | (((x)&0x000000ff00000000ull) >> 8) |  \
-   (((x)&0x00000000ff000000ull) << 8) | (((x)&0x0000000000ff0000ull) << 24) |  \
-   (((x)&0x000000000000ff00ull) << 40) | (((x)&0x00000000000000ffull) << 56))
-
-#define bswap_32(x)                                                            \
-  ((((x)&0xff000000) >> 24) | (((x)&0x00ff0000) >> 8) |                        \
-   (((x)&0x0000ff00) << 8) | (((x)&0x000000ff) << 24))
-
-#define bswap_16(x) ((((x)&0xff00) >> 8) | (((x)&0x00ff) << 8))
-
-enum ENDIANNESS { LT_ENDIAN, BG_ENDIAN };
-
-inline ENDIANNESS GetEndian() {
-  long int endian = 0x0000000000000001;
-  return (*(char *)&endian == 0x01) ? LT_ENDIAN : BG_ENDIAN;
-}
-
-inline uint64_t htof64(uint64_t host_integer) {
-  if (GetEndian() == LT_ENDIAN) {
-    // Same endianness, so just return the same integer
-    return host_integer;
-  } else {
-    // need to reverse here
-    return bswap_64(host_integer);
-  }
-}
-
-inline uint64_t ftoh64(uint64_t file_integer) {
-  if (GetEndian() == LT_ENDIAN) {
-    // Same endianness, so just return the same integer
-    return file_integer;
-  } else {
-    // need to reverse order here
-    return bswap_64(file_integer);
-  }
-}
-
-inline uint32_t htof32(uint32_t host_integer) {
-  if (GetEndian() == LT_ENDIAN) {
-    return host_integer;
-  } else {
-    return bswap_32(host_integer);
-  }
-}
-
-inline uint32_t ftoh32(uint32_t file_integer) {
-  if (GetEndian() == LT_ENDIAN) {
-    return file_integer;
-  } else {
-    return bswap_32(file_integer);
-  }
-}
-
-inline uint16_t htof16(uint32_t host_integer) {
-  if (GetEndian() == LT_ENDIAN) {
-    return host_integer;
-  } else {
-    return bswap_16(host_integer);
-  }
-}
-
-inline uint16_t ftoh16(uint16_t file_integer) {
-  if (GetEndian() == LT_ENDIAN) {
-    return file_integer;
-  } else {
-    return bswap_16(file_integer);
-  }
-}
diff --git a/metamake.sh b/metamake.sh
index ee9884522..182010980 100755
--- a/metamake.sh
+++ b/metamake.sh
@@ -5,6 +5,9 @@ use_profiler=0
 use_gtest=0
 use_gcc=0
 use_mpi=0
+use_asan=0
+use_opt=1
+use_tidy=0
 use_debug=0
 ENSEMBLES=""
 CMAKEARGS=""
@@ -66,26 +69,38 @@ then
 	fi
 fi
 
-while getopts 'mptgd' opt; do
+while getopts 'acdgmnpt' opt; do
     case "$opt" in
-        p)
-            use_profiler=1;;
+        a)
+            use_asan=1;;
+        c)
+            use_tidy=1
+            CMAKEARGS+="-DGOMC_TIDY=on ";;
+        d)
+            use_debug=1;;
+        g)
+            use_gcc=1;;
         m)
             use_mpi=1
             CMAKEARGS+="-DGOMC_MPI=on ";;
-        g)
-            use_gcc=1;;
+        n)
+            use_opt=0;;
+        p)
+            use_profiler=1;;
         t)
-            use_gtest=1;;
-        d)
-            use_debug=1;;
+            use_gtest=1
+            use_gcc=1;;
         *)  echo 'Error in command line options' >&2
             echo "Available options are: "
-            echo "-p (NVTX tags),"
-            echo "-t (disables Intel compiler to allow GTests to compile),"
-            echo "-m, enables MPI support (Required for Parallel Tempering)"
+            echo "-a, enables address sanitizer runtime checking"
+            echo "-c, enables clang-tidy source code checks"
             echo "-d, enables Debug Mode compilation"
-            echo "For combined usage: -ptmg"
+            echo "-g, uses the gcc compiler"
+            echo "-m, enables MPI support (Required for Parallel Tempering)"
+            echo "-n, disables most optimizing compiler flags"
+            echo "-p, enables GPU code profiling (NVTX tags)"
+            echo "-t, disables Intel compiler to allow GTests to compile"
+            echo "For combined usage, concatenate flags, e.g.: -ptmg"
             exit 1
     esac
 done
@@ -122,13 +137,22 @@ cd bin
 if (( !use_gtest )); then
     if (( !use_gcc )); 
     then
-        ICC_PATH="$(which icc 2> /dev/null)"
-        ICPC_PATH="$(which icpc 2> /dev/null)"
+        ICC_PATH="$(which icx 2> /dev/null)"
+        ICPC_PATH="$(which icpx 2> /dev/null)"
+        if [ -z "$ICC_PATH" ]
+        then
+            ICC_PATH="$(which icc 2> /dev/null)"
+            ICPC_PATH="$(which icpc 2> /dev/null)"
+		fi
         if [ -z "$ICC_PATH" ]
         then
             export CC="$(which gcc 2> /dev/null)"
             export CXX="$(which g++ 2> /dev/null)"
         else
+            if (( use_asan )); then
+				echo "Warning: Address sanitizer unset. Not compatible with the Intel compiler."
+				use_asan=0
+			fi
             export CC=${ICC_PATH}
             export CXX=${ICPC_PATH}
         fi
@@ -143,7 +167,7 @@ else
 		ENSEMBLES+="GOMC_NPT_MPI_Test "
 		ENSEMBLES+="GOMC_GCMC_MPI_Test "
 		ENSEMBLES+="GOMC_GEMC_MPI_Test "
-		if(( use_cuda ))
+		if (( use_cuda ))
 		then
         	ENSEMBLES+="GOMC_GPU_NVT_MPI_Test "
         	ENSEMBLES+="GOMC_GPU_NPT_MPI_Test "
@@ -156,7 +180,7 @@ else
         ENSEMBLES+="GOMC_NPT_Test "
         ENSEMBLES+="GOMC_GCMC_Test "
         ENSEMBLES+="GOMC_GEMC_Test "
-		if(( use_cuda ))
+		if (( use_cuda ))
 		then
         	ENSEMBLES+="GOMC_GPU_NVT_Test "
         	ENSEMBLES+="GOMC_GPU_NPT_Test "
@@ -184,12 +208,21 @@ echo "Ensembles To Compile: $ENSEMBLES"
 if (( use_profiler )); then
     if (( use_cuda )); then
       	echo "Enabling NVTX profiling for CUDA "
-	    CMAKEARGS+="-DGOMC_NVTX_ENABLED=1 "
+	    CMAKEARGS+="-DGOMC_NVTX_ENABLED=on "
     else
       	echo "Warning: Cannot enable NVTX profiling without CUDA enabled."
     fi
 fi
 
+if (( use_asan )); then
+    use_debug=1
+    CMAKEARGS+="-DGOMC_ASAN=on "
+fi
+
+if (( use_opt )); then
+    CMAKEARGS+="-DGOMC_OPT=on "
+fi
+
 if (( use_debug )); then
 	echo "Enabling Debug Compilation "
 	CMAKEARGS+="-DCMAKE_BUILD_TYPE=Debug "
diff --git a/src/GOMCEventsProfileDef.h b/src/GOMCEventsProfileDef.h
index 750e36f30..b228191a0 100644
--- a/src/GOMCEventsProfileDef.h
+++ b/src/GOMCEventsProfileDef.h
@@ -1,3 +1,10 @@
+/*******************************************************************************
+GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
+Copyright (C) 2022 GOMC Group
+A copy of the MIT License can be found in License.txt
+along with this program, also can be found at
+<https://opensource.org/licenses/MIT>.
+********************************************************************************/
 GOMC_PROFILE_EVENT(INITIALIZE, "initialization")
 GOMC_PROFILE_EVENT(MC_RUN, "MC_run")
 GOMC_PROFILE_EVENT(DESTRUCTION, "destruction")
diff --git a/src/GPU/CalculateForceCUDAKernel.cu b/src/GPU/CalculateForceCUDAKernel.cu
index 84bec2c99..8f9cb5caa 100644
--- a/src/GPU/CalculateForceCUDAKernel.cu
+++ b/src/GPU/CalculateForceCUDAKernel.cu
@@ -542,7 +542,7 @@ __global__ void BoxInterForceGPU(
         double lambdaVDW = DeviceGetLambdaVDW(mA, mB, box, gpu_isFraction,
                                               gpu_molIndex, gpu_lambdaVDW);
 
-        diff_com = Difference(gpu_comx, gpu_comy, gpu_comz, mA, mB);
+        diff_com = Difference3(gpu_comx, gpu_comy, gpu_comz, mA, mB);
         if (gpu_nonOrth[0])
           diff_com = MinImageNonOrthGPU(diff_com, axis, halfAx, gpu_cell_x,
                                         gpu_cell_y, gpu_cell_z, gpu_Invcell_x,
diff --git a/src/GPU/CalculateMinImageCUDAKernel.cuh b/src/GPU/CalculateMinImageCUDAKernel.cuh
index ae7742ce9..d1e9646c5 100644
--- a/src/GPU/CalculateMinImageCUDAKernel.cuh
+++ b/src/GPU/CalculateMinImageCUDAKernel.cuh
@@ -11,7 +11,7 @@ along with this program, also can be found at <https://opensource.org/licenses/M
 #include <cuda_runtime.h>
 #include "ConstantDefinitionsCUDAKernel.cuh"
 
-__device__ inline double3 Difference(const double *x, const double *y, const double *z,
+__device__ inline double3 Difference3(const double *x, const double *y, const double *z,
                                      uint i, uint j)
 {
   return make_double3(x[i] - x[j], y[i] - y[j], z[i] - z[j]);
@@ -165,7 +165,7 @@ __device__ inline bool InRcutGPU(double &distSq, const double *x, const double *
                                  const double *gpu_Invcell_y, const double *gpu_Invcell_z)
 {
   double3 dist;
-  dist = Difference(x, y, z, i, j);
+  dist = Difference3(x, y, z, i, j);
   // Do a binary print here of dist
   if(gpu_nonOrth) {
     dist = MinImageNonOrthGPU(dist, axis, halfAx, gpu_cell_x, gpu_cell_y, gpu_cell_z,
@@ -188,7 +188,7 @@ __device__ inline bool InRcutGPU(double &distSq, double3 &dist,
                                  const double *gpu_cell_z, const double *gpu_Invcell_x,
                                  const double *gpu_Invcell_y, const double *gpu_Invcell_z)
 {
-  dist = Difference(x, y, z, i, j);
+  dist = Difference3(x, y, z, i, j);
   if(gpu_nonOrth) {
     dist = MinImageNonOrthGPU(dist, axis, halfAx, gpu_cell_x, gpu_cell_y, gpu_cell_z,
                               gpu_Invcell_x, gpu_Invcell_y, gpu_Invcell_z);
diff --git a/src/Random123Wrapper.cpp b/src/Random123Wrapper.cpp
index fb07ec155..0c40cd398 100644
--- a/src/Random123Wrapper.cpp
+++ b/src/Random123Wrapper.cpp
@@ -1,3 +1,10 @@
+/*******************************************************************************
+GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
+Copyright (C) 2022 GOMC Group
+A copy of the MIT License can be found in License.txt
+along with this program, also can be found at
+<https://opensource.org/licenses/MIT>.
+********************************************************************************/
 #include "Random123Wrapper.h"
 
 #include "Random123/boxmuller.hpp"
diff --git a/src/Random123Wrapper.h b/src/Random123Wrapper.h
index c67d30d2f..50bcf4651 100644
--- a/src/Random123Wrapper.h
+++ b/src/Random123Wrapper.h
@@ -1,3 +1,10 @@
+/*******************************************************************************
+GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
+Copyright (C) 2022 GOMC Group
+A copy of the MIT License can be found in License.txt
+along with this program, also can be found at
+<https://opensource.org/licenses/MIT>.
+********************************************************************************/
 #pragma once
 
 #include "BasicTypes.h"

From bf4a10b3b277b7297110245045f5e8d1734b8909 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Wed, 26 Apr 2023 09:25:05 -0400
Subject: [PATCH 02/42] Correct EOL

---
 CMake/GOMCCUDASetup.cmake | 2 +-
 CMakeLists.txt            | 2 +-
 metamake.sh               | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/CMake/GOMCCUDASetup.cmake b/CMake/GOMCCUDASetup.cmake
index 9eca33dcd..f66a8bcb9 100644
--- a/CMake/GOMCCUDASetup.cmake
+++ b/CMake/GOMCCUDASetup.cmake
@@ -41,7 +41,7 @@ set(CMAKE_CUDA_STANDARD_REQUIRED true)
 set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_CXX_STANDARD_REQUIRED true)
 
-# Turn off warning that CUDA code was not compiled with the -ipo flag
+# Turn off warning that CUDA files were not compiled with the -ipo flag
 if(GOMC_OPT)
    set(CMAKE_INTEL_LINK_FLAGS ${CMAKE_INTEL_LINK_FLAGS} -diag-disable=11003)
 endif()
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 37d00553a..30680d5c2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,7 +27,7 @@ if(NOT CMAKE_BUILD_TYPE)
 endif(NOT CMAKE_BUILD_TYPE)
 
 #Set compile and link flags. Need to do it this way so that we can pass
-#the flags to NVCC properly.
+#the source compiler flags to NVCC properly.
 if(GOMC_OPT)
    set(CMAKE_INTEL_COMP_FLAGS -Ofast -ipo -xHost)
    set(CMAKE_INTEL_LINK_FLAGS -Ofast -ipo -xHost)
diff --git a/metamake.sh b/metamake.sh
index 182010980..2210c570a 100755
--- a/metamake.sh
+++ b/metamake.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
-# Check if nvcc is available
+
+# Initialize the commandline options and flags
 use_cuda=0
 use_profiler=0
 use_gtest=0

From 199a8eb6bd5f8ac33357985ceaa0b531e9edf6bc Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Fri, 5 May 2023 14:15:38 -0400
Subject: [PATCH 03/42] Fix a minor memory leak

---
 src/Main.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/Main.cpp b/src/Main.cpp
index 74be71299..262b151d5 100644
--- a/src/Main.cpp
+++ b/src/Main.cpp
@@ -282,9 +282,14 @@ void PrintHardwareInfo() {
               << mem.totalram / megabyte - mem.freeram / megabyte << "MB"
               << std::endl;
   }
-  std::cout << "Info: Working in the current directory: "
-            << get_current_dir_name();
+  // get_current_dir_name() returns a malloc'd string, or NULL on failure.
+  // Streaming a null char* is undefined behavior, so print a placeholder
+  // instead, and free the buffer once it has been printed.
+  char *pathname = get_current_dir_name();
+  std::cout << "Info: Working in the current directory: "
+            << (pathname ? pathname : "(unknown)");
   std::cout << std::endl;
+  free(pathname);
 #endif
 }
 

From 03a60641ff653b6e4ecee7bda07ea6d865d45976 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Fri, 5 May 2023 17:55:49 -0400
Subject: [PATCH 04/42] Make sure the correct abs() function is used with
 floats

---
 src/BoxDimensions.cpp                   | 8 ++++----
 src/ConfigSetup.cpp                     | 4 ++--
 src/Ewald.cpp                           | 2 +-
 src/FFSetup.cpp                         | 4 ++--
 src/GPU/CalculateMinImageCUDAKernel.cuh | 2 +-
 src/MoveSettings.cpp                    | 4 ++--
 src/PRNG.h                              | 2 +-
 src/Simulation.cpp                      | 2 +-
 8 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/BoxDimensions.cpp b/src/BoxDimensions.cpp
index b378e1eed..3b01dd54b 100644
--- a/src/BoxDimensions.cpp
+++ b/src/BoxDimensions.cpp
@@ -275,7 +275,7 @@ double BoxDimensions::WrapPBC(double &v, const double ax) const {
   if (
     bool negate = (v > ax);
     double vNeg = v + (ax ^ -negate) + negate;
-    return (fabs(v - halfAx) > halfAx) ? v : vNeg;
+    return (std::fabs(v - halfAx) > halfAx) ? v : vNeg;
 #else
   // Note: testing shows that it's most efficient to negate if true.
   // Source:
@@ -297,9 +297,9 @@ double BoxDimensions::UnwrapPBC(double &v, const double ref, const double ax,
 #ifdef NO_BRANCHING_UNWRAP
   bool negate = (ref > halfAx);
   double vDiff = v + (ax ^ -negate) + negate;
-  return (fabs(ref - v) > halfAx) ? v : vDiff;
+  return (std::fabs(ref - v) > halfAx) ? v : vDiff;
 #else
-  if (fabs(ref - v) > halfAx) {
+  if (std::fabs(ref - v) > halfAx) {
     // Note: testing shows that it's most efficient to negate if true.
     // Source:
     // http://jacksondunstan.com/articles/2052
@@ -341,7 +341,7 @@ XYZ BoxDimensions::MinImage_Z(XYZ rawVec, const uint b) const {
 //
 double BoxDimensions::MinImage(double &raw, const double ax,
                                const double halfAx) const {
-  raw = fabs(raw);
+  raw = std::fabs(raw);
   // If shorter over periodic boundary, get that dist.
 #ifdef NO_BRANCHING_MIN_IMAGE
   rawDiff = ax - raw;
diff --git a/src/ConfigSetup.cpp b/src/ConfigSetup.cpp
index 5eef02b62..3f3920d17 100644
--- a/src/ConfigSetup.cpp
+++ b/src/ConfigSetup.cpp
@@ -1746,8 +1746,8 @@ void ConfigSetup::verifyInputs(void) {
     }
   }
 
-  if (abs(sys.moves.multiParticle) > 0.0000001 &&
-      abs(sys.moves.multiParticleBrownian) > 0.0000001) {
+  if (std::abs(sys.moves.multiParticle) > 0.0000001 &&
+      std::abs(sys.moves.multiParticleBrownian) > 0.0000001) {
     std::cout << "Error: Both multi-Particle and multi-Particle Brownian! "
               << " cannot be used at the same time!" << std::endl;
     exit(EXIT_FAILURE);
diff --git a/src/Ewald.cpp b/src/Ewald.cpp
index 315bb689b..ad3071a0c 100644
--- a/src/Ewald.cpp
+++ b/src/Ewald.cpp
@@ -1456,7 +1456,7 @@ void Ewald::UpdateRecipVec(uint box) {
 }
 
 void compareDouble(const double &x, const double &y, const int &i) {
-  if (abs(x - y) > 1e-15) {
+  if (std::fabs(x - y) > 1e-15) {
     printf("%d: %lf != %lf\n", i, x, y);
   }
 }
diff --git a/src/FFSetup.cpp b/src/FFSetup.cpp
index b191248f7..ec48751a8 100644
--- a/src/FFSetup.cpp
+++ b/src/FFSetup.cpp
@@ -237,11 +237,11 @@ void Particle::Read(Reader &param, std::string const &firstVar) {
   // or geometric mean of any pair > 6.0. See FFParticle::Blend() for underlying
   // math.
   double smallVal = 1e-20;
-  if (abs(e) < smallVal) {
+  if (std::fabs(e) < smallVal) {
     e = 0.0;
     expN = 12.0; // Set to default (LJ) exponent.
   }
-  if (abs(e_1_4) < smallVal) {
+  if (std::fabs(e_1_4) < smallVal) {
     e_1_4 = 0.0;
     expN_1_4 = 12.0; // Set to default (LJ) exponent.
   }
diff --git a/src/GPU/CalculateMinImageCUDAKernel.cuh b/src/GPU/CalculateMinImageCUDAKernel.cuh
index d1e9646c5..b5657afa4 100644
--- a/src/GPU/CalculateMinImageCUDAKernel.cuh
+++ b/src/GPU/CalculateMinImageCUDAKernel.cuh
@@ -66,7 +66,7 @@ __device__ inline void WrapPBCNonOrth3(double3 &v, const double3 &ax,
 __device__ inline void  UnwrapPBC(double &v, const double &ref, const double &ax,
                                   const double &halfax)
 {
-  if(abs(ref - v) > halfax) {
+  if(std::fabs(ref - v) > halfax) {
     if(ref < halfax)
       v -= ax;
     else
diff --git a/src/MoveSettings.cpp b/src/MoveSettings.cpp
index 138e93765..79dfd4736 100644
--- a/src/MoveSettings.cpp
+++ b/src/MoveSettings.cpp
@@ -183,7 +183,7 @@ void MoveSettings::AdjustMultiParticle(const uint box, const uint typePick) {
     if (typePick == mp::MPDISPLACE) {
       if (fractOfIntervalAccept == 0.0) {
         mp_t_max[box] *= 0.5;
-      } else if (fabs(fractOfIntervalAccept - mp::TARGET_ACCEPT_FRACT) >
+      } else if (std::fabs(fractOfIntervalAccept - mp::TARGET_ACCEPT_FRACT) >
                  mp_accept_tol) {
         mp_t_max[box] *= ((1.0 - t_alpha) * fractOfTotalAccept +
                           t_alpha * fractOfIntervalAccept);
@@ -193,7 +193,7 @@ void MoveSettings::AdjustMultiParticle(const uint box, const uint typePick) {
     } else {
       if (fractOfIntervalAccept == 0.0) {
         mp_r_max[box] *= 0.5;
-      } else if (fabs(fractOfIntervalAccept - mp::TARGET_ACCEPT_FRACT) >
+      } else if (std::fabs(fractOfIntervalAccept - mp::TARGET_ACCEPT_FRACT) >
                  mp_accept_tol) {
         mp_r_max[box] *= ((1.0 - r_alpha) * fractOfTotalAccept +
                           r_alpha * fractOfIntervalAccept);
diff --git a/src/PRNG.h b/src/PRNG.h
index 96278f1c2..b3abe9db4 100644
--- a/src/PRNG.h
+++ b/src/PRNG.h
@@ -185,7 +185,7 @@ class PRNG {
       lastZero = i;
     }
     lastZero--;
-    if (std::abs(draw - totalWeight) < 0.001) {
+    if (std::fabs(draw - totalWeight) < 0.001) {
       return lastZero;
     }
 
diff --git a/src/Simulation.cpp b/src/Simulation.cpp
index 183a2aa2d..b63d525ce 100644
--- a/src/Simulation.cpp
+++ b/src/Simulation.cpp
@@ -105,7 +105,7 @@ void Simulation::RunSimulation(void) {
 
     if ((step + 1) == cpu->equilSteps) {
       double currEnergy = system->potential.totalEnergy.total;
-      if (std::abs(currEnergy - startEnergy) > 1.0e+10) {
+      if (std::fabs(currEnergy - startEnergy) > 1.0e+10) {
         printf("Info: Recalculating the total energies to insure the accuracy"
                " of the computed \n"
                "      running energies.\n\n");

From 48fa348de9f4d9df6286e302ba98ae02703867d9 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Tue, 9 May 2023 09:38:03 -0400
Subject: [PATCH 05/42] Update for recent code changes

---
 src/CalculateEnergy.cpp                 | 3487 +++++++++++------------
 src/GPU/CalculateMinImageCUDAKernel.cuh |  222 +-
 2 files changed, 1855 insertions(+), 1854 deletions(-)

diff --git a/src/CalculateEnergy.cpp b/src/CalculateEnergy.cpp
index e5dc81117..be4aab1ec 100644
--- a/src/CalculateEnergy.cpp
+++ b/src/CalculateEnergy.cpp
@@ -1,1744 +1,1743 @@
-/*******************************************************************************
-GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
-Copyright (C) 2022 GOMC Group
-A copy of the MIT License can be found in License.txt
-along with this program, also can be found at
-<https://opensource.org/licenses/MIT>.
-********************************************************************************/
-#include "CalculateEnergy.h" //header for this
-
-#include <cassert>
-
-#include "BasicTypes.h" //uint
-#include "BoxDimensions.h"
-#include "BoxDimensionsNonOrth.h"
-#include "Coordinates.h"
-#include "EnergyTypes.h"          //Energy structs
-#include "EnsemblePreprocessor.h" //Flags
-#include "Ewald.h"                //for ewald calculation
-#include "EwaldCached.h"          //for ewald calculation
-#include "Forcefield.h"           //
-#include "GeomLib.h"
-#include "MoleculeKind.h"
-#include "MoleculeLookup.h"
-#include "NoEwald.h" //for ewald calculation
-#include "NumLib.h"
-#include "StaticVals.h" //For init
-#include "System.h"     //For init
-#include "TrialMol.h"
-#ifdef GOMC_CUDA
-#include "CalculateEnergyCUDAKernel.cuh"
-#include "CalculateForceCUDAKernel.cuh"
-#include "ConstantDefinitionsCUDAKernel.cuh"
-#endif
-#include "GOMCEventsProfile.h"
-#define NUMBER_OF_NEIGHBOR_CELL 27
-
-//
-//    CalculateEnergy.cpp
-//    Energy Calculation functions for Monte Carlo simulation
-//    Calculates using const references to a particular Simulation's members
-//    Brock Jackman Sep. 2013
-//
-//    Updated to use radial-based intermolecular pressure
-//    Jason Mick    Feb. 2014
-//
-
-using namespace geom;
-
-CalculateEnergy::CalculateEnergy(StaticVals &stat, System &sys)
-    : forcefield(stat.forcefield), mols(stat.mol),
-      currentCoords(sys.coordinates), currentCOM(sys.com),
-      lambdaRef(sys.lambdaRef), atomForceRef(sys.atomForceRef),
-      molForceRef(sys.molForceRef),
-#ifdef VARIABLE_PARTICLE_NUMBER
-      molLookup(sys.molLookup),
-#else
-      molLookup(stat.molLookup),
-#endif
-      currentAxes(sys.boxDimRef), cellList(sys.cellList) {
-}
-
-void CalculateEnergy::Init(System &sys) {
-  uint maxAtomInMol = 0;
-  calcEwald = sys.GetEwald();
-  electrostatic = forcefield.electrostatic;
-  ewald = forcefield.ewald;
-  multiParticleEnabled = sys.statV.multiParticleEnabled;
-  for (uint m = 0; m < mols.count; ++m) {
-    const MoleculeKind &molKind = mols.GetKind(m);
-    if (molKind.NumAtoms() > maxAtomInMol)
-      maxAtomInMol = molKind.NumAtoms();
-    for (uint a = 0; a < molKind.NumAtoms(); ++a) {
-      particleKind.push_back(molKind.AtomKind(a));
-      particleMol.push_back(m);
-      particleCharge.push_back(molKind.AtomCharge(a));
-      particleIndex.push_back(int(a));
-    }
-  }
-#ifdef GOMC_CUDA
-  InitCoordinatesCUDA(forcefield.particles->getCUDAVars(),
-                      currentCoords.Count(), maxAtomInMol, currentCOM.Count());
-#endif
-}
-
-SystemPotential CalculateEnergy::SystemTotal() {
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_SYSTEM_TOTAL);
-  SystemPotential pot =
-      SystemInter(SystemPotential(), currentCoords, currentAxes);
-
-  // system intra
-  for (uint b = 0; b < BOX_TOTAL; ++b) {
-    GOMC_EVENT_START(1, GomcProfileEvent::EN_BOX_INTRA);
-    double bondEnergy[2] = {0};
-    double bondEn = 0.0, nonbondEn = 0.0, correction = 0.0;
-    MoleculeLookup::box_iterator thisMol = molLookup.BoxBegin(b);
-    MoleculeLookup::box_iterator end = molLookup.BoxEnd(b);
-    std::vector<uint> molID;
-
-    while (thisMol != end) {
-      molID.push_back(*thisMol);
-      ++thisMol;
-    }
-
-#ifdef _OPENMP
-#pragma omp parallel for default(none) private(bondEnergy) shared(b, molID) \
-    reduction(+:bondEn, nonbondEn, correction)
-#endif
-    for (int i = 0; i < (int)molID.size(); i++) {
-      // calculate nonbonded energy
-      MoleculeIntra(molID[i], b, bondEnergy);
-      bondEn += bondEnergy[0];
-      nonbondEn += bondEnergy[1];
-      // calculate correction term of electrostatic interaction
-      correction += calcEwald->MolCorrection(molID[i], b);
-    }
-
-    pot.boxEnergy[b].intraBond = bondEn;
-    pot.boxEnergy[b].intraNonbond = nonbondEn;
-    // calculate self term of electrostatic interaction
-    pot.boxEnergy[b].self = calcEwald->BoxSelf(b);
-    pot.boxEnergy[b].correction = correction;
-
-    GOMC_EVENT_STOP(1, GomcProfileEvent::EN_BOX_INTRA);
-    // Calculate Virial
-    pot.boxVirial[b] = VirialCalc(b);
-  }
-
-  pot.Total();
-
-  if (pot.totalEnergy.total > 1.0e12) {
-    std::cout << "\nWarning: Large energy detected due to the overlap in "
-                 "initial configuration.\n"
-                 "         The total energy will be recalculated at EqStep to "
-                 "ensure the accuracy \n"
-                 "         of the computed running energies.\n";
-  }
-
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_SYSTEM_TOTAL);
-  return pot;
-}
-
-SystemPotential CalculateEnergy::SystemInter(SystemPotential potential,
-                                             XYZArray const &coords,
-                                             BoxDimensions const &boxAxes) {
-  for (uint b = 0; b < BOXES_WITH_U_NB; ++b) {
-    // calculate LJ interaction and real term of electrostatic interaction
-    potential = BoxInter(potential, coords, boxAxes, b);
-    // calculate reciprocal term of electrostatic interaction
-    potential.boxEnergy[b].recip = calcEwald->BoxReciprocal(b, false);
-  }
-
-  potential.Total();
-
-  return potential;
-}
-
-// Calculate the inter energy for Box. Fractional molecule are not allowed in
-// this function. Need to implement the GPU function
-SystemPotential CalculateEnergy::BoxInter(SystemPotential potential,
-                                          XYZArray const &coords,
-                                          BoxDimensions const &boxAxes,
-                                          const uint box) {
-  // Handles reservoir box case, returning zeroed structure if
-  // interactions are off.
-  if (box >= BOXES_WITH_U_NB)
-    return potential;
-
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_BOX_INTER);
-  double tempREn = 0.0, tempLJEn = 0.0;
-
-  std::vector<int> cellVector, cellStartIndex, mapParticleToCell;
-  std::vector<std::vector<int>> neighborList;
-  cellList.GetCellListNeighbor(box, currentCoords.Count(), cellVector,
-                               cellStartIndex, mapParticleToCell);
-  neighborList = cellList.GetNeighborList(box);
-
-#ifdef GOMC_CUDA
-  // update unitcell in GPU
-  UpdateCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
-                      boxAxes.cellBasis[box].x, boxAxes.cellBasis[box].y,
-                      boxAxes.cellBasis[box].z);
-
-  if (!boxAxes.orthogonal[box]) {
-    // In this case, boxAxes is really an object of type BoxDimensionsNonOrth,
-    // so cast and copy the additional data to the GPU
-    const BoxDimensionsNonOrth *NonOrthAxes =
-        static_cast<const BoxDimensionsNonOrth *>(&boxAxes);
-    UpdateInvCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
-                           NonOrthAxes->cellBasis_Inv[box].x,
-                           NonOrthAxes->cellBasis_Inv[box].y,
-                           NonOrthAxes->cellBasis_Inv[box].z);
-  }
-
-  CallBoxInterGPU(forcefield.particles->getCUDAVars(), cellVector,
-                  cellStartIndex, neighborList, coords, boxAxes, electrostatic,
-                  particleCharge, particleKind, particleMol, tempREn, tempLJEn,
-                  forcefield.sc_coul, forcefield.sc_sigma_6,
-                  forcefield.sc_alpha, forcefield.sc_power, box);
-#else
-#if defined _OPENMP && _OPENMP >= 201511 // check if OpenMP version is 4.5
-#pragma omp parallel for default(none) shared(boxAxes, cellStartIndex, \
-  cellVector, coords, mapParticleToCell, neighborList) \
-reduction(+:tempREn, tempLJEn) firstprivate(box, num::qqFact)
-#endif
-  // loop over all particles
-  for (int currParticleIdx = 0; currParticleIdx < (int)cellVector.size();
-       currParticleIdx++) {
-    int currParticle = cellVector[currParticleIdx];
-    // find the which cell currParticle belong to
-    int currCell = mapParticleToCell[currParticle];
-    // loop over currCell neighboring cells
-    for (int nCellIndex = 0; nCellIndex < NUMBER_OF_NEIGHBOR_CELL;
-         nCellIndex++) {
-      // find the index of neighboring cell
-      int neighborCell = neighborList[currCell][nCellIndex];
-
-      // find the ending index in neighboring cell
-      int endIndex = cellStartIndex[neighborCell + 1];
-      // loop over particle inside neighboring cell
-      for (int nParticleIndex = cellStartIndex[neighborCell];
-           nParticleIndex < endIndex; nParticleIndex++) {
-        int nParticle = cellVector[nParticleIndex];
-
-        // avoid same particles and duplicate work
-        if (currParticle < nParticle &&
-            particleMol[currParticle] != particleMol[nParticle]) {
-          double distSq;
-          XYZ virComponents;
-          if (boxAxes.InRcut(distSq, virComponents, coords, currParticle,
-                             nParticle, box)) {
-            double lambdaVDW = GetLambdaVDW(particleMol[currParticle],
-                                            particleMol[nParticle], box);
-            if (electrostatic) {
-              double lambdaCoulomb = GetLambdaCoulomb(
-                  particleMol[currParticle], particleMol[nParticle], box);
-              double qi_qj_fact = particleCharge[currParticle] *
-                                  particleCharge[nParticle] * num::qqFact;
-              if (qi_qj_fact != 0.0) {
-                tempREn += forcefield.particles->CalcCoulomb(
-                    distSq, particleKind[currParticle], particleKind[nParticle],
-                    qi_qj_fact, lambdaCoulomb, box);
-              }
-            }
-            tempLJEn += forcefield.particles->CalcEn(
-                distSq, particleKind[currParticle], particleKind[nParticle],
-                lambdaVDW);
-          }
-        }
-      }
-    }
-  }
-#endif
-
-  // setting energy and virial of LJ interaction
-  potential.boxEnergy[box].inter = tempLJEn;
-  // setting energy and virial of coulomb interaction
-  potential.boxEnergy[box].real = tempREn;
-
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_BOX_INTER);
-  // set correction energy and virial
-  if (forcefield.useLRC) {
-    EnergyCorrection(potential, boxAxes, box);
-  }
-
-  potential.Total();
-  return potential;
-}
-
-SystemPotential
-CalculateEnergy::BoxForce(SystemPotential potential, XYZArray const &coords,
-                          XYZArray &atomForce, XYZArray &molForce,
-                          BoxDimensions const &boxAxes, const uint box) {
-  // Handles reservoir box case, returning zeroed structure if
-  // interactions are off.
-  if (box >= BOXES_WITH_U_NB)
-    return potential;
-
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_BOX_FORCE);
-
-  double tempREn = 0.0, tempLJEn = 0.0;
-  // make a pointer to atom force and mol force for OpenMP
-  double *aForcex = atomForce.x;
-  double *aForcey = atomForce.y;
-  double *aForcez = atomForce.z;
-  double *mForcex = molForce.x;
-  double *mForcey = molForce.y;
-  double *mForcez = molForce.z;
-  int atomCount = atomForce.Count();
-  int molCount = molForce.Count();
-
-  // Reset Force Arrays
-  ResetForce(atomForce, molForce, box);
-
-  std::vector<int> cellVector, cellStartIndex, mapParticleToCell;
-  std::vector<std::vector<int>> neighborList;
-  cellList.GetCellListNeighbor(box, coords.Count(), cellVector, cellStartIndex,
-                               mapParticleToCell);
-  neighborList = cellList.GetNeighborList(box);
-
-#ifdef GOMC_CUDA
-  // update unitcell in GPU
-  UpdateCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
-                      boxAxes.cellBasis[box].x, boxAxes.cellBasis[box].y,
-                      boxAxes.cellBasis[box].z);
-
-  if (!boxAxes.orthogonal[box]) {
-    // In this case, boxAxes is really an object of type BoxDimensionsNonOrth,
-    // so cast and copy the additional data to the GPU
-    const BoxDimensionsNonOrth *NonOrthAxes =
-        static_cast<const BoxDimensionsNonOrth *>(&boxAxes);
-    UpdateInvCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
-                           NonOrthAxes->cellBasis_Inv[box].x,
-                           NonOrthAxes->cellBasis_Inv[box].y,
-                           NonOrthAxes->cellBasis_Inv[box].z);
-  }
-
-  CallBoxForceGPU(forcefield.particles->getCUDAVars(), cellVector,
-                  cellStartIndex, neighborList, mapParticleToCell, coords,
-                  boxAxes, electrostatic, particleCharge, particleKind,
-                  particleMol, tempREn, tempLJEn, aForcex, aForcey, aForcez,
-                  mForcex, mForcey, mForcez, atomCount, molCount,
-                  forcefield.sc_coul, forcefield.sc_sigma_6,
-                  forcefield.sc_alpha, forcefield.sc_power, box);
-
-#else
-#if defined _OPENMP && _OPENMP >= 201511 // check if OpenMP version is 4.5
-#pragma omp parallel for default(none) shared(boxAxes, cellStartIndex, \
-  cellVector, coords, mapParticleToCell, neighborList) \
-  firstprivate(box, atomCount, molCount, num::qqFact) \
-  reduction(+:tempREn, tempLJEn, aForcex[:atomCount], aForcey[:atomCount], \
-            aForcez[:atomCount], mForcex[:molCount], mForcey[:molCount], \
-            mForcez[:molCount])
-#endif
-  for (int currParticleIdx = 0; currParticleIdx < (int)cellVector.size();
-       currParticleIdx++) {
-    int currParticle = cellVector[currParticleIdx];
-    int currCell = mapParticleToCell[currParticle];
-
-    for (int nCellIndex = 0; nCellIndex < NUMBER_OF_NEIGHBOR_CELL;
-         nCellIndex++) {
-      int neighborCell = neighborList[currCell][nCellIndex];
-
-      int endIndex = cellStartIndex[neighborCell + 1];
-      for (int nParticleIndex = cellStartIndex[neighborCell];
-           nParticleIndex < endIndex; nParticleIndex++) {
-        int nParticle = cellVector[nParticleIndex];
-
-        if (currParticle < nParticle &&
-            particleMol[currParticle] != particleMol[nParticle]) {
-          double distSq;
-          XYZ virComponents, forceLJ, forceReal;
-          if (boxAxes.InRcut(distSq, virComponents, coords, currParticle,
-                             nParticle, box)) {
-            double lambdaVDW = GetLambdaVDW(particleMol[currParticle],
-                                            particleMol[nParticle], box);
-            if (electrostatic) {
-              double lambdaCoulomb = GetLambdaCoulomb(
-                  particleMol[currParticle], particleMol[nParticle], box);
-              double qi_qj_fact = particleCharge[currParticle] *
-                                  particleCharge[nParticle] * num::qqFact;
-              if (qi_qj_fact != 0.0) {
-                tempREn += forcefield.particles->CalcCoulomb(
-                    distSq, particleKind[currParticle], particleKind[nParticle],
-                    qi_qj_fact, lambdaCoulomb, box);
-                // Calculating the force
-                forceReal =
-                    virComponents * forcefield.particles->CalcCoulombVir(
-                                        distSq, particleKind[currParticle],
-                                        particleKind[nParticle], qi_qj_fact,
-                                        lambdaCoulomb, box);
-              }
-            }
-            tempLJEn += forcefield.particles->CalcEn(
-                distSq, particleKind[currParticle], particleKind[nParticle],
-                lambdaVDW);
-            forceLJ = virComponents * forcefield.particles->CalcVir(
-                                          distSq, particleKind[currParticle],
-                                          particleKind[nParticle], lambdaVDW);
-            aForcex[currParticle] += forceLJ.x + forceReal.x;
-            aForcey[currParticle] += forceLJ.y + forceReal.y;
-            aForcez[currParticle] += forceLJ.z + forceReal.z;
-            aForcex[nParticle] += -(forceLJ.x + forceReal.x);
-            aForcey[nParticle] += -(forceLJ.y + forceReal.y);
-            aForcez[nParticle] += -(forceLJ.z + forceReal.z);
-            mForcex[particleMol[currParticle]] += (forceLJ.x + forceReal.x);
-            mForcey[particleMol[currParticle]] += (forceLJ.y + forceReal.y);
-            mForcez[particleMol[currParticle]] += (forceLJ.z + forceReal.z);
-            mForcex[particleMol[nParticle]] += -(forceLJ.x + forceReal.x);
-            mForcey[particleMol[nParticle]] += -(forceLJ.y + forceReal.y);
-            mForcez[particleMol[nParticle]] += -(forceLJ.z + forceReal.z);
-          }
-        }
-      }
-    }
-  }
-#endif
-
-  // setting energy and virial of LJ interaction
-  potential.boxEnergy[box].inter = tempLJEn;
-  // setting energy and virial of coulomb interaction
-  potential.boxEnergy[box].real = tempREn;
-
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_BOX_FORCE);
-  return potential;
-}
-
-// Computes the intermolecular virial of one box: the LJ ("inter") and the
-// real-space electrostatic ("real") pressure-tensor terms accumulated over
-// unique atom pairs that belong to different molecules and pass InRcut. The
-// tail correction (when forcefield.useLRC or forcefield.useIPC is set) and
-// the reciprocal term from calcEwald are then applied before Total() is
-// called. Returns a default-constructed Virial when box >= BOXES_WITH_U_NB.
-//
-// NOTE: The calculation of W12, W13, and W23 is expensive and would not be
-// required for pressure and surface tension calculation. So, they have been
-// commented out. If you need to calculate them, uncomment them.
-Virial CalculateEnergy::VirialCalc(const uint box) {
-  // store virial and energy of reference and modify the virial
-  Virial tempVir;
-  // no need to calculate the virial for reservoir
-  if (box >= BOXES_WITH_U_NB)
-    return tempVir;
-
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_BOX_VIRIAL);
-
-  // tensors for VDW and real part of electrostatic
-  double vT11 = 0.0, vT12 = 0.0, vT13 = 0.0;
-  double vT22 = 0.0, vT23 = 0.0, vT33 = 0.0;
-  double rT11 = 0.0, rT12 = 0.0, rT13 = 0.0;
-  double rT22 = 0.0, rT23 = 0.0, rT33 = 0.0;
-
-  std::vector<int> cellVector, cellStartIndex, mapParticleToCell;
-  std::vector<std::vector<int>> neighborList;
-  cellList.GetCellListNeighbor(box, currentCoords.Count(), cellVector,
-                               cellStartIndex, mapParticleToCell);
-  neighborList = cellList.GetNeighborList(box);
-
-#ifdef GOMC_CUDA
-  // update unitcell in GPU
-  UpdateCellBasisCUDA(
-      forcefield.particles->getCUDAVars(), box, currentAxes.cellBasis[box].x,
-      currentAxes.cellBasis[box].y, currentAxes.cellBasis[box].z);
-
-  if (!currentAxes.orthogonal[box]) {
-    // In this case, currentAxes is really an object of type
-    // BoxDimensionsNonOrth,
-    // so cast and copy the additional data to the GPU
-    const BoxDimensionsNonOrth *NonOrthAxes =
-        static_cast<const BoxDimensionsNonOrth *>(&currentAxes);
-    UpdateInvCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
-                           NonOrthAxes->cellBasis_Inv[box].x,
-                           NonOrthAxes->cellBasis_Inv[box].y,
-                           NonOrthAxes->cellBasis_Inv[box].z);
-  }
-
-  CallBoxInterForceGPU(forcefield.particles->getCUDAVars(), cellVector,
-                       cellStartIndex, neighborList, mapParticleToCell,
-                       currentCoords, currentCOM, currentAxes, electrostatic,
-                       particleCharge, particleKind, particleMol, rT11, rT12,
-                       rT13, rT22, rT23, rT33, vT11, vT12, vT13, vT22, vT23,
-                       vT33, forcefield.sc_coul, forcefield.sc_sigma_6,
-                       forcefield.sc_alpha, forcefield.sc_power, box);
-#else
-  // CPU path: only the diagonal accumulators (11, 22, 33) are updated in the
-  // loop below, so the off-diagonal accumulators keep their initial 0.0.
-#if defined _OPENMP && _OPENMP >= 201511 // check if OpenMP version is 4.5
-#pragma omp parallel for default(none) shared(cellStartIndex, cellVector, \
-  mapParticleToCell, neighborList) firstprivate(box) \
-reduction(+:vT11, vT12, vT13, vT22, vT23, vT33, rT11, rT12, rT13, rT22, rT23, rT33)
-#endif
-  for (int currParticleIdx = 0; currParticleIdx < (int)cellVector.size();
-       currParticleIdx++) {
-    int currParticle = cellVector[currParticleIdx];
-    int currCell = mapParticleToCell[currParticle];
-
-    for (int nCellIndex = 0; nCellIndex < NUMBER_OF_NEIGHBOR_CELL;
-         nCellIndex++) {
-      int neighborCell = neighborList[currCell][nCellIndex];
-
-      int endIndex = cellStartIndex[neighborCell + 1];
-      for (int nParticleIndex = cellStartIndex[neighborCell];
-           nParticleIndex < endIndex; nParticleIndex++) {
-        int nParticle = cellVector[nParticleIndex];
-
-        // make sure the pairs are unique and they belong to different molecules
-        if (currParticle < nParticle &&
-            particleMol[currParticle] != particleMol[nParticle]) {
-          double distSq;
-          XYZ virC;
-          if (currentAxes.InRcut(distSq, virC, currentCoords, currParticle,
-                                 nParticle, box)) {
-            // calculate the distance between com of two molecules
-            XYZ comC = currentCOM.Difference(particleMol[currParticle],
-                                             particleMol[nParticle]);
-            // calculate the minimum image between com of two molecules
-            comC = currentAxes.MinImage(comC, box);
-            double lambdaVDW = GetLambdaVDW(particleMol[currParticle],
-                                            particleMol[nParticle], box);
-
-            if (electrostatic) {
-              double lambdaCoulomb = GetLambdaCoulomb(
-                  particleMol[currParticle], particleMol[nParticle], box);
-              // num::qqFact is not applied per pair here; it is applied once
-              // when the real tensor and tempVir.real are stored below.
-              double qi_qj =
-                  particleCharge[currParticle] * particleCharge[nParticle];
-
-              // skip particle pairs with no charge
-              if (qi_qj != 0.0) {
-                double pRF = forcefield.particles->CalcCoulombVir(
-                    distSq, particleKind[currParticle], particleKind[nParticle],
-                    qi_qj, lambdaCoulomb, box);
-                // calculate the top diagonal of pressure tensor
-                rT11 += pRF * (virC.x * comC.x);
-                // rT12 += pRF * (0.5 * (virC.x * comC.y + virC.y * comC.x));
-                // rT13 += pRF * (0.5 * (virC.x * comC.z + virC.z * comC.x));
-
-                rT22 += pRF * (virC.y * comC.y);
-                // rT23 += pRF * (0.5 * (virC.y * comC.z + virC.z * comC.y));
-
-                rT33 += pRF * (virC.z * comC.z);
-              }
-            }
-
-            double pVF = forcefield.particles->CalcVir(
-                distSq, particleKind[currParticle], particleKind[nParticle],
-                lambdaVDW);
-            // calculate the top diagonal of pressure tensor
-            vT11 += pVF * (virC.x * comC.x);
-            // vT12 += pVF * (0.5 * (virC.x * comC.y + virC.y * comC.x));
-            // vT13 += pVF * (0.5 * (virC.x * comC.z + virC.z * comC.x));
-
-            vT22 += pVF * (virC.y * comC.y);
-            // vT23 += pVF * (0.5 * (virC.y * comC.z + virC.z * comC.y));
-
-            vT33 += pVF * (virC.z * comC.z);
-          }
-        }
-      }
-    }
-  }
-#endif
-
-  // set the all tensor values
-  tempVir.interTens[0][0] = vT11;
-  tempVir.interTens[0][1] = vT12;
-  tempVir.interTens[0][2] = vT13;
-
-  tempVir.interTens[1][0] = vT12;
-  tempVir.interTens[1][1] = vT22;
-  tempVir.interTens[1][2] = vT23;
-
-  tempVir.interTens[2][0] = vT13;
-  tempVir.interTens[2][1] = vT23;
-  tempVir.interTens[2][2] = vT33;
-
-  if (electrostatic) {
-    // real part of electrostatic
-    tempVir.realTens[0][0] = rT11 * num::qqFact;
-    tempVir.realTens[0][1] = rT12 * num::qqFact;
-    tempVir.realTens[0][2] = rT13 * num::qqFact;
-
-    tempVir.realTens[1][0] = rT12 * num::qqFact;
-    tempVir.realTens[1][1] = rT22 * num::qqFact;
-    tempVir.realTens[1][2] = rT23 * num::qqFact;
-
-    tempVir.realTens[2][0] = rT13 * num::qqFact;
-    tempVir.realTens[2][1] = rT23 * num::qqFact;
-    tempVir.realTens[2][2] = rT33 * num::qqFact;
-  }
-
-  // setting virial of LJ
-  tempVir.inter = vT11 + vT22 + vT33;
-  // setting virial of coulomb
-  tempVir.real = (rT11 + rT22 + rT33) * num::qqFact;
-
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_BOX_VIRIAL);
-
-  if (forcefield.useLRC || forcefield.useIPC) {
-    VirialCorrection(tempVir, currentAxes, box);
-  }
-
-  // calculate reciprocal term of force
-  tempVir = calcEwald->VirialReciprocal(tempVir, box);
-
-  tempVir.Total();
-  return tempVir;
-}
-
-// Computes the change in intermolecular energy of molecule molIndex between
-// its current atom positions (currentCoords) and the proposed positions in
-// molCoords. For every atom, the energy with the neighbors of its current
-// position is subtracted and the energy with the neighbors of its proposed
-// position is added.
-//
-//   inter_LJ.energy      receives the accumulated LJ energy change.
-//   inter_coulomb.energy receives the accumulated real-space Coulomb change.
-//
-// Returns true when any proposed pair distance squared is below
-// forcefield.rCutLowSq. For box >= BOXES_WITH_U_NB no pairs are visited, both
-// energies are set to 0.0 and false is returned.
-bool CalculateEnergy::MoleculeInter(Intermolecular &inter_LJ,
-                                    Intermolecular &inter_coulomb,
-                                    XYZArray const &molCoords,
-                                    const uint molIndex, const uint box) const {
-  double tempREn = 0.0, tempLJEn = 0.0;
-  bool overlap = false;
-
-  if (box < BOXES_WITH_U_NB) {
-    GOMC_EVENT_START(1, GomcProfileEvent::EN_MOL_INTER);
-    uint length = mols.GetKind(molIndex).NumAtoms();
-    uint start = mols.MolStart(molIndex);
-
-    for (uint p = 0; p < length; ++p) {
-      uint atom = start + p;
-      CellList::Neighbors n = cellList.EnumerateLocal(currentCoords[atom], box);
-
-      std::vector<uint> nIndex;
-      // store atom index in neighboring cell
-      while (!n.Done()) {
-        nIndex.push_back(*n);
-        n.Next();
-      }
-
-#ifdef _OPENMP
-#pragma omp parallel for default(none) shared(nIndex) \
-firstprivate(atom, box, molIndex, num::qqFact) reduction(+:tempREn, tempLJEn)
-#endif
-      for (int i = 0; i < (int)nIndex.size(); i++) {
-        double distSq = 0.0;
-        XYZ virComponents;
-        // Subtract old energy
-        if (currentAxes.InRcut(distSq, virComponents, currentCoords, atom,
-                               nIndex[i], box)) {
-          double lambdaVDW =
-              GetLambdaVDW(molIndex, particleMol[nIndex[i]], box);
-
-          if (electrostatic) {
-            double lambdaCoulomb =
-                GetLambdaCoulomb(molIndex, particleMol[nIndex[i]], box);
-            double qi_qj_fact =
-                particleCharge[atom] * particleCharge[nIndex[i]] * num::qqFact;
-
-            if (qi_qj_fact != 0.0) {
-              tempREn += -forcefield.particles->CalcCoulomb(
-                  distSq, particleKind[atom], particleKind[nIndex[i]],
-                  qi_qj_fact, lambdaCoulomb, box);
-            }
-          }
-
-          tempLJEn += -forcefield.particles->CalcEn(
-              distSq, particleKind[atom], particleKind[nIndex[i]], lambdaVDW);
-        }
-      }
-
-      // add new energy
-      n = cellList.EnumerateLocal(molCoords[p], box);
-      // store atom index in neighboring cell
-      nIndex.clear();
-      while (!n.Done()) {
-        nIndex.push_back(*n);
-        n.Next();
-      }
-
-      // overlap is combined with a logical-or reduction: writing a shared
-      // flag from several threads without synchronization is a data race.
-#ifdef _OPENMP
-#pragma omp parallel for default(none) shared(molCoords, nIndex) \
-reduction(+:tempREn, tempLJEn) reduction(||:overlap) \
-firstprivate(atom, molIndex, p, box, num::qqFact)
-#endif
-      for (int i = 0; i < (int)nIndex.size(); i++) {
-        double distSq = 0.0;
-        XYZ virComponents;
-        if (currentAxes.InRcut(distSq, virComponents, molCoords, p,
-                               currentCoords, nIndex[i], box)) {
-          double lambdaVDW =
-              GetLambdaVDW(molIndex, particleMol[nIndex[i]], box);
-
-          if (distSq < forcefield.rCutLowSq) {
-            overlap = true;
-          }
-
-          if (electrostatic) {
-            double lambdaCoulomb =
-                GetLambdaCoulomb(molIndex, particleMol[nIndex[i]], box);
-            double qi_qj_fact =
-                particleCharge[atom] * particleCharge[nIndex[i]] * num::qqFact;
-
-            if (qi_qj_fact != 0.0) {
-              tempREn += forcefield.particles->CalcCoulomb(
-                  distSq, particleKind[atom], particleKind[nIndex[i]],
-                  qi_qj_fact, lambdaCoulomb, box);
-            }
-          }
-
-          tempLJEn += forcefield.particles->CalcEn(
-              distSq, particleKind[atom], particleKind[nIndex[i]], lambdaVDW);
-        }
-      }
-    }
-    GOMC_EVENT_STOP(1, GomcProfileEvent::EN_MOL_INTER);
-  }
-
-  inter_LJ.energy = tempLJEn;
-  inter_coulomb.energy = tempREn;
-  return overlap;
-}
-
-// Adds, for each of the `trials` candidate positions in trialPos, the
-// intramolecular 1-N nonbonded energy between atom partIndex and every
-// partner listed in kind.sortedNB that exists in trialMol. Results are
-// accumulated into inter[t]. Does nothing when box >= BOXES_WITH_U_B.
-void CalculateEnergy::ParticleNonbonded(double *inter,
-                                        cbmc::TrialMol const &trialMol,
-                                        XYZArray const &trialPos,
-                                        const uint partIndex, const uint box,
-                                        const uint trials) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_CBMC_INTRA_NB);
-  const MoleculeKind &kind = trialMol.GetKind();
-  // walk the sorted partner list of the trial particle
-  const uint *partner = kind.sortedNB.Begin(partIndex);
-  const uint *const stop = kind.sortedNB.End(partIndex);
-  for (; partner != stop; ++partner) {
-    // partners that are not part of the trial molecule contribute nothing
-    if (!trialMol.AtomExists(*partner))
-      continue;
-
-    for (uint t = 0; t < trials; ++t) {
-      double distSq;
-      if (!currentAxes.InRcut(distSq, trialPos, t, trialMol.GetCoords(),
-                              *partner, box))
-        continue;
-
-      inter[t] += forcefield.particles->CalcEn(
-          distSq, kind.AtomKind(partIndex), kind.AtomKind(*partner), 1.0);
-      if (!electrostatic)
-        continue;
-
-      const double qi_qj_fact = kind.AtomCharge(partIndex) *
-                                kind.AtomCharge(*partner) * num::qqFact;
-      // uncharged pairs have no Coulomb contribution
-      if (qi_qj_fact != 0.0) {
-        forcefield.particles->CalcCoulombAdd_1_4(inter[t], distSq, qi_qj_fact,
-                                                 true);
-      }
-    }
-  }
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_CBMC_INTRA_NB);
-}
-
-// For each of the `trials` candidate positions in trialPos, accumulates the
-// intermolecular energy between atom partIndex of molecule molIndex and the
-// atoms returned by the cell list around that position.
-//
-//   en[t]      is incremented by the LJ energy of trial t.
-//   real[t]    is incremented by the real-space Coulomb energy of trial t.
-//   overlap[t] is OR-ed with true when any pair distance squared of trial t
-//              is below forcefield.rCutLowSq; it is never cleared here.
-//
-// Does nothing when box >= BOXES_WITH_U_NB.
-void CalculateEnergy::ParticleInter(double *en, double *real,
-                                    XYZArray const &trialPos, bool *overlap,
-                                    const uint partIndex, const uint molIndex,
-                                    const uint box, const uint trials) const {
-  if (box >= BOXES_WITH_U_NB)
-    return;
-
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_CBMC_INTER);
-  double tempLJ, tempReal;
-  MoleculeKind const &thisKind = mols.GetKind(molIndex);
-  uint kindI = thisKind.AtomKind(partIndex);
-  double kindICharge = thisKind.AtomCharge(partIndex);
-  std::vector<uint> nIndex;
-
-  for (uint t = 0; t < trials; ++t) {
-    nIndex.clear();
-    tempReal = 0.0;
-    tempLJ = 0.0;
-    // Per-trial flag combined with a logical-or reduction; writing the shared
-    // overlap[t] from several threads without synchronization is a data race.
-    bool trialOverlap = false;
-    CellList::Neighbors n = cellList.EnumerateLocal(trialPos[t], box);
-    while (!n.Done()) {
-      nIndex.push_back(*n);
-      n.Next();
-    }
-
-#ifdef _OPENMP
-#pragma omp parallel for default(none) shared(nIndex, trialPos) \
-firstprivate(kindICharge, kindI, t, box, molIndex, num::qqFact) \
-reduction(+:tempLJ, tempReal) reduction(||:trialOverlap)
-#endif
-    for (int i = 0; i < (int)nIndex.size(); i++) {
-      double distSq = 0.0;
-      if (currentAxes.InRcut(distSq, trialPos, t, currentCoords, nIndex[i],
-                             box)) {
-        double lambdaVDW = GetLambdaVDW(molIndex, particleMol[nIndex[i]], box);
-
-        if (distSq < forcefield.rCutLowSq) {
-          trialOverlap = true;
-        }
-        tempLJ += forcefield.particles->CalcEn(
-            distSq, kindI, particleKind[nIndex[i]], lambdaVDW);
-        if (electrostatic) {
-          double lambdaCoulomb =
-              GetLambdaCoulomb(molIndex, particleMol[nIndex[i]], box);
-          double qi_qj_fact =
-              particleCharge[nIndex[i]] * kindICharge * num::qqFact;
-
-          if (qi_qj_fact != 0.0) {
-            tempReal += forcefield.particles->CalcCoulomb(
-                distSq, kindI, particleKind[nIndex[i]], qi_qj_fact,
-                lambdaCoulomb, box);
-          }
-        }
-      }
-    }
-    en[t] += tempLJ;
-    real[t] += tempReal;
-    // merge after the parallel loop; a previously set flag is preserved
-    overlap[t] |= trialOverlap;
-  }
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_CBMC_INTER);
-}
-
-// Calculates the change in the energy tail correction of `box` for adding
-// (add == true) or removing (add == false) a molecule of type `kind`
-// (presumably a single molecule; confirm against callers). The result is
-// returned in delta.energy; delta is left default-constructed when
-// box >= BOXES_WITH_U_NB.
-Intermolecular CalculateEnergy::MoleculeTailChange(const uint box,
-                                                   const uint kind,
-                                                   const bool add) const {
-  Intermolecular delta;
-  if (box >= BOXES_WITH_U_NB)
-    return delta;
-
-  const double sign = add ? 1.0 : -1.0;
-  const uint kindCount = mols.GetKindsCount();
-
-  // cross terms with every molecule kind currently in the box
-  for (uint other = 0; other < kindCount; ++other) {
-    const double rhoDeltaIJ_2 = sign * 2.0 *
-                                (double)(molLookup.NumKindInBox(other, box)) *
-                                currentAxes.volInv[box];
-    delta.energy +=
-        mols.pairEnCorrections[other * kindCount + kind] * rhoDeltaIJ_2;
-  }
-
-  // self term of this kind, not covered by the loop above
-  delta.energy += mols.pairEnCorrections[kind * kindCount + kind] *
-                  currentAxes.volInv[box];
-  return delta;
-}
-
-// Calculates the change in the virial tail correction of `box` for adding
-// (add == true) or removing (add == false) a molecule of type `kind`
-// (presumably a single molecule; confirm against callers). The result is
-// returned in delta.virial; delta is left default-constructed when
-// box >= BOXES_WITH_U_NB.
-Intermolecular CalculateEnergy::MoleculeTailVirChange(const uint box,
-                                                      const uint kind,
-                                                      const bool add) const {
-  Intermolecular delta;
-  if (box >= BOXES_WITH_U_NB)
-    return delta;
-
-  const double sign = add ? 1.0 : -1.0;
-  const uint kindCount = mols.GetKindsCount();
-
-  // cross terms with every molecule kind currently in the box
-  for (uint other = 0; other < kindCount; ++other) {
-    const double rhoDeltaIJ_2 = sign * 2.0 *
-                                (double)(molLookup.NumKindInBox(other, box)) *
-                                currentAxes.volInv[box];
-    delta.virial +=
-        mols.pairVirCorrections[other * kindCount + kind] * rhoDeltaIJ_2;
-  }
-
-  // self term of this kind, not covered by the loop above
-  delta.virial += mols.pairVirCorrections[kind * kindCount + kind] *
-                  currentAxes.volInv[box];
-  return delta;
-}
-
-// Computes the intramolecular energy of molecule molIndex from currentCoords.
-// bondEn[0] receives the bonded part (bonds, angles, dihedrals) and bondEn[1]
-// the intramolecular nonbonded part (1-N, 1-4 and 1-3); both are reset to
-// 0.0 first.
-void CalculateEnergy::MoleculeIntra(const uint molIndex, const uint box,
-                                    double *bondEn) const {
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_MOL_INTRA);
-  double &bonded = bondEn[0];
-  double &intraNonbonded = bondEn[1];
-  bonded = 0.0;
-  intraNonbonded = 0.0;
-
-  MoleculeKind &molKind = mols.kinds[mols.kIndex[molIndex]];
-  // twice the bond count: the second half holds the inverse bond vectors
-  XYZArray bondVec(molKind.bondList.count * 2);
-  BondVectors(bondVec, molKind, molIndex, box);
-
-  // bonded contributions
-  MolBond(bonded, molKind, bondVec, molIndex, box);
-  MolAngle(bonded, molKind, bondVec, box);
-  MolDihedral(bonded, molKind, bondVec, box);
-
-  // intramolecular nonbonded contributions
-  MolNonbond(intraNonbonded, molKind, molIndex, box);
-  MolNonbond_1_4(intraNonbonded, molKind, molIndex, box);
-  MolNonbond_1_3(intraNonbonded, molKind, molIndex, box);
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_MOL_INTRA);
-}
-
-// Used in molecule exchange: computes the bonded and intramolecular
-// nonbonded energy of a trial molecule from its own coordinates. Only bonds
-// whose two atoms exist in the trial molecule are marked as existing.
-// Returns an Energy built from the two sums with the remaining five
-// constructor arguments set to 0.0.
-Energy CalculateEnergy::MoleculeIntra(cbmc::TrialMol const &mol) const {
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_MOL_INTRA);
-  double bondEn = 0.0;
-  double intraNonbondEn = 0.0;
-
-  const MoleculeKind &molKind = mol.GetKind();
-  // twice the bond count: the second half holds the inverse bond vectors
-  const uint slots = molKind.bondList.count * 2;
-  XYZArray bondVec(slots);
-  std::vector<bool> bondExist(slots, false);
-  BondVectors(bondVec, mol, bondExist, molKind);
-
-  // bonded contributions
-  MolBond(bondEn, mol, bondVec, bondExist, molKind);
-  MolAngle(bondEn, mol, bondVec, bondExist, molKind);
-  MolDihedral(bondEn, mol, bondVec, bondExist, molKind);
-
-  // intramolecular nonbonded contributions
-  MolNonbond(intraNonbondEn, mol, molKind);
-  MolNonbond_1_4(intraNonbondEn, mol, molKind);
-  MolNonbond_1_3(intraNonbondEn, mol, molKind);
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_MOL_INTRA);
-  return Energy(bondEn, intraNonbondEn, 0.0, 0.0, 0.0, 0.0, 0.0);
-}
-
-// Fills vecs with the minimum-image bond vectors of molecule molIndex taken
-// from currentCoords. Entry b holds the vector of bond b (part2 - part1 as
-// computed by Difference(p2, p1)); entry b + bondCount holds its negation.
-void CalculateEnergy::BondVectors(XYZArray &vecs, MoleculeKind const &molKind,
-                                  const uint molIndex, const uint box) const {
-  const uint bondCount = molKind.bondList.count;
-  for (uint b = 0; b < bondCount; ++b) {
-    const uint p1 = mols.start[molIndex] + molKind.bondList.part1[b];
-    const uint p2 = mols.start[molIndex] + molKind.bondList.part2[b];
-    XYZ bond = currentCoords.Difference(p2, p1);
-    bond = currentAxes.MinImage(bond, box);
-
-    vecs.Set(b, bond);
-    // the inverse vector lives in the second half of the array
-    vecs.Set(b + bondCount, -bond.x, -bond.y, -bond.z);
-  }
-}
-
-// Fills vecs with the minimum-image bond vectors of a trial molecule, using
-// the trial molecule's own coordinates and box. A bond is processed only when
-// both of its atoms exist in mol; for such a bond b, bondExist[b] and
-// bondExist[b + count] are set to true, vecs[b] receives the bond vector and
-// vecs[b + count] its negation. Other entries are left untouched.
-void CalculateEnergy::BondVectors(XYZArray &vecs, cbmc::TrialMol const &mol,
-                                  std::vector<bool> &bondExist,
-                                  MoleculeKind const &molKind) const {
-  const uint box = mol.GetBox();
-  const uint count = molKind.bondList.count;
-  for (uint b = 0; b < count; ++b) {
-    const uint p1 = molKind.bondList.part1[b];
-    const uint p2 = molKind.bondList.part2[b];
-    if (!(mol.AtomExists(p1) && mol.AtomExists(p2)))
-      continue;
-
-    bondExist[b] = true;
-    bondExist[b + count] = true;
-    XYZ bond = mol.GetCoords().Difference(p2, p1);
-    bond = currentAxes.MinImage(bond, box);
-    vecs.Set(b, bond);
-    // the inverse vector lives in the second half of the array
-    vecs.Set(b + count, -bond.x, -bond.y, -bond.z);
-  }
-}
-
-// Adds the bond-stretching energy of every bond of molKind to `energy`,
-// using the lengths of the precomputed bond vectors in vecs. Does nothing
-// when box >= BOXES_WITH_U_B. molIndex is referenced only by the disabled
-// diagnostic below.
-void CalculateEnergy::MolBond(double &energy, MoleculeKind const &molKind,
-                              XYZArray const &vecs, const uint molIndex,
-                              const uint box) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-
-  const uint bondCount = molKind.bondList.count;
-  for (uint b = 0; b < bondCount; ++b) {
-    const double bondLength = vecs.Get(b).Length();
-    energy += forcefield.bonds.Calc(molKind.bondList.kinds[b], bondLength);
-    /* Disabled diagnostic: warn when the bond length in the coordinates
-       differs from the parameter-file equilibrium length.
-    double eqLength = forcefield.bonds.Length(molKind.bondList.kinds[b]);
-    if(std::abs(bondLength - eqLength) > 0.02) {
-      uint p1 = molKind.bondList.part1[b];
-      uint p2 = molKind.bondList.part2[b];
-      printf("Warning: Box%d, %6d %4s,", box, molIndex, molKind.name.c_str());
-      printf("%3s-%-3s bond: Par-file ", molKind.atomNames[p1].c_str(),
-          molKind.atomNames[p2].c_str());
-      printf("%2.3f A, PDB file %2.3f A!\n", eqLength, bondLength);
-    }*/
-  }
-}
-
-// Adds to `energy` the bond-stretching energy of every bond of the trial
-// molecule that is flagged in bondExist, using the bond vectors in vecs.
-// Does nothing when the trial molecule's box is >= BOXES_WITH_U_B.
-void CalculateEnergy::MolBond(double &energy, cbmc::TrialMol const &mol,
-                              XYZArray const &vecs,
-                              std::vector<bool> const &bondExist,
-                              MoleculeKind const &molKind) const {
-  if (mol.GetBox() >= BOXES_WITH_U_B)
-    return;
-
-  const uint bondCount = molKind.bondList.count;
-  for (uint b = 0; b < bondCount; ++b) {
-    // bonds with a missing atom are skipped
-    if (!bondExist[b])
-      continue;
-    const double bondLength = vecs.Get(b).Length();
-    energy += forcefield.bonds.Calc(molKind.bondList.kinds[b], bondLength);
-  }
-}
-
-// Adds the angle-bending energy of every angle of molKind to `energy`, using
-// the precomputed bond vectors in vecs. Does nothing when
-// box >= BOXES_WITH_U_B.
-void CalculateEnergy::MolAngle(double &energy, MoleculeKind const &molKind,
-                               XYZArray const &vecs, const uint box) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-
-  const uint angleCount = molKind.angles.Count();
-  for (uint a = 0; a < angleCount; ++a) {
-    const auto firstBond = molKind.angles.GetBond(a, 0);
-    const auto secondBond = molKind.angles.GetBond(a, 1);
-    // The second bond vector is negated to get the angle properly.
-    const double theta = Theta(vecs.Get(firstBond), -vecs.Get(secondBond));
-    energy += forcefield.angles->Calc(molKind.angles.GetKind(a), theta);
-  }
-}
-
-// Adds to `energy` the angle-bending energy of every angle of the trial
-// molecule whose two bonds are both flagged in bondExist. Does nothing when
-// the trial molecule's box is >= BOXES_WITH_U_B.
-void CalculateEnergy::MolAngle(double &energy, cbmc::TrialMol const &mol,
-                               XYZArray const &vecs,
-                               std::vector<bool> const &bondExist,
-                               MoleculeKind const &molKind) const {
-  if (mol.GetBox() >= BOXES_WITH_U_B)
-    return;
-
-  const uint angleCount = molKind.angles.Count();
-  for (uint a = 0; a < angleCount; ++a) {
-    const auto firstBond = molKind.angles.GetBond(a, 0);
-    const auto secondBond = molKind.angles.GetBond(a, 1);
-    // angles with a missing bond are skipped
-    if (!(bondExist[firstBond] && bondExist[secondBond]))
-      continue;
-
-    // The second bond vector is negated to get the angle properly.
-    const double theta = Theta(vecs.Get(firstBond), -vecs.Get(secondBond));
-    energy += forcefield.angles->Calc(molKind.angles.GetKind(a), theta);
-  }
-}
-
-// Adds the dihedral energy of every dihedral of molKind to `energy`, using
-// the precomputed bond vectors in vecs. Does nothing when
-// box >= BOXES_WITH_U_B.
-void CalculateEnergy::MolDihedral(double &energy, MoleculeKind const &molKind,
-                                  XYZArray const &vecs, const uint box) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-
-  const uint dihedralCount = molKind.dihedrals.Count();
-  for (uint d = 0; d < dihedralCount; ++d) {
-    const auto bond0 = molKind.dihedrals.GetBond(d, 0);
-    const auto bond1 = molKind.dihedrals.GetBond(d, 1);
-    const auto bond2 = molKind.dihedrals.GetBond(d, 2);
-    const double phi = Phi(vecs.Get(bond0), vecs.Get(bond1), vecs.Get(bond2));
-    energy += forcefield.dihedrals.Calc(molKind.dihedrals.GetKind(d), phi);
-  }
-}
-
-// Adds to `energy` the dihedral energy of every dihedral of the trial
-// molecule whose three bonds are all flagged in bondExist. Does nothing when
-// the trial molecule's box is >= BOXES_WITH_U_B.
-void CalculateEnergy::MolDihedral(double &energy, cbmc::TrialMol const &mol,
-                                  XYZArray const &vecs,
-                                  std::vector<bool> const &bondExist,
-                                  MoleculeKind const &molKind) const {
-  if (mol.GetBox() >= BOXES_WITH_U_B)
-    return;
-
-  const uint dihedralCount = molKind.dihedrals.Count();
-  for (uint d = 0; d < dihedralCount; ++d) {
-    const auto bond0 = molKind.dihedrals.GetBond(d, 0);
-    const auto bond1 = molKind.dihedrals.GetBond(d, 1);
-    const auto bond2 = molKind.dihedrals.GetBond(d, 2);
-    // dihedrals with a missing bond are skipped
-    if (!(bondExist[bond0] && bondExist[bond1] && bondExist[bond2]))
-      continue;
-
-    const double phi = Phi(vecs.Get(bond0), vecs.Get(bond1), vecs.Get(bond2));
-    energy += forcefield.dihedrals.Calc(molKind.dihedrals.GetKind(d), phi);
-  }
-}
-
-// Adds the 1-N intramolecular nonbonded energy of molecule molIndex to
-// `energy`, using currentCoords. For every pair in molKind.nonBonded that
-// passes InRcut, the LJ energy (lambda 1.0) is added and, when electrostatics
-// are enabled and the charge product is nonzero, the Coulomb term as well.
-// Does nothing when box >= BOXES_WITH_U_B.
-void CalculateEnergy::MolNonbond(double &energy, MoleculeKind const &molKind,
-                                 const uint molIndex, const uint box) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-
-  for (uint n = 0; n < molKind.nonBonded.count; ++n) {
-    // atom indices within the molecule kind
-    const uint a1 = molKind.nonBonded.part1[n];
-    const uint a2 = molKind.nonBonded.part2[n];
-    // the same atoms as indices into currentCoords
-    const uint p1 = mols.start[molIndex] + a1;
-    const uint p2 = mols.start[molIndex] + a2;
-
-    double distSq;
-    if (!currentAxes.InRcut(distSq, currentCoords, p1, p2, box))
-      continue;
-
-    energy += forcefield.particles->CalcEn(distSq, molKind.AtomKind(a1),
-                                           molKind.AtomKind(a2), 1.0);
-    if (!electrostatic)
-      continue;
-
-    const double qi_qj_fact =
-        num::qqFact * molKind.AtomCharge(a1) * molKind.AtomCharge(a2);
-    if (qi_qj_fact != 0.0) {
-      forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
-                                               true);
-    }
-  }
-}
-
-// Calculate 1-N nonbonded intra energy of a trial molecule using its own
-// coordinates. Only pairs in molKind.nonBonded whose two atoms both exist in
-// mol and that pass InRcut contribute: the LJ energy (lambda 1.0) is added
-// to `energy` and, when electrostatics are enabled and the charge product is
-// nonzero, the Coulomb term as well. Does nothing when the trial molecule's
-// box is >= BOXES_WITH_U_B.
-void CalculateEnergy::MolNonbond(double &energy, cbmc::TrialMol const &mol,
-                                 MoleculeKind const &molKind) const {
-  if (mol.GetBox() >= BOXES_WITH_U_B)
-    return;
-
-  double distSq;
-  double qi_qj_fact;
-  uint count = molKind.nonBonded.count;
-
-  for (uint i = 0; i < count; ++i) {
-    uint p1 = molKind.nonBonded.part1[i];
-    uint p2 = molKind.nonBonded.part2[i];
-    if (mol.AtomExists(p1) && mol.AtomExists(p2)) {
-      if (currentAxes.InRcut(distSq, mol.GetCoords(), p1, p2, mol.GetBox())) {
-        energy += forcefield.particles->CalcEn(distSq, molKind.AtomKind(p1),
-                                               molKind.AtomKind(p2), 1.0);
-        if (electrostatic) {
-          // Use the charge of atom p1; a literal index 1 would pair every
-          // partner with the charge of the kind's second atom.
-          qi_qj_fact =
-              num::qqFact * molKind.AtomCharge(p1) * molKind.AtomCharge(p2);
-
-          if (qi_qj_fact != 0.0) {
-            forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
-                                                     true);
-          }
-        }
-      }
-    }
-  }
-}
-
-// Adds the 1-4 intramolecular nonbonded energy of molecule molIndex to
-// `energy`, using currentCoords. For every pair in molKind.nonBonded_1_4
-// that passes InRcut, the 1-4 LJ term is added and, when electrostatics are
-// enabled and the charge product is nonzero, the Coulomb term as well.
-// Does nothing when box >= BOXES_WITH_U_B.
-void CalculateEnergy::MolNonbond_1_4(double &energy,
-                                     MoleculeKind const &molKind,
-                                     const uint molIndex,
-                                     const uint box) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-
-  for (uint n = 0; n < molKind.nonBonded_1_4.count; ++n) {
-    // atom indices within the molecule kind
-    const uint a1 = molKind.nonBonded_1_4.part1[n];
-    const uint a2 = molKind.nonBonded_1_4.part2[n];
-    // the same atoms as indices into currentCoords
-    const uint p1 = mols.start[molIndex] + a1;
-    const uint p2 = mols.start[molIndex] + a2;
-
-    double distSq;
-    if (!currentAxes.InRcut(distSq, currentCoords, p1, p2, box))
-      continue;
-
-    forcefield.particles->CalcAdd_1_4(energy, distSq, molKind.AtomKind(a1),
-                                      molKind.AtomKind(a2));
-    if (!electrostatic)
-      continue;
-
-    const double qi_qj_fact =
-        num::qqFact * molKind.AtomCharge(a1) * molKind.AtomCharge(a2);
-    if (qi_qj_fact != 0.0) {
-      forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
-                                               false);
-    }
-  }
-}
-
-// Adds the 1-4 intramolecular nonbonded energy of a trial molecule to
-// `energy`, using the trial molecule's own coordinates. Only pairs in
-// molKind.nonBonded_1_4 whose two atoms both exist in mol and that pass
-// InRcut contribute. Does nothing when the trial molecule's box is
-// >= BOXES_WITH_U_B.
-void CalculateEnergy::MolNonbond_1_4(double &energy, cbmc::TrialMol const &mol,
-                                     MoleculeKind const &molKind) const {
-  if (mol.GetBox() >= BOXES_WITH_U_B)
-    return;
-
-  const uint pairCount = molKind.nonBonded_1_4.count;
-  for (uint n = 0; n < pairCount; ++n) {
-    const uint p1 = molKind.nonBonded_1_4.part1[n];
-    const uint p2 = molKind.nonBonded_1_4.part2[n];
-    // pairs with a missing atom are skipped
-    if (!(mol.AtomExists(p1) && mol.AtomExists(p2)))
-      continue;
-
-    double distSq;
-    if (!currentAxes.InRcut(distSq, mol.GetCoords(), p1, p2, mol.GetBox()))
-      continue;
-
-    forcefield.particles->CalcAdd_1_4(energy, distSq, molKind.AtomKind(p1),
-                                      molKind.AtomKind(p2));
-    if (!electrostatic)
-      continue;
-
-    const double qi_qj_fact =
-        num::qqFact * molKind.AtomCharge(p1) * molKind.AtomCharge(p2);
-    if (qi_qj_fact != 0.0) {
-      forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
-                                               false);
-    }
-  }
-}
-
-// Adds the 1-3 intramolecular nonbonded energy of molecule molIndex to
-// `energy`, using currentCoords. For every pair in molKind.nonBonded_1_3
-// that passes InRcut, the same CalcAdd_1_4 / CalcCoulombAdd_1_4 routines used
-// for 1-4 pairs are applied. Does nothing when box >= BOXES_WITH_U_B.
-void CalculateEnergy::MolNonbond_1_3(double &energy,
-                                     MoleculeKind const &molKind,
-                                     const uint molIndex,
-                                     const uint box) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-
-  for (uint n = 0; n < molKind.nonBonded_1_3.count; ++n) {
-    // atom indices within the molecule kind
-    const uint a1 = molKind.nonBonded_1_3.part1[n];
-    const uint a2 = molKind.nonBonded_1_3.part2[n];
-    // the same atoms as indices into currentCoords
-    const uint p1 = mols.start[molIndex] + a1;
-    const uint p2 = mols.start[molIndex] + a2;
-
-    double distSq;
-    if (!currentAxes.InRcut(distSq, currentCoords, p1, p2, box))
-      continue;
-
-    forcefield.particles->CalcAdd_1_4(energy, distSq, molKind.AtomKind(a1),
-                                      molKind.AtomKind(a2));
-    if (!electrostatic)
-      continue;
-
-    const double qi_qj_fact =
-        num::qqFact * molKind.AtomCharge(a1) * molKind.AtomCharge(a2);
-    if (qi_qj_fact != 0.0) {
-      forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
-                                               false);
-    }
-  }
-}
-
-// Adds the 1-3 intramolecular nonbonded energy of a trial molecule to
-// `energy`, using the trial molecule's own coordinates. Only pairs in
-// molKind.nonBonded_1_3 whose two atoms both exist in mol and that pass
-// InRcut contribute. Does nothing when the trial molecule's box is
-// >= BOXES_WITH_U_B.
-void CalculateEnergy::MolNonbond_1_3(double &energy, cbmc::TrialMol const &mol,
-                                     MoleculeKind const &molKind) const {
-  if (mol.GetBox() >= BOXES_WITH_U_B)
-    return;
-
-  const uint pairCount = molKind.nonBonded_1_3.count;
-  for (uint n = 0; n < pairCount; ++n) {
-    const uint p1 = molKind.nonBonded_1_3.part1[n];
-    const uint p2 = molKind.nonBonded_1_3.part2[n];
-    // pairs with a missing atom are skipped
-    if (!(mol.AtomExists(p1) && mol.AtomExists(p2)))
-      continue;
-
-    double distSq;
-    if (!currentAxes.InRcut(distSq, mol.GetCoords(), p1, p2, mol.GetBox()))
-      continue;
-
-    forcefield.particles->CalcAdd_1_4(energy, distSq, molKind.AtomKind(p1),
-                                      molKind.AtomKind(p2));
-    if (!electrostatic)
-      continue;
-
-    const double qi_qj_fact =
-        num::qqFact * molKind.AtomCharge(p1) * molKind.AtomCharge(p2);
-    if (qi_qj_fact != 0.0) {
-      forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
-                                               false);
-    }
-  }
-}
-
-// Returns the 1-3 intramolecular nonbonded energy between atom1 and atom2 of
-// molecule molIndex at squared distance distSq. Returns 0.0 when
-// forcefield.OneThree is false. The Coulomb term (if electrostatics are
-// enabled and the charge product is nonzero) is accumulated before the LJ
-// term; a NaN result is replaced by num::BIGNUM.
-double CalculateEnergy::IntraEnergy_1_3(const double distSq, const uint atom1,
-                                        const uint atom2,
-                                        const uint molIndex) const {
-  if (!forcefield.OneThree)
-    return 0.0;
-
-  MoleculeKind const &molKind = mols.GetKind(molIndex);
-  double total = 0.0;
-
-  if (electrostatic) {
-    const double qi_qj_fact =
-        num::qqFact * molKind.AtomCharge(atom1) * molKind.AtomCharge(atom2);
-    if (qi_qj_fact != 0.0) {
-      forcefield.particles->CalcCoulombAdd_1_4(total, distSq, qi_qj_fact,
-                                               false);
-    }
-  }
-
-  const uint firstKind = molKind.AtomKind(atom1);
-  const uint secondKind = molKind.AtomKind(atom2);
-  forcefield.particles->CalcAdd_1_4(total, distSq, firstKind, secondKind);
-
-  // guard against an undefined energy
-  if (std::isnan(total)) {
-    total = num::BIGNUM;
-  }
-  return total;
-}
-
-// Calculate 1-4 nonbonded intra energy
-double CalculateEnergy::IntraEnergy_1_4(const double distSq, const uint atom1,
-                                        const uint atom2,
-                                        const uint molIndex) const {
-  if (!forcefield.OneFour)
-    return 0.0;
-
-  double eng = 0.0;
-
-  MoleculeKind const &thisKind = mols.GetKind(molIndex);
-  uint kind1 = thisKind.AtomKind(atom1);
-  uint kind2 = thisKind.AtomKind(atom2);
-
-  if (electrostatic) {
-    double qi_qj_fact =
-        num::qqFact * thisKind.AtomCharge(atom1) * thisKind.AtomCharge(atom2);
-
-    if (qi_qj_fact != 0.0) {
-      forcefield.particles->CalcCoulombAdd_1_4(eng, distSq, qi_qj_fact, false);
-    }
-  }
-  forcefield.particles->CalcAdd_1_4(eng, distSq, kind1, kind2);
-
-  if (std::isnan(eng))
-    eng = num::BIGNUM;
-
-  return eng;
-}
-
-//! Calculates energy tail corrections for the box
-void CalculateEnergy::EnergyCorrection(SystemPotential &pot,
-                                       BoxDimensions const &boxAxes,
-                                       const uint box) const {
-  if (box >= BOXES_WITH_U_NB) {
-    return;
-  }
-
-  double en = 0.0;
-  for (uint i = 0; i < mols.GetKindsCount(); ++i) {
-    uint numI = molLookup.NumKindInBox(i, box);
-    for (uint j = 0; j < mols.GetKindsCount(); ++j) {
-      uint numJ = molLookup.NumKindInBox(j, box);
-      en += mols.pairEnCorrections[i * mols.GetKindsCount() + j] * numI * numJ *
-            boxAxes.volInv[box];
-    }
-  }
-
-  if (!forcefield.freeEnergy) {
-    pot.boxEnergy[box].tailCorrection = en;
-  }
-#if ENSEMBLE == NVT || ENSEMBLE == NPT
-  else {
-    // Get the kind and lambda value
-    uint fk = mols.GetMolKind(lambdaRef.GetMolIndex(box));
-    double lambdaVDW = lambdaRef.GetLambdaVDW(lambdaRef.GetMolIndex(box), box);
-    // remove the LRC for one molecule with lambda = 1
-    en += MoleculeTailChange(box, fk, false).energy;
-
-    // Add the LRC for fractional molecule
-    for (uint i = 0; i < mols.GetKindsCount(); ++i) {
-      uint molNum = molLookup.NumKindInBox(i, box);
-      if (i == fk) {
-        --molNum; // We have one less molecule (it is fractional molecule)
-      }
-      double rhoDeltaIJ_2 = 2.0 * (double)(molNum)*currentAxes.volInv[box];
-      en += lambdaVDW * mols.pairEnCorrections[fk * mols.GetKindsCount() + i] *
-            rhoDeltaIJ_2;
-    }
-    // We already calculated part of the change for this type in the loop
-    en += lambdaVDW * mols.pairEnCorrections[fk * mols.GetKindsCount() + fk] *
-          currentAxes.volInv[box];
-    pot.boxEnergy[box].tailCorrection = en;
-  }
-#endif
-}
-
-//! Calculates energy corrections for the box
-double CalculateEnergy::EnergyCorrection(const uint box,
-                                         const uint *kCount) const {
-  if (box >= BOXES_WITH_U_NB) {
-    return 0.0;
-  }
-
-  double tailCorrection = 0.0;
-  for (uint i = 0; i < mols.kindsCount; ++i) {
-    for (uint j = 0; j < mols.kindsCount; ++j) {
-      tailCorrection += mols.pairEnCorrections[i * mols.kindsCount + j] *
-                        kCount[i] * kCount[j] * currentAxes.volInv[box];
-    }
-  }
-  return tailCorrection;
-}
-
-void CalculateEnergy::VirialCorrection(Virial &virial,
-                                       BoxDimensions const &boxAxes,
-                                       const uint box) const {
-  if (box >= BOXES_WITH_U_NB) {
-    return;
-  }
-  double vir = 0.0;
-
-  for (uint i = 0; i < mols.GetKindsCount(); ++i) {
-    uint numI = molLookup.NumKindInBox(i, box);
-    for (uint j = 0; j < mols.GetKindsCount(); ++j) {
-      uint numJ = molLookup.NumKindInBox(j, box);
-      vir += mols.pairVirCorrections[i * mols.GetKindsCount() + j] * numI *
-             numJ * boxAxes.volInv[box];
-    }
-  }
-
-  if (!forcefield.freeEnergy) {
-    virial.tailCorrection = vir;
-  }
-#if ENSEMBLE == NVT || ENSEMBLE == NPT
-  else {
-    // Get the kind and lambda value
-    uint fk = mols.GetMolKind(lambdaRef.GetMolIndex(box));
-    double lambdaVDW = lambdaRef.GetLambdaVDW(lambdaRef.GetMolIndex(box), box);
-    // remove the LRC for one molecule with lambda = 1
-    vir += MoleculeTailVirChange(box, fk, false).virial;
-
-    // Add the LRC for fractional molecule
-    for (uint i = 0; i < mols.GetKindsCount(); ++i) {
-      uint molNum = molLookup.NumKindInBox(i, box);
-      if (i == fk) {
-        --molNum; // We have one less molecule (it is fractional molecule)
-      }
-      double rhoDeltaIJ_2 = 2.0 * (double)(molNum)*currentAxes.volInv[box];
-      vir += lambdaVDW *
-             mols.pairVirCorrections[fk * mols.GetKindsCount() + i] *
-             rhoDeltaIJ_2;
-    }
-    // We already calculated part of the change for this type in the loop
-    vir += lambdaVDW * mols.pairVirCorrections[fk * mols.GetKindsCount() + fk] *
-           currentAxes.volInv[box];
-    virial.tailCorrection = vir;
-  }
-#endif
-}
-
-//! Calculate Torque
-void CalculateEnergy::CalculateTorque(std::vector<uint> &moleculeIndex,
-                                      XYZArray const &coordinates,
-                                      XYZArray const &com,
-                                      XYZArray const &atomForce,
-                                      XYZArray const &atomForceRec,
-                                      XYZArray &molTorque, const uint box) {
-  if (multiParticleEnabled && (box < BOXES_WITH_U_NB)) {
-    GOMC_EVENT_START(1, GomcProfileEvent::BOX_TORQUE);
-    // make a pointer to mol torque for OpenMP
-    double *torquex = molTorque.x;
-    double *torquey = molTorque.y;
-    double *torquez = molTorque.z;
-
-#if defined _OPENMP
-#pragma omp parallel for default(none) \
-shared(atomForce, atomForceRec, com, coordinates, moleculeIndex, torquex, torquey, torquez) \
-firstprivate(box)
-#endif
-    for (int m = 0; m < (int)moleculeIndex.size(); m++) {
-      int mIndex = moleculeIndex[m];
-      int length = mols.GetKind(mIndex).NumAtoms();
-      int start = mols.MolStart(mIndex);
-      double tx = 0.0;
-      double ty = 0.0;
-      double tz = 0.0;
-      // atom iterator
-      for (int p = start; p < start + length; p++) {
-        XYZ distFromCOM = coordinates.Difference(p, com, mIndex);
-        distFromCOM = currentAxes.MinImage(distFromCOM, box);
-        XYZ tempTorque = Cross(distFromCOM, atomForce[p] + atomForceRec[p]);
-
-        tx += tempTorque.x;
-        ty += tempTorque.y;
-        tz += tempTorque.z;
-      }
-      torquex[mIndex] = tx;
-      torquey[mIndex] = ty;
-      torquez[mIndex] = tz;
-    }
-  }
-  GOMC_EVENT_STOP(1, GomcProfileEvent::BOX_TORQUE);
-}
-
-void CalculateEnergy::ResetForce(XYZArray &atomForce, XYZArray &molForce,
-                                 uint box) {
-  if (multiParticleEnabled) {
-    uint length, start;
-
-    // molecule iterator
-    MoleculeLookup::box_iterator thisMol = molLookup.BoxBegin(box);
-    MoleculeLookup::box_iterator end = molLookup.BoxEnd(box);
-
-    while (thisMol != end) {
-      length = mols.GetKind(*thisMol).NumAtoms();
-      start = mols.MolStart(*thisMol);
-
-      molForce.Set(*thisMol, 0.0, 0.0, 0.0);
-      for (uint p = start; p < start + length; p++) {
-        atomForce.Set(p, 0.0, 0.0, 0.0);
-      }
-      thisMol++;
-    }
-  }
-}
-
-uint CalculateEnergy::NumberOfParticlesInsideBox(uint box) {
-  uint numberOfAtoms = 0;
-
-  for (int k = 0; k < (int)mols.GetKindsCount(); k++) {
-    MoleculeKind const &thisKind = mols.kinds[k];
-    numberOfAtoms += thisKind.NumAtoms() * molLookup.NumKindInBox(k, box);
-  }
-
-  return numberOfAtoms;
-}
-
-bool CalculateEnergy::FindMolInCavity(std::vector<std::vector<uint>> &mol,
-                                      const XYZ &center, const XYZ &cavDim,
-                                      const XYZArray &invCav, const uint box,
-                                      const uint kind, const uint exRatio) {
-  uint k;
-  mol.clear();
-  mol.resize(molLookup.GetNumKind());
-  double maxLength = cavDim.Max();
-
-  if (maxLength <= currentAxes.rCut[box]) {
-    CellList::Neighbors n = cellList.EnumerateLocal(center, box);
-    while (!n.Done()) {
-      if (currentAxes.InCavity(currentCOM.Get(particleMol[*n]), center, cavDim,
-                               invCav, box)) {
-        uint molIndex = particleMol[*n];
-        // if molecule can be transfer between boxes
-        if (!molLookup.IsNoSwap(molIndex)) {
-          k = mols.GetMolKind(molIndex);
-          bool exist =
-              std::find(mol[k].begin(), mol[k].end(), molIndex) != mol[k].end();
-          if (!exist)
-            mol[k].push_back(molIndex);
-        }
-      }
-      n.Next();
-    }
-  } else {
-    MoleculeLookup::box_iterator n = molLookup.BoxBegin(box);
-    MoleculeLookup::box_iterator end = molLookup.BoxEnd(box);
-    while (n != end) {
-      if (currentAxes.InCavity(currentCOM.Get(*n), center, cavDim, invCav,
-                               box)) {
-        uint molIndex = *n;
-        // if molecule can be transfer between boxes
-        if (!molLookup.IsNoSwap(molIndex)) {
-          k = mols.GetMolKind(molIndex);
-          bool exist =
-              std::find(mol[k].begin(), mol[k].end(), molIndex) != mol[k].end();
-          if (!exist)
-            mol[k].push_back(molIndex);
-        }
-      }
-      n++;
-    }
-  }
-
-  // If the is exRate and more molecule kind in cavity, return true.
-  if (mol[kind].size() >= exRatio)
-    return true;
-  else
-    return false;
-}
-
-void CalculateEnergy::SingleMoleculeInter(
-    Energy &interEnOld, Energy &interEnNew, const double lambdaOldVDW,
-    const double lambdaNewVDW, const double lambdaOldCoulomb,
-    const double lambdaNewCoulomb, const uint molIndex, const uint box) const {
-  double tempREnOld = 0.0, tempLJEnOld = 0.0;
-  double tempREnNew = 0.0, tempLJEnNew = 0.0;
-  if (box < BOXES_WITH_U_NB) {
-    uint length = mols.GetKind(molIndex).NumAtoms();
-    uint start = mols.MolStart(molIndex);
-
-    for (uint p = 0; p < length; ++p) {
-      uint atom = start + p;
-      CellList::Neighbors n = cellList.EnumerateLocal(currentCoords[atom], box);
-
-      std::vector<uint> nIndex;
-      // store atom index in neighboring cell
-      while (!n.Done()) {
-        if (particleMol[*n] != (int)molIndex) {
-          nIndex.push_back(*n);
-        }
-        n.Next();
-      }
-
-#ifdef _OPENMP
-#pragma omp parallel for default(none) shared(nIndex) \
-firstprivate(atom, box, lambdaNewCoulomb, lambdaOldCoulomb, lambdaOldVDW, \
-lambdaNewVDW, num::qqFact) reduction(+:tempREnOld, tempLJEnOld, tempREnNew, \
-tempLJEnNew)
-#endif
-      for (int i = 0; i < (int)nIndex.size(); i++) {
-        double distSq = 0.0;
-        XYZ virComponents;
-        if (currentAxes.InRcut(distSq, virComponents, currentCoords, atom,
-                               nIndex[i], box)) {
-          if (electrostatic) {
-            double qi_qj_fact =
-                particleCharge[atom] * particleCharge[nIndex[i]] * num::qqFact;
-            if (qi_qj_fact != 0.0) {
-              tempREnNew += forcefield.particles->CalcCoulomb(
-                  distSq, particleKind[atom], particleKind[nIndex[i]],
-                  qi_qj_fact, lambdaNewCoulomb, box);
-              tempREnOld += forcefield.particles->CalcCoulomb(
-                  distSq, particleKind[atom], particleKind[nIndex[i]],
-                  qi_qj_fact, lambdaOldCoulomb, box);
-            }
-          }
-
-          tempLJEnNew += forcefield.particles->CalcEn(
-              distSq, particleKind[atom], particleKind[nIndex[i]],
-              lambdaNewVDW);
-          tempLJEnOld += forcefield.particles->CalcEn(
-              distSq, particleKind[atom], particleKind[nIndex[i]],
-              lambdaOldVDW);
-        }
-      }
-    }
-  }
-
-  interEnNew.inter = tempLJEnNew;
-  interEnNew.real = tempREnNew;
-  interEnOld.inter = tempLJEnOld;
-  interEnOld.real = tempREnOld;
-}
-
-double CalculateEnergy::GetLambdaVDW(uint molA, uint molB, uint box) const {
-  double lambda = 1.0;
-  lambda *= lambdaRef.GetLambdaVDW(molA, box);
-  lambda *= lambdaRef.GetLambdaVDW(molB, box);
-  return lambda;
-}
-
-double CalculateEnergy::GetLambdaCoulomb(uint molA, uint molB, uint box) const {
-  double lambda = 1.0;
-  lambda *= lambdaRef.GetLambdaCoulomb(molA, box);
-  lambda *= lambdaRef.GetLambdaCoulomb(molB, box);
-  // no need for sq root for inter energy. Always one of the molecules has
-  // lambda 1
-  return lambda;
-}
-
-// Calculates the change in the TC from adding numChange atoms of a kind
-double CalculateEnergy::MoleculeTailChange(const uint box, const uint kind,
-                                           const std::vector<uint> &kCount,
-                                           const double lambdaOld,
-                                           const double lambdaNew) const {
-  if (box >= BOXES_WITH_U_NB) {
-    return 0.0;
-  }
-
-  double tcDiff = 0.0;
-  uint ktot = mols.GetKindsCount();
-  for (uint i = 0; i < ktot; ++i) {
-    // We should have only one molecule of fractional kind
-    double rhoDeltaIJ_2 = 2.0 * (double)(kCount[i]) * currentAxes.volInv[box];
-    uint index = kind * ktot + i;
-    tcDiff +=
-        (lambdaNew - lambdaOld) * mols.pairEnCorrections[index] * rhoDeltaIJ_2;
-  }
-  uint index = kind * ktot + kind;
-  tcDiff += (lambdaNew - lambdaOld) * mols.pairEnCorrections[index] *
-            currentAxes.volInv[box];
-
-  return tcDiff;
-}
-
-// Calculate the change in energy due to lambda
-void CalculateEnergy::EnergyChange(Energy *energyDiff, Energy &dUdL_VDW,
-                                   Energy &dUdL_Coul,
-                                   const std::vector<double> &lambda_VDW,
-                                   const std::vector<double> &lambda_Coul,
-                                   const uint iState, const uint molIndex,
-                                   const uint box) const {
-  if (box >= BOXES_WITH_U_NB) {
-    return;
-  }
-
-  GOMC_EVENT_START(1, GomcProfileEvent::FREE_ENERGY);
-  uint length = mols.GetKind(molIndex).NumAtoms();
-  uint start = mols.MolStart(molIndex);
-  uint lambdaSize = lambda_VDW.size();
-  double *tempLJEnDiff = new double[lambdaSize];
-  double *tempREnDiff = new double[lambdaSize];
-  double dudl_VDW = 0.0, dudl_Coul = 0.0;
-  std::fill_n(tempLJEnDiff, lambdaSize, 0.0);
-  std::fill_n(tempREnDiff, lambdaSize, 0.0);
-
-  // Calculate the vdw, short range electrostatic energy
-  for (uint p = 0; p < length; ++p) {
-    uint atom = start + p;
-    CellList::Neighbors n = cellList.EnumerateLocal(currentCoords[atom], box);
-
-    std::vector<uint> nIndex;
-    // store atom index in neighboring cell
-    while (!n.Done()) {
-      if (particleMol[*n] != (int)molIndex) {
-        nIndex.push_back(*n);
-      }
-      n.Next();
-    }
-
-#if defined _OPENMP && _OPENMP >= 201511 // check if OpenMP version is 4.5
-#pragma omp parallel for default(none) shared(lambda_Coul, lambda_VDW, nIndex) \
-firstprivate(box, atom, iState, lambdaSize, num::qqFact) \
-reduction(+:dudl_VDW, dudl_Coul, tempREnDiff[:lambdaSize], tempLJEnDiff[:lambdaSize])
-#endif
-    for (int i = 0; i < (int)nIndex.size(); i++) {
-      double distSq = 0.0;
-      XYZ virComponents;
-      if (currentAxes.InRcut(distSq, virComponents, currentCoords, atom,
-                             nIndex[i], box)) {
-        double qi_qj_fact = 0.0, energyOldCoul = 0.0;
-        // Calculate the energy of current state
-        double energyOldVDW = forcefield.particles->CalcEn(
-            distSq, particleKind[atom], particleKind[nIndex[i]],
-            lambda_VDW[iState]);
-        // Calculate du/dl in VDW for current state
-        dudl_VDW += forcefield.particles->CalcdEndL(distSq, particleKind[atom],
-                                                    particleKind[nIndex[i]],
-                                                    lambda_VDW[iState]);
-
-        if (electrostatic) {
-          qi_qj_fact =
-              particleCharge[atom] * particleCharge[nIndex[i]] * num::qqFact;
-          if (qi_qj_fact != 0.0) {
-            energyOldCoul = forcefield.particles->CalcCoulomb(
-                distSq, particleKind[atom], particleKind[nIndex[i]], qi_qj_fact,
-                lambda_Coul[iState], box);
-            // Calculate du/dl in Coulomb for current state.
-            dudl_Coul += forcefield.particles->CalcCoulombdEndL(
-                distSq, particleKind[atom], particleKind[nIndex[i]], qi_qj_fact,
-                lambda_Coul[iState], box);
-          }
-        }
-
-        for (int s = 0; s < (int)lambdaSize; s++) {
-          // Calculate the energy of other state
-          tempLJEnDiff[s] += forcefield.particles->CalcEn(
-              distSq, particleKind[atom], particleKind[nIndex[i]],
-              lambda_VDW[s]);
-          tempLJEnDiff[s] += -energyOldVDW;
-          if (electrostatic && qi_qj_fact != 0.0) {
-            tempREnDiff[s] += forcefield.particles->CalcCoulomb(
-                distSq, particleKind[atom], particleKind[nIndex[i]], qi_qj_fact,
-                lambda_Coul[s], box);
-            tempREnDiff[s] += -energyOldCoul;
-          }
-        }
-      }
-    }
-  }
-
-  dUdL_VDW.inter = dudl_VDW;
-  dUdL_Coul.real = dudl_Coul;
-  for (int s = 0; s < (int)lambdaSize; s++) {
-    energyDiff[s].inter += tempLJEnDiff[s];
-    energyDiff[s].real += tempREnDiff[s];
-  }
-  delete[] tempLJEnDiff;
-  delete[] tempREnDiff;
-
-  if (forcefield.useLRC) {
-    // Need to calculate change in LRC
-    ChangeLRC(energyDiff, dUdL_VDW, lambda_VDW, iState, molIndex, box);
-  }
-  // Need to calculate change in self
-  calcEwald->ChangeSelf(energyDiff, dUdL_Coul, lambda_Coul, iState, molIndex,
-                        box);
-  // Need to calculate change in correction
-  calcEwald->ChangeCorrection(energyDiff, dUdL_Coul, lambda_Coul, iState,
-                              molIndex, box);
-  // Need to calculate change in Reciprocal
-  calcEwald->ChangeRecip(energyDiff, dUdL_Coul, lambda_Coul, iState, molIndex,
-                         box);
-  GOMC_EVENT_STOP(1, GomcProfileEvent::FREE_ENERGY);
-}
-
-// Calculate the change in LRC for each state
-void CalculateEnergy::ChangeLRC(Energy *energyDiff, Energy &dUdL_VDW,
-                                const std::vector<double> &lambda_VDW,
-                                const uint iState, const uint molIndex,
-                                const uint box) const {
-  // Get the kind and lambda value
-  uint fk = mols.GetMolKind(molIndex);
-  double lambda_istate = lambda_VDW[iState];
-
-  // Add the LRC for fractional molecule
-  for (size_t s = 0; s < lambda_VDW.size(); s++) {
-    double lambdaVDW = lambda_VDW[s];
-    for (uint i = 0; i < mols.GetKindsCount(); ++i) {
-      uint molNum = molLookup.NumKindInBox(i, box);
-      if (i == fk) {
-        --molNum; // We have one less molecule (it is fractional molecule)
-      }
-      double rhoDeltaIJ_2 = 2.0 * (double)(molNum)*currentAxes.volInv[box];
-      energyDiff[s].tailCorrection +=
-          mols.pairEnCorrections[fk * mols.GetKindsCount() + i] * rhoDeltaIJ_2 *
-          (lambdaVDW - lambda_istate);
-      if (s == iState) {
-        // Calculate du/dl in VDW LRC for current state
-        dUdL_VDW.tailCorrection +=
-            mols.pairEnCorrections[fk * mols.GetKindsCount() + i] *
-            rhoDeltaIJ_2;
-      }
-    }
-    energyDiff[s].tailCorrection +=
-        mols.pairEnCorrections[fk * mols.GetKindsCount() + fk] *
-        currentAxes.volInv[box] * (lambdaVDW - lambda_istate);
-    if (s == iState) {
-      // Calculate du/dl in VDW LRC for current state
-      dUdL_VDW.tailCorrection +=
-          mols.pairEnCorrections[fk * mols.GetKindsCount() + fk] *
-          currentAxes.volInv[box];
-    }
-  }
-}
+/*******************************************************************************
+GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
+Copyright (C) 2022 GOMC Group
+A copy of the MIT License can be found in License.txt
+along with this program, also can be found at
+<https://opensource.org/licenses/MIT>.
+********************************************************************************/
+#include "CalculateEnergy.h" //header for this
+
+#include <cassert>
+
+#include "BasicTypes.h" //uint
+#include "BoxDimensions.h"
+#include "BoxDimensionsNonOrth.h"
+#include "Coordinates.h"
+#include "EnergyTypes.h"          //Energy structs
+#include "EnsemblePreprocessor.h" //Flags
+#include "Ewald.h"                //for ewald calculation
+#include "EwaldCached.h"          //for ewald calculation
+#include "Forcefield.h"           //
+#include "GeomLib.h"
+#include "MoleculeKind.h"
+#include "MoleculeLookup.h"
+#include "NoEwald.h" //for ewald calculation
+#include "NumLib.h"
+#include "StaticVals.h" //For init
+#include "System.h"     //For init
+#include "TrialMol.h"
+#ifdef GOMC_CUDA
+#include "CalculateEnergyCUDAKernel.cuh"
+#include "CalculateForceCUDAKernel.cuh"
+#include "ConstantDefinitionsCUDAKernel.cuh"
+#endif
+#include "GOMCEventsProfile.h"
+#define NUMBER_OF_NEIGHBOR_CELL 27
+
+//
+//    CalculateEnergy.cpp
+//    Energy Calculation functions for Monte Carlo simulation
+//    Calculates using const references to a particular Simulation's members
+//    Brock Jackman Sep. 2013
+//
+//    Updated to use radial-based intermolecular pressure
+//    Jason Mick    Feb. 2014
+//
+
+using namespace geom;
+
+CalculateEnergy::CalculateEnergy(StaticVals &stat, System &sys)
+    : forcefield(stat.forcefield), mols(stat.mol),
+      currentCoords(sys.coordinates), currentCOM(sys.com),
+      lambdaRef(sys.lambdaRef), atomForceRef(sys.atomForceRef),
+      molForceRef(sys.molForceRef),
+#ifdef VARIABLE_PARTICLE_NUMBER
+      molLookup(sys.molLookup),
+#else
+      molLookup(stat.molLookup),
+#endif
+      currentAxes(sys.boxDimRef), cellList(sys.cellList) {
+}
+
+void CalculateEnergy::Init(System &sys) {
+  uint maxAtomInMol = 0;
+  calcEwald = sys.GetEwald();
+  electrostatic = forcefield.electrostatic;
+  ewald = forcefield.ewald;
+  multiParticleEnabled = sys.statV.multiParticleEnabled;
+  for (uint m = 0; m < mols.count; ++m) {
+    const MoleculeKind &molKind = mols.GetKind(m);
+    if (molKind.NumAtoms() > maxAtomInMol)
+      maxAtomInMol = molKind.NumAtoms();
+    for (uint a = 0; a < molKind.NumAtoms(); ++a) {
+      particleKind.push_back(molKind.AtomKind(a));
+      particleMol.push_back(m);
+      particleCharge.push_back(molKind.AtomCharge(a));
+      particleIndex.push_back(int(a));
+    }
+  }
+#ifdef GOMC_CUDA
+  InitCoordinatesCUDA(forcefield.particles->getCUDAVars(),
+                      currentCoords.Count(), maxAtomInMol, currentCOM.Count());
+#endif
+}
+
+SystemPotential CalculateEnergy::SystemTotal() {
+  GOMC_EVENT_START(1, GomcProfileEvent::EN_SYSTEM_TOTAL);
+  SystemPotential pot =
+      SystemInter(SystemPotential(), currentCoords, currentAxes);
+
+  // system intra
+  for (uint b = 0; b < BOX_TOTAL; ++b) {
+    GOMC_EVENT_START(1, GomcProfileEvent::EN_BOX_INTRA);
+    double bondEnergy[2] = {0};
+    double bondEn = 0.0, nonbondEn = 0.0, correction = 0.0;
+    MoleculeLookup::box_iterator thisMol = molLookup.BoxBegin(b);
+    MoleculeLookup::box_iterator end = molLookup.BoxEnd(b);
+    std::vector<uint> molID;
+
+    while (thisMol != end) {
+      molID.push_back(*thisMol);
+      ++thisMol;
+    }
+
+#ifdef _OPENMP
+#pragma omp parallel for default(none) private(bondEnergy) shared(b, molID) \
+    reduction(+:bondEn, nonbondEn, correction)
+#endif
+    for (int i = 0; i < (int)molID.size(); i++) {
+      // calculate intramolecular bonded and nonbonded energy
+      MoleculeIntra(molID[i], b, bondEnergy);
+      bondEn += bondEnergy[0];
+      nonbondEn += bondEnergy[1];
+      // calculate correction term of electrostatic interaction
+      correction += calcEwald->MolCorrection(molID[i], b);
+    }
+
+    pot.boxEnergy[b].intraBond = bondEn;
+    pot.boxEnergy[b].intraNonbond = nonbondEn;
+    // calculate self term of electrostatic interaction
+    pot.boxEnergy[b].self = calcEwald->BoxSelf(b);
+    pot.boxEnergy[b].correction = correction;
+
+    GOMC_EVENT_STOP(1, GomcProfileEvent::EN_BOX_INTRA);
+    // Calculate Virial
+    pot.boxVirial[b] = VirialCalc(b);
+  }
+
+  pot.Total();
+
+  if (pot.totalEnergy.total > 1.0e12) {
+    std::cout << "\nWarning: Large energy detected due to the overlap in "
+                 "initial configuration.\n"
+                 "         The total energy will be recalculated at EqStep to "
+                 "ensure the accuracy \n"
+                 "         of the computed running energies.\n";
+  }
+
+  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_SYSTEM_TOTAL);
+  return pot;
+}
+
+SystemPotential CalculateEnergy::SystemInter(SystemPotential potential,
+                                             XYZArray const &coords,
+                                             BoxDimensions const &boxAxes) {
+  for (uint b = 0; b < BOXES_WITH_U_NB; ++b) {
+    // calculate LJ interaction and real term of electrostatic interaction
+    potential = BoxInter(potential, coords, boxAxes, b);
+    // calculate reciprocal term of electrostatic interaction
+    potential.boxEnergy[b].recip = calcEwald->BoxReciprocal(b, false);
+  }
+
+  potential.Total();
+
+  return potential;
+}
+
+// Calculate the inter energy for a box. Fractional molecules are not allowed
+// in this function. Need to implement the GPU function
+SystemPotential CalculateEnergy::BoxInter(SystemPotential potential,
+                                          XYZArray const &coords,
+                                          BoxDimensions const &boxAxes,
+                                          const uint box) {
+  // Handles reservoir box case, returning the potential unchanged if
+  // interactions are off.
+  if (box >= BOXES_WITH_U_NB)
+    return potential;
+
+  GOMC_EVENT_START(1, GomcProfileEvent::EN_BOX_INTER);
+  double tempREn = 0.0, tempLJEn = 0.0;
+
+  std::vector<int> cellVector, cellStartIndex, mapParticleToCell;
+  std::vector<std::vector<int>> neighborList;
+  cellList.GetCellListNeighbor(box, currentCoords.Count(), cellVector,
+                               cellStartIndex, mapParticleToCell);
+  neighborList = cellList.GetNeighborList(box);
+
+#ifdef GOMC_CUDA
+  // update unitcell in GPU
+  UpdateCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
+                      boxAxes.cellBasis[box].x, boxAxes.cellBasis[box].y,
+                      boxAxes.cellBasis[box].z);
+
+  if (!boxAxes.orthogonal[box]) {
+    // In this case, boxAxes is really an object of type BoxDimensionsNonOrth,
+    // so cast and copy the additional data to the GPU
+    const BoxDimensionsNonOrth *NonOrthAxes =
+        static_cast<const BoxDimensionsNonOrth *>(&boxAxes);
+    UpdateInvCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
+                           NonOrthAxes->cellBasis_Inv[box].x,
+                           NonOrthAxes->cellBasis_Inv[box].y,
+                           NonOrthAxes->cellBasis_Inv[box].z);
+  }
+
+  CallBoxInterGPU(forcefield.particles->getCUDAVars(), cellVector,
+                  cellStartIndex, neighborList, coords, boxAxes, electrostatic,
+                  particleCharge, particleKind, particleMol, tempREn, tempLJEn,
+                  forcefield.sc_coul, forcefield.sc_sigma_6,
+                  forcefield.sc_alpha, forcefield.sc_power, box);
+#else
+#if defined _OPENMP && _OPENMP >= 201511 // check if OpenMP version is 4.5
+#pragma omp parallel for default(none) shared(boxAxes, cellStartIndex, \
+  cellVector, coords, mapParticleToCell, neighborList) \
+reduction(+:tempREn, tempLJEn) firstprivate(box, num::qqFact)
+#endif
+  // loop over all particles
+  for (int currParticleIdx = 0; currParticleIdx < (int)cellVector.size();
+       currParticleIdx++) {
+    int currParticle = cellVector[currParticleIdx];
+    // find which cell currParticle belongs to
+    int currCell = mapParticleToCell[currParticle];
+    // loop over currCell neighboring cells
+    for (int nCellIndex = 0; nCellIndex < NUMBER_OF_NEIGHBOR_CELL;
+         nCellIndex++) {
+      // find the index of neighboring cell
+      int neighborCell = neighborList[currCell][nCellIndex];
+
+      // find the ending index in neighboring cell
+      int endIndex = cellStartIndex[neighborCell + 1];
+      // loop over particle inside neighboring cell
+      for (int nParticleIndex = cellStartIndex[neighborCell];
+           nParticleIndex < endIndex; nParticleIndex++) {
+        int nParticle = cellVector[nParticleIndex];
+
+        // avoid same particles and duplicate work
+        if (currParticle < nParticle &&
+            particleMol[currParticle] != particleMol[nParticle]) {
+          double distSq;
+          XYZ virComponents;
+          if (boxAxes.InRcut(distSq, virComponents, coords, currParticle,
+                             nParticle, box)) {
+            double lambdaVDW = GetLambdaVDW(particleMol[currParticle],
+                                            particleMol[nParticle], box);
+            if (electrostatic) {
+              double lambdaCoulomb = GetLambdaCoulomb(
+                  particleMol[currParticle], particleMol[nParticle], box);
+              double qi_qj_fact = particleCharge[currParticle] *
+                                  particleCharge[nParticle] * num::qqFact;
+              if (qi_qj_fact != 0.0) {
+                tempREn += forcefield.particles->CalcCoulomb(
+                    distSq, particleKind[currParticle], particleKind[nParticle],
+                    qi_qj_fact, lambdaCoulomb, box);
+              }
+            }
+            tempLJEn += forcefield.particles->CalcEn(
+                distSq, particleKind[currParticle], particleKind[nParticle],
+                lambdaVDW);
+          }
+        }
+      }
+    }
+  }
+#endif
+
+  // setting energy of LJ interaction
+  potential.boxEnergy[box].inter = tempLJEn;
+  // setting energy of the real term of coulomb interaction
+  potential.boxEnergy[box].real = tempREn;
+
+  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_BOX_INTER);
+  // set long-range (tail) correction energy
+  if (forcefield.useLRC) {
+    EnergyCorrection(potential, boxAxes, box);
+  }
+
+  potential.Total();
+  return potential;
+}
+
+SystemPotential
+CalculateEnergy::BoxForce(SystemPotential potential, XYZArray const &coords,
+                          XYZArray &atomForce, XYZArray &molForce,
+                          BoxDimensions const &boxAxes, const uint box) {
+  // Handles reservoir box case, returning the potential unchanged if
+  // interactions are off.
+  if (box >= BOXES_WITH_U_NB)
+    return potential;
+
+  GOMC_EVENT_START(1, GomcProfileEvent::EN_BOX_FORCE);
+
+  double tempREn = 0.0, tempLJEn = 0.0;
+  // make a pointer to atom force and mol force for OpenMP
+  double *aForcex = atomForce.x;
+  double *aForcey = atomForce.y;
+  double *aForcez = atomForce.z;
+  double *mForcex = molForce.x;
+  double *mForcey = molForce.y;
+  double *mForcez = molForce.z;
+  int atomCount = atomForce.Count();
+  int molCount = molForce.Count();
+
+  // Reset Force Arrays
+  ResetForce(atomForce, molForce, box);
+
+  std::vector<int> cellVector, cellStartIndex, mapParticleToCell;
+  std::vector<std::vector<int>> neighborList;
+  cellList.GetCellListNeighbor(box, coords.Count(), cellVector, cellStartIndex,
+                               mapParticleToCell);
+  neighborList = cellList.GetNeighborList(box);
+
+#ifdef GOMC_CUDA
+  // update unitcell in GPU
+  UpdateCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
+                      boxAxes.cellBasis[box].x, boxAxes.cellBasis[box].y,
+                      boxAxes.cellBasis[box].z);
+
+  if (!boxAxes.orthogonal[box]) {
+    // In this case, boxAxes is really an object of type BoxDimensionsNonOrth,
+    // so cast and copy the additional data to the GPU
+    const BoxDimensionsNonOrth *NonOrthAxes =
+        static_cast<const BoxDimensionsNonOrth *>(&boxAxes);
+    UpdateInvCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
+                           NonOrthAxes->cellBasis_Inv[box].x,
+                           NonOrthAxes->cellBasis_Inv[box].y,
+                           NonOrthAxes->cellBasis_Inv[box].z);
+  }
+
+  CallBoxForceGPU(forcefield.particles->getCUDAVars(), cellVector,
+                  cellStartIndex, neighborList, mapParticleToCell, coords,
+                  boxAxes, electrostatic, particleCharge, particleKind,
+                  particleMol, tempREn, tempLJEn, aForcex, aForcey, aForcez,
+                  mForcex, mForcey, mForcez, atomCount, molCount,
+                  forcefield.sc_coul, forcefield.sc_sigma_6,
+                  forcefield.sc_alpha, forcefield.sc_power, box);
+
+#else
+#if defined _OPENMP && _OPENMP >= 201511 // check if OpenMP version is 4.5
+#pragma omp parallel for default(none) shared(boxAxes, cellStartIndex, \
+  cellVector, coords, mapParticleToCell, neighborList) \
+  firstprivate(box, atomCount, molCount, num::qqFact) \
+  reduction(+:tempREn, tempLJEn, aForcex[:atomCount], aForcey[:atomCount], \
+            aForcez[:atomCount], mForcex[:molCount], mForcey[:molCount], \
+            mForcez[:molCount])
+#endif
+  for (int currParticleIdx = 0; currParticleIdx < (int)cellVector.size();
+       currParticleIdx++) {
+    int currParticle = cellVector[currParticleIdx];
+    int currCell = mapParticleToCell[currParticle];
+
+    for (int nCellIndex = 0; nCellIndex < NUMBER_OF_NEIGHBOR_CELL;
+         nCellIndex++) {
+      int neighborCell = neighborList[currCell][nCellIndex];
+
+      int endIndex = cellStartIndex[neighborCell + 1];
+      for (int nParticleIndex = cellStartIndex[neighborCell];
+           nParticleIndex < endIndex; nParticleIndex++) {
+        int nParticle = cellVector[nParticleIndex];
+
+        if (currParticle < nParticle &&
+            particleMol[currParticle] != particleMol[nParticle]) {
+          double distSq;
+          XYZ virComponents, forceLJ, forceReal;
+          if (boxAxes.InRcut(distSq, virComponents, coords, currParticle,
+                             nParticle, box)) {
+            double lambdaVDW = GetLambdaVDW(particleMol[currParticle],
+                                            particleMol[nParticle], box);
+            if (electrostatic) {
+              double lambdaCoulomb = GetLambdaCoulomb(
+                  particleMol[currParticle], particleMol[nParticle], box);
+              double qi_qj_fact = particleCharge[currParticle] *
+                                  particleCharge[nParticle] * num::qqFact;
+              if (qi_qj_fact != 0.0) {
+                tempREn += forcefield.particles->CalcCoulomb(
+                    distSq, particleKind[currParticle], particleKind[nParticle],
+                    qi_qj_fact, lambdaCoulomb, box);
+                // Calculating the force
+                forceReal =
+                    virComponents * forcefield.particles->CalcCoulombVir(
+                                        distSq, particleKind[currParticle],
+                                        particleKind[nParticle], qi_qj_fact,
+                                        lambdaCoulomb, box);
+              }
+            }
+            tempLJEn += forcefield.particles->CalcEn(
+                distSq, particleKind[currParticle], particleKind[nParticle],
+                lambdaVDW);
+            forceLJ = virComponents * forcefield.particles->CalcVir(
+                                          distSq, particleKind[currParticle],
+                                          particleKind[nParticle], lambdaVDW);
+            aForcex[currParticle] += forceLJ.x + forceReal.x;
+            aForcey[currParticle] += forceLJ.y + forceReal.y;
+            aForcez[currParticle] += forceLJ.z + forceReal.z;
+            aForcex[nParticle] += -(forceLJ.x + forceReal.x);
+            aForcey[nParticle] += -(forceLJ.y + forceReal.y);
+            aForcez[nParticle] += -(forceLJ.z + forceReal.z);
+            mForcex[particleMol[currParticle]] += (forceLJ.x + forceReal.x);
+            mForcey[particleMol[currParticle]] += (forceLJ.y + forceReal.y);
+            mForcez[particleMol[currParticle]] += (forceLJ.z + forceReal.z);
+            mForcex[particleMol[nParticle]] += -(forceLJ.x + forceReal.x);
+            mForcey[particleMol[nParticle]] += -(forceLJ.y + forceReal.y);
+            mForcez[particleMol[nParticle]] += -(forceLJ.z + forceReal.z);
+          }
+        }
+      }
+    }
+  }
+#endif
+
+  // setting energy and virial of LJ interaction
+  potential.boxEnergy[box].inter = tempLJEn;
+  // setting energy and virial of coulomb interaction
+  potential.boxEnergy[box].real = tempREn;
+
+  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_BOX_FORCE);
+  return potential;
+}
+
+// Computes the nonbonded virial of `box` (LJ and real-space Coulomb parts).
+// NOTE: the off-diagonal terms (T12, T13, T23) are expensive and not needed
+// for pressure or surface tension, so they are commented out on the CPU path.
+Virial CalculateEnergy::VirialCalc(const uint box) {
+  // result object; returned default-constructed for the reservoir case below
+  Virial tempVir;
+  // no need to calculate the virial for reservoir
+  if (box >= BOXES_WITH_U_NB)
+    return tempVir;
+
+  GOMC_EVENT_START(1, GomcProfileEvent::EN_BOX_VIRIAL);
+
+  // tensors for VDW and real part of electrostatic
+  double vT11 = 0.0, vT12 = 0.0, vT13 = 0.0;
+  double vT22 = 0.0, vT23 = 0.0, vT33 = 0.0;
+  double rT11 = 0.0, rT12 = 0.0, rT13 = 0.0;
+  double rT22 = 0.0, rT23 = 0.0, rT33 = 0.0;
+
+  std::vector<int> cellVector, cellStartIndex, mapParticleToCell;
+  std::vector<std::vector<int>> neighborList;
+  cellList.GetCellListNeighbor(box, currentCoords.Count(), cellVector,
+                               cellStartIndex, mapParticleToCell);
+  neighborList = cellList.GetNeighborList(box);
+
+#ifdef GOMC_CUDA
+  // update unitcell in GPU
+  UpdateCellBasisCUDA(
+      forcefield.particles->getCUDAVars(), box, currentAxes.cellBasis[box].x,
+      currentAxes.cellBasis[box].y, currentAxes.cellBasis[box].z);
+
+  if (!currentAxes.orthogonal[box]) {
+    // In this case, currentAxes is really an object of type
+    // BoxDimensionsNonOrth,
+    // so cast and copy the additional data to the GPU
+    const BoxDimensionsNonOrth *NonOrthAxes =
+        static_cast<const BoxDimensionsNonOrth *>(&currentAxes);
+    UpdateInvCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
+                           NonOrthAxes->cellBasis_Inv[box].x,
+                           NonOrthAxes->cellBasis_Inv[box].y,
+                           NonOrthAxes->cellBasis_Inv[box].z);
+  }
+
+  CallBoxInterForceGPU(forcefield.particles->getCUDAVars(), cellVector,
+                       cellStartIndex, neighborList, mapParticleToCell,
+                       currentCoords, currentCOM, currentAxes, electrostatic,
+                       particleCharge, particleKind, particleMol, rT11, rT12,
+                       rT13, rT22, rT23, rT33, vT11, vT12, vT13, vT22, vT23,
+                       vT33, forcefield.sc_coul, forcefield.sc_sigma_6,
+                       forcefield.sc_alpha, forcefield.sc_power, box);
+#else
+#if defined _OPENMP && _OPENMP >= 201511 // check if OpenMP version is 4.5
+#pragma omp parallel for default(none) shared(cellStartIndex, cellVector, \
+  mapParticleToCell, neighborList) firstprivate(box) \
+reduction(+:vT11, vT12, vT13, vT22, vT23, vT33, rT11, rT12, rT13, rT22, rT23, rT33)
+#endif
+  for (int currParticleIdx = 0; currParticleIdx < (int)cellVector.size();
+       currParticleIdx++) {
+    int currParticle = cellVector[currParticleIdx];
+    int currCell = mapParticleToCell[currParticle];
+
+    for (int nCellIndex = 0; nCellIndex < NUMBER_OF_NEIGHBOR_CELL;
+         nCellIndex++) {
+      int neighborCell = neighborList[currCell][nCellIndex];
+
+      int endIndex = cellStartIndex[neighborCell + 1];
+      for (int nParticleIndex = cellStartIndex[neighborCell];
+           nParticleIndex < endIndex; nParticleIndex++) {
+        int nParticle = cellVector[nParticleIndex];
+
+        // make sure the pairs are unique and they belong to different molecules
+        if (currParticle < nParticle &&
+            particleMol[currParticle] != particleMol[nParticle]) {
+          double distSq;
+          XYZ virC;
+          if (currentAxes.InRcut(distSq, virC, currentCoords, currParticle,
+                                 nParticle, box)) {
+            // calculate the distance between com of two molecules
+            XYZ comC = currentCOM.Difference(particleMol[currParticle],
+                                             particleMol[nParticle]);
+            // calculate the minimum image between com of two molecules
+            comC = currentAxes.MinImage(comC, box);
+            double lambdaVDW = GetLambdaVDW(particleMol[currParticle],
+                                            particleMol[nParticle], box);
+
+            if (electrostatic) {
+              double lambdaCoulomb = GetLambdaCoulomb(
+                  particleMol[currParticle], particleMol[nParticle], box);
+              double qi_qj =
+                  particleCharge[currParticle] * particleCharge[nParticle];
+              // num::qqFact is applied once after accumulation (see below)
+              // skip particle pairs with no charge
+              if (qi_qj != 0.0) {
+                double pRF = forcefield.particles->CalcCoulombVir(
+                    distSq, particleKind[currParticle], particleKind[nParticle],
+                    qi_qj, lambdaCoulomb, box);
+                // calculate the top diagonal of pressure tensor
+                rT11 += pRF * (virC.x * comC.x);
+                // rT12 += pRF * (0.5 * (virC.x * comC.y + virC.y * comC.x));
+                // rT13 += pRF * (0.5 * (virC.x * comC.z + virC.z * comC.x));
+
+                rT22 += pRF * (virC.y * comC.y);
+                // rT23 += pRF * (0.5 * (virC.y * comC.z + virC.z * comC.y));
+
+                rT33 += pRF * (virC.z * comC.z);
+              }
+            }
+
+            double pVF = forcefield.particles->CalcVir(
+                distSq, particleKind[currParticle], particleKind[nParticle],
+                lambdaVDW);
+            // calculate the top diagonal of pressure tensor
+            vT11 += pVF * (virC.x * comC.x);
+            // vT12 += pVF * (0.5 * (virC.x * comC.y + virC.y * comC.x));
+            // vT13 += pVF * (0.5 * (virC.x * comC.z + virC.z * comC.x));
+
+            vT22 += pVF * (virC.y * comC.y);
+            // vT23 += pVF * (0.5 * (virC.y * comC.z + virC.z * comC.y));
+
+            vT33 += pVF * (virC.z * comC.z);
+          }
+        }
+      }
+    }
+  }
+#endif
+
+  // fill the symmetric LJ tensor from the accumulated components
+  tempVir.interTens[0][0] = vT11;
+  tempVir.interTens[0][1] = vT12;
+  tempVir.interTens[0][2] = vT13;
+
+  tempVir.interTens[1][0] = vT12;
+  tempVir.interTens[1][1] = vT22;
+  tempVir.interTens[1][2] = vT23;
+
+  tempVir.interTens[2][0] = vT13;
+  tempVir.interTens[2][1] = vT23;
+  tempVir.interTens[2][2] = vT33;
+
+  if (electrostatic) {
+    // real part of electrostatic
+    tempVir.realTens[0][0] = rT11 * num::qqFact;
+    tempVir.realTens[0][1] = rT12 * num::qqFact;
+    tempVir.realTens[0][2] = rT13 * num::qqFact;
+
+    tempVir.realTens[1][0] = rT12 * num::qqFact;
+    tempVir.realTens[1][1] = rT22 * num::qqFact;
+    tempVir.realTens[1][2] = rT23 * num::qqFact;
+
+    tempVir.realTens[2][0] = rT13 * num::qqFact;
+    tempVir.realTens[2][1] = rT23 * num::qqFact;
+    tempVir.realTens[2][2] = rT33 * num::qqFact;
+  }
+
+  // setting virial of LJ
+  tempVir.inter = vT11 + vT22 + vT33;
+  // setting virial of coulomb
+  tempVir.real = (rT11 + rT22 + rT33) * num::qqFact;
+
+  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_BOX_VIRIAL);
+
+  if (forcefield.useLRC || forcefield.useIPC) {
+    VirialCorrection(tempVir, currentAxes, box);
+  }
+
+  // calculate reciprocal term of force
+  tempVir = calcEwald->VirialReciprocal(tempVir, box);
+
+  tempVir.Total();
+  return tempVir;
+}
+
+// Energy change (new minus old) of molecule molIndex against its cell-list
+// neighbors when moved to molCoords; returns true if a new pair overlaps.
+bool CalculateEnergy::MoleculeInter(Intermolecular &inter_LJ,
+                                    Intermolecular &inter_coulomb,
+                                    XYZArray const &molCoords,
+                                    const uint molIndex, const uint box) const {
+  double tempREn = 0.0, tempLJEn = 0.0;
+  bool overlap = false;
+  if (box < BOXES_WITH_U_NB) {
+    GOMC_EVENT_START(1, GomcProfileEvent::EN_MOL_INTER);
+    uint length = mols.GetKind(molIndex).NumAtoms();
+    uint start = mols.MolStart(molIndex);
+
+    for (uint p = 0; p < length; ++p) {
+      uint atom = start + p;
+      CellList::Neighbors n = cellList.EnumerateLocal(currentCoords[atom], box);
+      std::vector<uint> nIndex;
+      // store atom index in neighboring cell
+      while (!n.Done()) {
+        nIndex.push_back(*n);
+        n.Next();
+      }
+
+#ifdef _OPENMP
+#pragma omp parallel for default(none) shared(nIndex) \
+firstprivate(atom, box, molIndex, num::qqFact) reduction(+:tempREn, tempLJEn)
+#endif
+      for (int i = 0; i < (int)nIndex.size(); i++) {
+        double distSq = 0.0;
+        XYZ virComponents;
+        // Subtract old energy
+        if (currentAxes.InRcut(distSq, virComponents, currentCoords, atom,
+                               nIndex[i], box)) {
+          double lambdaVDW =
+              GetLambdaVDW(molIndex, particleMol[nIndex[i]], box);
+
+          if (electrostatic) {
+            double lambdaCoulomb =
+                GetLambdaCoulomb(molIndex, particleMol[nIndex[i]], box);
+            double qi_qj_fact =
+                particleCharge[atom] * particleCharge[nIndex[i]] * num::qqFact;
+
+            if (qi_qj_fact != 0.0) {
+              tempREn += -forcefield.particles->CalcCoulomb(
+                  distSq, particleKind[atom], particleKind[nIndex[i]],
+                  qi_qj_fact, lambdaCoulomb, box);
+            }
+          }
+
+          tempLJEn += -forcefield.particles->CalcEn(
+              distSq, particleKind[atom], particleKind[nIndex[i]], lambdaVDW);
+        }
+      }
+
+      // add new energy: collect atom indices in cells around the new position
+      n = cellList.EnumerateLocal(molCoords[p], box);
+      nIndex.clear();
+      while (!n.Done()) {
+        nIndex.push_back(*n);
+        n.Next();
+      }
+
+#ifdef _OPENMP
+#pragma omp parallel for default(none) shared(molCoords, nIndex, overlap) \
+firstprivate(atom, box, molIndex, p, num::qqFact) reduction(+:tempREn, tempLJEn)
+#endif
+      for (int i = 0; i < (int)nIndex.size(); i++) {
+        double distSq = 0.0;
+        XYZ virComponents;
+        if (currentAxes.InRcut(distSq, virComponents, molCoords, p,
+                               currentCoords, nIndex[i], box)) {
+          double lambdaVDW =
+              GetLambdaVDW(molIndex, particleMol[nIndex[i]], box);
+          // NOTE(review): shared flag, only ever set to true, no sync — confirm
+          if (distSq < forcefield.rCutLowSq) {
+            overlap |= true;
+          }
+
+          if (electrostatic) {
+            double lambdaCoulomb =
+                GetLambdaCoulomb(molIndex, particleMol[nIndex[i]], box);
+            double qi_qj_fact =
+                particleCharge[atom] * particleCharge[nIndex[i]] * num::qqFact;
+
+            if (qi_qj_fact != 0.0) {
+              tempREn += forcefield.particles->CalcCoulomb(
+                  distSq, particleKind[atom], particleKind[nIndex[i]],
+                  qi_qj_fact, lambdaCoulomb, box);
+            }
+          }
+
+          tempLJEn += forcefield.particles->CalcEn(
+              distSq, particleKind[atom], particleKind[nIndex[i]], lambdaVDW);
+        }
+      }
+    }
+    GOMC_EVENT_STOP(1, GomcProfileEvent::EN_MOL_INTER);
+  }
+
+  inter_LJ.energy = tempLJEn;
+  inter_coulomb.energy = tempREn;
+  return overlap;
+}
+
+// Adds the 1-N intramolecular nonbonded energy between trial atom partIndex
+// and its sortedNB partners to inter[t], for each of the `trials` positions
+void CalculateEnergy::ParticleNonbonded(double *inter,
+                                        cbmc::TrialMol const &trialMol,
+                                        XYZArray const &trialPos,
+                                        const uint partIndex, const uint box,
+                                        const uint trials) const {
+  if (box >= BOXES_WITH_U_B)
+    return;
+
+  GOMC_EVENT_START(1, GomcProfileEvent::EN_CBMC_INTRA_NB);
+  const MoleculeKind &kind = trialMol.GetKind();
+  // walk the sorted nonbonded partners of the trial particle
+  const uint *const last = kind.sortedNB.End(partIndex);
+  for (const uint *nb = kind.sortedNB.Begin(partIndex); nb != last; ++nb) {
+    // skip partners for which trialMol.AtomExists() is false
+    if (!trialMol.AtomExists(*nb))
+      continue;
+    for (uint t = 0; t < trials; ++t) {
+      double distSq;
+      if (!currentAxes.InRcut(distSq, trialPos, t, trialMol.GetCoords(), *nb,
+                              box))
+        continue;
+      // LJ term; the last argument (lambda) is fixed at 1.0 here
+      inter[t] += forcefield.particles->CalcEn(
+          distSq, kind.AtomKind(partIndex), kind.AtomKind(*nb), 1.0);
+      if (!electrostatic)
+        continue;
+      const double qi_qj_fact =
+          kind.AtomCharge(partIndex) * kind.AtomCharge(*nb) * num::qqFact;
+      if (qi_qj_fact != 0.0) {
+        forcefield.particles->CalcCoulombAdd_1_4(inter[t], distSq, qi_qj_fact,
+                                                 true);
+      }
+    }
+  }
+  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_CBMC_INTRA_NB);
+}
+
+void CalculateEnergy::ParticleInter(double *en, double *real,
+                                    XYZArray const &trialPos, bool *overlap,
+                                    const uint partIndex, const uint molIndex,
+                                    const uint box, const uint trials) const {
+  if (box >= BOXES_WITH_U_NB)
+    return;
+  // Per trial position t: adds LJ energy to en[t], real Coulomb to real[t]
+  GOMC_EVENT_START(1, GomcProfileEvent::EN_CBMC_INTER);
+  double tempLJ, tempReal;
+  MoleculeKind const &thisKind = mols.GetKind(molIndex);
+  uint kindI = thisKind.AtomKind(partIndex);
+  double kindICharge = thisKind.AtomCharge(partIndex);
+  std::vector<uint> nIndex;
+
+  for (uint t = 0; t < trials; ++t) {
+    nIndex.clear();
+    tempReal = 0.0;
+    tempLJ = 0.0;
+    CellList::Neighbors n = cellList.EnumerateLocal(trialPos[t], box);
+    while (!n.Done()) {
+      nIndex.push_back(*n);
+      n.Next();
+    }
+    // NOTE(review): overlap[t] is shared and set by threads without sync
+#ifdef _OPENMP
+#pragma omp parallel for default(none) shared(nIndex, overlap, trialPos) \
+firstprivate(kindICharge, kindI, t, box, molIndex, num::qqFact) \
+reduction(+:tempLJ, tempReal)
+#endif
+    for (int i = 0; i < (int)nIndex.size(); i++) {
+      double distSq = 0.0;
+      if (currentAxes.InRcut(distSq, trialPos, t, currentCoords, nIndex[i],
+                             box)) {
+        double lambdaVDW = GetLambdaVDW(molIndex, particleMol[nIndex[i]], box);
+
+        if (distSq < forcefield.rCutLowSq) {
+          overlap[t] |= true;
+        }
+        tempLJ += forcefield.particles->CalcEn(
+            distSq, kindI, particleKind[nIndex[i]], lambdaVDW);
+        if (electrostatic) {
+          double lambdaCoulomb =
+              GetLambdaCoulomb(molIndex, particleMol[nIndex[i]], box);
+          double qi_qj_fact =
+              particleCharge[nIndex[i]] * kindICharge * num::qqFact;
+
+          if (qi_qj_fact != 0.0) {
+            tempReal += forcefield.particles->CalcCoulomb(
+                distSq, kindI, particleKind[nIndex[i]], qi_qj_fact,
+                lambdaCoulomb, box);
+          }
+        }
+      }
+    }
+    en[t] += tempLJ;
+    real[t] += tempReal;
+  }
+  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_CBMC_INTER);
+}
+
+// Tail-correction energy change in `box` for adding/removing `kind` molecules
+Intermolecular CalculateEnergy::MoleculeTailChange(const uint box,
+                                                   const uint kind,
+                                                   const bool add) const {
+  Intermolecular delta;
+  // boxes at or beyond BOXES_WITH_U_NB get the default-constructed delta
+  if (box >= BOXES_WITH_U_NB)
+    return delta;
+
+  const double sign = add ? 1.0 : -1.0;
+  const uint kindCount = mols.GetKindsCount();
+  for (uint other = 0; other < kindCount; ++other) {
+    const double rhoDeltaIJ_2 = sign * 2.0 *
+                                (double)(molLookup.NumKindInBox(other, box)) *
+                                currentAxes.volInv[box];
+    delta.energy +=
+        mols.pairEnCorrections[other * kindCount + kind] * rhoDeltaIJ_2;
+  }
+  // kind-kind term, scaled by the inverse volume only (no sign factor)
+  delta.energy += mols.pairEnCorrections[kind * kindCount + kind] *
+                  currentAxes.volInv[box];
+  return delta;
+}
+
+// Tail-correction virial change in `box` for adding/removing `kind` molecules
+Intermolecular CalculateEnergy::MoleculeTailVirChange(const uint box,
+                                                      const uint kind,
+                                                      const bool add) const {
+  Intermolecular delta;
+  // boxes at or beyond BOXES_WITH_U_NB get the default-constructed delta
+  if (box >= BOXES_WITH_U_NB)
+    return delta;
+
+  const double sign = add ? 1.0 : -1.0;
+  const uint kindCount = mols.GetKindsCount();
+  for (uint other = 0; other < kindCount; ++other) {
+    const double rhoDeltaIJ_2 = sign * 2.0 *
+                                (double)(molLookup.NumKindInBox(other, box)) *
+                                currentAxes.volInv[box];
+    delta.virial +=
+        mols.pairVirCorrections[other * kindCount + kind] * rhoDeltaIJ_2;
+  }
+  // kind-kind term, scaled by the inverse volume only (no sign factor)
+  delta.virial += mols.pairVirCorrections[kind * kindCount + kind] *
+                  currentAxes.volInv[box];
+  return delta;
+}
+
+// Intramolecular energy of molIndex: bondEn[0] bonded, bondEn[1] nonbonded
+void CalculateEnergy::MoleculeIntra(const uint molIndex, const uint box,
+                                    double *bondEn) const {
+  GOMC_EVENT_START(1, GomcProfileEvent::EN_MOL_INTRA);
+  double &bonded = bondEn[0];
+  double &nonbonded = bondEn[1];
+  bonded = nonbonded = 0.0;
+  MoleculeKind &kindRef = mols.kinds[mols.kIndex[molIndex]];
+  // two slots per bond: the bond vector and its inverse
+  XYZArray bondVectors(kindRef.bondList.count * 2);
+  BondVectors(bondVectors, kindRef, molIndex, box);
+  MolBond(bonded, kindRef, bondVectors, molIndex, box);
+  MolAngle(bonded, kindRef, bondVectors, box);
+  MolDihedral(bonded, kindRef, bondVectors, box);
+  MolNonbond(nonbonded, kindRef, molIndex, box);
+  MolNonbond_1_4(nonbonded, kindRef, molIndex, box);
+  MolNonbond_1_3(nonbonded, kindRef, molIndex, box);
+  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_MOL_INTRA);
+}
+
+// Bonded and intramolecular nonbonded energy of the trial molecule `mol`
+Energy CalculateEnergy::MoleculeIntra(cbmc::TrialMol const &mol) const {
+  GOMC_EVENT_START(1, GomcProfileEvent::EN_MOL_INTRA);
+  const MoleculeKind &kindRef = mol.GetKind();
+  // two slots per bond: the bond vector and its inverse
+  const uint slots = kindRef.bondList.count * 2;
+  XYZArray bondVectors(slots);
+  std::vector<bool> hasBond(slots, false);
+  double bonded = 0.0;
+  double nonbonded = 0.0;
+  BondVectors(bondVectors, mol, hasBond, kindRef);
+  MolBond(bonded, mol, bondVectors, hasBond, kindRef);
+  MolAngle(bonded, mol, bondVectors, hasBond, kindRef);
+  MolDihedral(bonded, mol, bondVectors, hasBond, kindRef);
+  MolNonbond(nonbonded, mol, kindRef);
+  MolNonbond_1_4(nonbonded, mol, kindRef);
+  MolNonbond_1_3(nonbonded, mol, kindRef);
+  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_MOL_INTRA);
+  return Energy(bonded, nonbonded, 0.0, 0.0, 0.0, 0.0, 0.0);
+}
+
+void CalculateEnergy::BondVectors(XYZArray &vecs, MoleculeKind const &molKind,
+                                  const uint molIndex, const uint box) const {
+  // slot i holds the min-image bond vector, slot i + count its negation
+  const uint bondCount = molKind.bondList.count;
+  for (uint bond = 0; bond < bondCount; ++bond) {
+    const uint head = mols.start[molIndex] + molKind.bondList.part1[bond];
+    const uint tail = mols.start[molIndex] + molKind.bondList.part2[bond];
+    XYZ delta = currentCoords.Difference(tail, head);
+    delta = currentAxes.MinImage(delta, box);
+    vecs.Set(bond, delta);
+    vecs.Set(bond + bondCount, -delta.x, -delta.y, -delta.z);
+  }
+}
+
+void CalculateEnergy::BondVectors(XYZArray &vecs, cbmc::TrialMol const &mol,
+                                  std::vector<bool> &bondExist,
+                                  MoleculeKind const &molKind) const {
+  const uint trialBox = mol.GetBox();
+  const uint bondCount = molKind.bondList.count;
+  for (uint bond = 0; bond < bondCount; ++bond) {
+    const uint head = molKind.bondList.part1[bond];
+    const uint tail = molKind.bondList.part2[bond];
+    // bonds with a missing end atom leave bondExist and vecs untouched
+    if (!mol.AtomExists(head) || !mol.AtomExists(tail))
+      continue;
+    bondExist[bond] = true;
+    bondExist[bond + bondCount] = true;
+    XYZ delta = mol.GetCoords().Difference(tail, head);
+    delta = currentAxes.MinImage(delta, trialBox);
+    vecs.Set(bond, delta);
+    vecs.Set(bond + bondCount, -delta.x, -delta.y, -delta.z);
+  }
+}
+
+void CalculateEnergy::MolBond(double &energy, MoleculeKind const &molKind,
+                              XYZArray const &vecs, const uint molIndex,
+                              const uint box) const {
+  // bond energy is accumulated only for boxes below BOXES_WITH_U_B
+  if (box < BOXES_WITH_U_B) {
+    for (uint b = 0; b < molKind.bondList.count; ++b) {
+      const double molLength = vecs.Get(b).Length();
+      energy += forcefield.bonds.Calc(molKind.bondList.kinds[b], molLength);
+      /*if(std::abs(molLength - eqLength) > 0.02) {
+        uint p1 = molKind.bondList.part1[b];
+        uint p2 = molKind.bondList.part2[b];
+        double eqLength = forcefield.bonds.Length(molKind.bondList.kinds[b]);
+        printf("Warning: Box%d, %6d %4s,", box, molIndex, molKind.name.c_str());
+        printf("%3s-%-3s bond: Par-file ", molKind.atomNames[p1].c_str(),
+            molKind.atomNames[p2].c_str());
+        printf("%2.3f A, PDB file %2.3f A!\n", eqLength, molLength);
+      }*/
+    }
+  }
+}
+
+void CalculateEnergy::MolBond(double &energy, cbmc::TrialMol const &mol,
+                              XYZArray const &vecs,
+                              std::vector<bool> const &bondExist,
+                              MoleculeKind const &molKind) const {
+  if (mol.GetBox() < BOXES_WITH_U_B) {
+    // sum the bond energy over every bond flagged in bondExist
+    const uint bondCount = molKind.bondList.count;
+    for (uint bond = 0; bond < bondCount; ++bond) {
+      if (!bondExist[bond])
+        continue;
+      const double bondLength = vecs.Get(bond).Length();
+      energy += forcefield.bonds.Calc(molKind.bondList.kinds[bond], bondLength);
+    }
+  }
+}
+
+void CalculateEnergy::MolAngle(double &energy, MoleculeKind const &molKind,
+                               XYZArray const &vecs, const uint box) const {
+  if (box < BOXES_WITH_U_B) {
+    for (uint angle = 0; angle < molKind.angles.Count(); ++angle) {
+      // the second bond vector is negated before measuring the angle
+      const double theta = Theta(vecs.Get(molKind.angles.GetBond(angle, 0)),
+                                 -vecs.Get(molKind.angles.GetBond(angle, 1)));
+      energy += forcefield.angles->Calc(molKind.angles.GetKind(angle), theta);
+    }
+  }
+}
+
+void CalculateEnergy::MolAngle(double &energy, cbmc::TrialMol const &mol,
+                               XYZArray const &vecs,
+                               std::vector<bool> const &bondExist,
+                               MoleculeKind const &molKind) const {
+  if (mol.GetBox() >= BOXES_WITH_U_B)
+    return;
+  const uint angleCount = molKind.angles.Count();
+  for (uint angle = 0; angle < angleCount; ++angle) {
+    const uint bondA = molKind.angles.GetBond(angle, 0);
+    const uint bondB = molKind.angles.GetBond(angle, 1);
+    // an angle contributes only when both of its bonds are flagged
+    if (!bondExist[bondA] || !bondExist[bondB])
+      continue;
+    // the second bond vector is negated before measuring the angle
+    const double theta = Theta(vecs.Get(bondA), -vecs.Get(bondB));
+    energy += forcefield.angles->Calc(molKind.angles.GetKind(angle), theta);
+  }
+}
+
+void CalculateEnergy::MolDihedral(double &energy, MoleculeKind const &molKind,
+                                  XYZArray const &vecs, const uint box) const {
+  if (box < BOXES_WITH_U_B) { // other boxes add no dihedral energy
+    for (uint dih = 0; dih < molKind.dihedrals.Count(); ++dih) {
+      const double phi = Phi(vecs.Get(molKind.dihedrals.GetBond(dih, 0)),
+                             vecs.Get(molKind.dihedrals.GetBond(dih, 1)),
+                             vecs.Get(molKind.dihedrals.GetBond(dih, 2)));
+      energy += forcefield.dihedrals.Calc(molKind.dihedrals.GetKind(dih), phi);
+    }
+  }
+}
+
+void CalculateEnergy::MolDihedral(double &energy, cbmc::TrialMol const &mol,
+                                  XYZArray const &vecs,
+                                  std::vector<bool> const &bondExist,
+                                  MoleculeKind const &molKind) const {
+  if (mol.GetBox() >= BOXES_WITH_U_B)
+    return;
+
+  const uint dihCount = molKind.dihedrals.Count();
+  for (uint dih = 0; dih < dihCount; ++dih) {
+    const uint b0 = molKind.dihedrals.GetBond(dih, 0);
+    const uint b1 = molKind.dihedrals.GetBond(dih, 1);
+    const uint b2 = molKind.dihedrals.GetBond(dih, 2);
+    // a dihedral contributes only when all three of its bonds are flagged
+    if (!(bondExist[b0] && bondExist[b1] && bondExist[b2]))
+      continue;
+    const double phi = Phi(vecs.Get(b0), vecs.Get(b1), vecs.Get(b2));
+    energy += forcefield.dihedrals.Calc(molKind.dihedrals.GetKind(dih), phi);
+  }
+}
+
+// Adds the 1-N intramolecular nonbonded energy (LJ, plus Coulomb when
+// electrostatic is set) of molecule molIndex to `energy`
+void CalculateEnergy::MolNonbond(double &energy, MoleculeKind const &molKind,
+                                 const uint molIndex, const uint box) const {
+  if (box >= BOXES_WITH_U_B)
+    return;
+
+  const uint firstAtom = mols.start[molIndex];
+  for (uint pair = 0; pair < molKind.nonBonded.count; ++pair) {
+    const uint localA = molKind.nonBonded.part1[pair];
+    const uint localB = molKind.nonBonded.part2[pair];
+    double distSq;
+    // pairs for which InRcut reports false are skipped
+    if (!currentAxes.InRcut(distSq, currentCoords, firstAtom + localA,
+                            firstAtom + localB, box))
+      continue;
+    // LJ term; the last argument (lambda) is fixed at 1.0 here
+    energy += forcefield.particles->CalcEn(distSq, molKind.AtomKind(localA),
+                                           molKind.AtomKind(localB), 1.0);
+    if (!electrostatic)
+      continue;
+    const double qi_qj_fact = num::qqFact * molKind.AtomCharge(localA) *
+                              molKind.AtomCharge(localB);
+    if (qi_qj_fact != 0.0) {
+      forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
+                                               true);
+    }
+  }
+}
+
+// Adds the 1-N intramolecular nonbonded energy of trial molecule `mol`
+void CalculateEnergy::MolNonbond(double &energy, cbmc::TrialMol const &mol,
+                                 MoleculeKind const &molKind) const {
+  if (mol.GetBox() >= BOXES_WITH_U_B)
+    return;
+
+  double distSq;
+  double qi_qj_fact;
+  uint count = molKind.nonBonded.count;
+  // only pairs whose two atoms both exist in the trial molecule are counted
+  for (uint i = 0; i < count; ++i) {
+    uint p1 = molKind.nonBonded.part1[i];
+    uint p2 = molKind.nonBonded.part2[i];
+    if (mol.AtomExists(p1) && mol.AtomExists(p2)) {
+      if (currentAxes.InRcut(distSq, mol.GetCoords(), p1, p2, mol.GetBox())) {
+        energy += forcefield.particles->CalcEn(distSq, molKind.AtomKind(p1),
+                                               molKind.AtomKind(p2), 1.0);
+        if (electrostatic) {
+          qi_qj_fact =
+              num::qqFact * molKind.AtomCharge(p1) * molKind.AtomCharge(p2);
+
+          if (qi_qj_fact != 0.0) {
+            forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
+                                                     true);
+          }
+        }
+      }
+    }
+  }
+}
+
+// Accumulate into `energy` the 1-4 nonbonded intra energy of molecule
+// molIndex in `box`, using the current system coordinates.
+void CalculateEnergy::MolNonbond_1_4(double &energy,
+                                     MoleculeKind const &molKind,
+                                     const uint molIndex,
+                                     const uint box) const {
+  if (box >= BOXES_WITH_U_B)
+    return;
+
+  const uint firstAtom = mols.start[molIndex];
+  const uint pairCount = molKind.nonBonded_1_4.count;
+  for (uint pair = 0; pair < pairCount; ++pair) {
+    // part1/part2 are offsets relative to mols.start[molIndex].
+    const uint a1 = molKind.nonBonded_1_4.part1[pair];
+    const uint a2 = molKind.nonBonded_1_4.part2[pair];
+    const uint p1 = firstAtom + a1, p2 = firstAtom + a2;
+    double distSq;
+    if (!currentAxes.InRcut(distSq, currentCoords, p1, p2, box))
+      continue;
+
+    forcefield.particles->CalcAdd_1_4(energy, distSq, molKind.AtomKind(a1),
+                                      molKind.AtomKind(a2));
+    if (!electrostatic)
+      continue;
+    const double qi_qj_fact =
+        num::qqFact * molKind.AtomCharge(a1) * molKind.AtomCharge(a2);
+    if (qi_qj_fact != 0.0)
+      forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
+                                               false);
+  }
+}
+
+// Accumulate into `energy` the 1-4 nonbonded intra energy computed from the
+// coordinates held by trial molecule `mol`; pairs with an atom that does not
+// exist are skipped.
+void CalculateEnergy::MolNonbond_1_4(double &energy, cbmc::TrialMol const &mol,
+                                     MoleculeKind const &molKind) const {
+  if (mol.GetBox() >= BOXES_WITH_U_B)
+    return;
+
+  const uint pairCount = molKind.nonBonded_1_4.count;
+  for (uint pair = 0; pair < pairCount; ++pair) {
+    const uint a1 = molKind.nonBonded_1_4.part1[pair];
+    const uint a2 = molKind.nonBonded_1_4.part2[pair];
+    if (!(mol.AtomExists(a1) && mol.AtomExists(a2)))
+      continue;
+
+    double distSq;
+    if (!currentAxes.InRcut(distSq, mol.GetCoords(), a1, a2, mol.GetBox()))
+      continue;
+
+    forcefield.particles->CalcAdd_1_4(energy, distSq, molKind.AtomKind(a1),
+                                      molKind.AtomKind(a2));
+    if (!electrostatic)
+      continue;
+    const double qi_qj_fact =
+        num::qqFact * molKind.AtomCharge(a1) * molKind.AtomCharge(a2);
+    if (qi_qj_fact != 0.0)
+      forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
+                                               false);
+  }
+}
+
+// Accumulate into `energy` the 1-3 nonbonded intra energy of molecule
+// molIndex in `box`, using the current system coordinates.
+void CalculateEnergy::MolNonbond_1_3(double &energy,
+                                     MoleculeKind const &molKind,
+                                     const uint molIndex,
+                                     const uint box) const {
+  if (box >= BOXES_WITH_U_B)
+    return;
+
+  const uint firstAtom = mols.start[molIndex];
+  const uint pairCount = molKind.nonBonded_1_3.count;
+  for (uint pair = 0; pair < pairCount; ++pair) {
+    // part1/part2 are offsets relative to mols.start[molIndex].
+    const uint a1 = molKind.nonBonded_1_3.part1[pair];
+    const uint a2 = molKind.nonBonded_1_3.part2[pair];
+    const uint p1 = firstAtom + a1, p2 = firstAtom + a2;
+    double distSq;
+    if (!currentAxes.InRcut(distSq, currentCoords, p1, p2, box))
+      continue;
+
+    forcefield.particles->CalcAdd_1_4(energy, distSq, molKind.AtomKind(a1),
+                                      molKind.AtomKind(a2));
+    if (!electrostatic)
+      continue;
+    const double qi_qj_fact =
+        num::qqFact * molKind.AtomCharge(a1) * molKind.AtomCharge(a2);
+    if (qi_qj_fact != 0.0)
+      forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
+                                               false);
+  }
+}
+
+// Accumulate into `energy` the 1-3 nonbonded intra energy computed from the
+// coordinates held by trial molecule `mol`; pairs with an atom that does not
+// exist are skipped.
+void CalculateEnergy::MolNonbond_1_3(double &energy, cbmc::TrialMol const &mol,
+                                     MoleculeKind const &molKind) const {
+  if (mol.GetBox() >= BOXES_WITH_U_B)
+    return;
+
+  const uint pairCount = molKind.nonBonded_1_3.count;
+  for (uint pair = 0; pair < pairCount; ++pair) {
+    const uint a1 = molKind.nonBonded_1_3.part1[pair];
+    const uint a2 = molKind.nonBonded_1_3.part2[pair];
+    if (!(mol.AtomExists(a1) && mol.AtomExists(a2)))
+      continue;
+
+    double distSq;
+    if (!currentAxes.InRcut(distSq, mol.GetCoords(), a1, a2, mol.GetBox()))
+      continue;
+
+    forcefield.particles->CalcAdd_1_4(energy, distSq, molKind.AtomKind(a1),
+                                      molKind.AtomKind(a2));
+    if (!electrostatic)
+      continue;
+    const double qi_qj_fact =
+        num::qqFact * molKind.AtomCharge(a1) * molKind.AtomCharge(a2);
+    if (qi_qj_fact != 0.0)
+      forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
+                                               false);
+  }
+}
+
+// Return the 1-3 nonbonded intra energy of atoms atom1 and atom2 (indices
+// within the kind of molecule molIndex) at squared distance distSq: the
+// Coulomb part, if electrostatics are enabled, followed by CalcAdd_1_4.
+// Returns 0.0 when forcefield.OneThree is off.
+double CalculateEnergy::IntraEnergy_1_3(const double distSq, const uint atom1,
+                                        const uint atom2,
+                                        const uint molIndex) const {
+  if (!forcefield.OneThree)
+    return 0.0;
+
+  MoleculeKind const &molKind = mols.GetKind(molIndex);
+  const uint firstKind = molKind.AtomKind(atom1);
+  const uint secondKind = molKind.AtomKind(atom2);
+  double pairEnergy = 0.0;
+
+  if (electrostatic) {
+    const double chargeProduct =
+        num::qqFact * molKind.AtomCharge(atom1) * molKind.AtomCharge(atom2);
+    if (chargeProduct != 0.0)
+      forcefield.particles->CalcCoulombAdd_1_4(pairEnergy, distSq,
+                                               chargeProduct, false);
+  }
+  forcefield.particles->CalcAdd_1_4(pairEnergy, distSq, firstKind,
+                                    secondKind);
+
+  // A NaN result is replaced by num::BIGNUM.
+  return std::isnan(pairEnergy) ? num::BIGNUM : pairEnergy;
+}
+
+// Return the 1-4 nonbonded intra energy of atoms atom1 and atom2 (indices
+// within the kind of molecule molIndex) at squared distance distSq: the
+// Coulomb part, if electrostatics are enabled, followed by CalcAdd_1_4.
+// Returns 0.0 when forcefield.OneFour is off.
+double CalculateEnergy::IntraEnergy_1_4(const double distSq, const uint atom1,
+                                        const uint atom2,
+                                        const uint molIndex) const {
+  if (!forcefield.OneFour)
+    return 0.0;
+
+  MoleculeKind const &molKind = mols.GetKind(molIndex);
+  const uint firstKind = molKind.AtomKind(atom1);
+  const uint secondKind = molKind.AtomKind(atom2);
+  double pairEnergy = 0.0;
+
+  if (electrostatic) {
+    const double chargeProduct =
+        num::qqFact * molKind.AtomCharge(atom1) * molKind.AtomCharge(atom2);
+    if (chargeProduct != 0.0)
+      forcefield.particles->CalcCoulombAdd_1_4(pairEnergy, distSq,
+                                               chargeProduct, false);
+  }
+  forcefield.particles->CalcAdd_1_4(pairEnergy, distSq, firstKind,
+                                    secondKind);
+
+  // A NaN result is replaced by num::BIGNUM.
+  return std::isnan(pairEnergy) ? num::BIGNUM : pairEnergy;
+}
+
+//! Calculates the energy tail correction of `box` into pot.boxEnergy[box].
+void CalculateEnergy::EnergyCorrection(SystemPotential &pot,
+                                       BoxDimensions const &boxAxes,
+                                       const uint box) const {
+  if (box >= BOXES_WITH_U_NB) {
+    return;
+  }
+  // Sum over all kind pairs: correction * N_i * N_j * boxAxes.volInv[box].
+  double en = 0.0;
+  for (uint i = 0; i < mols.GetKindsCount(); ++i) {
+    uint numI = molLookup.NumKindInBox(i, box);
+    for (uint j = 0; j < mols.GetKindsCount(); ++j) {
+      uint numJ = molLookup.NumKindInBox(j, box);
+      en += mols.pairEnCorrections[i * mols.GetKindsCount() + j] * numI * numJ *
+            boxAxes.volInv[box];
+    }
+  }
+  // Without free energy the sum above is stored as the tail correction.
+  if (!forcefield.freeEnergy) {
+    pot.boxEnergy[box].tailCorrection = en;
+  }
+#if ENSEMBLE == NVT || ENSEMBLE == NPT
+  else {
+    // Get the kind and VDW lambda of the molecule lambdaRef returns for box
+    uint fk = mols.GetMolKind(lambdaRef.GetMolIndex(box));
+    double lambdaVDW = lambdaRef.GetLambdaVDW(lambdaRef.GetMolIndex(box), box);
+    // remove the LRC for one molecule with lambda = 1
+    en += MoleculeTailChange(box, fk, false).energy;
+    // NOTE(review): below volInv comes from currentAxes, not boxAxes - confirm
+    // Add the LRC for fractional molecule
+    for (uint i = 0; i < mols.GetKindsCount(); ++i) {
+      uint molNum = molLookup.NumKindInBox(i, box);
+      if (i == fk) {
+        --molNum; // We have one less molecule (it is fractional molecule)
+      }
+      double rhoDeltaIJ_2 = 2.0 * (double)(molNum)*currentAxes.volInv[box];
+      en += lambdaVDW * mols.pairEnCorrections[fk * mols.GetKindsCount() + i] *
+            rhoDeltaIJ_2;
+    }
+    // We already calculated part of the change for this type in the loop
+    en += lambdaVDW * mols.pairEnCorrections[fk * mols.GetKindsCount() + fk] *
+          currentAxes.volInv[box];
+    pot.boxEnergy[box].tailCorrection = en;
+  }
+#endif // in other ensembles nothing is stored when freeEnergy is set
+}
+
+//! Returns the energy tail correction of `box` for the per-kind molecule
+//! counts in kCount; 0.0 when box >= BOXES_WITH_U_NB.
+double CalculateEnergy::EnergyCorrection(const uint box,
+                                         const uint *kCount) const {
+  if (box >= BOXES_WITH_U_NB)
+    return 0.0;
+
+  const auto nKinds = mols.kindsCount;
+  double total = 0.0;
+  for (uint a = 0; a < nKinds; ++a) {
+    for (uint b = 0; b < nKinds; ++b)
+      total += mols.pairEnCorrections[a * nKinds + b] * kCount[a] * kCount[b] *
+               currentAxes.volInv[box];
+  }
+  return total;
+}
+
+//! Calculates the virial tail correction of `box` into virial.tailCorrection.
+void CalculateEnergy::VirialCorrection(Virial &virial,
+                                       BoxDimensions const &boxAxes,
+                                       const uint box) const {
+  if (box >= BOXES_WITH_U_NB) {
+    return;
+  }
+  double vir = 0.0;
+  for (uint i = 0; i < mols.GetKindsCount(); ++i) {
+    uint numI = molLookup.NumKindInBox(i, box);
+    for (uint j = 0; j < mols.GetKindsCount(); ++j) {
+      uint numJ = molLookup.NumKindInBox(j, box);
+      vir += mols.pairVirCorrections[i * mols.GetKindsCount() + j] * numI *
+             numJ * boxAxes.volInv[box];
+    }
+  }
+
+  if (!forcefield.freeEnergy) {
+    virial.tailCorrection = vir;
+  }
+#if ENSEMBLE == NVT || ENSEMBLE == NPT
+  else {
+    // Get the kind and VDW lambda of the molecule lambdaRef returns for box
+    uint fk = mols.GetMolKind(lambdaRef.GetMolIndex(box));
+    double lambdaVDW = lambdaRef.GetLambdaVDW(lambdaRef.GetMolIndex(box), box);
+    // remove the LRC for one molecule with lambda = 1
+    vir += MoleculeTailVirChange(box, fk, false).virial;
+    // NOTE(review): below volInv comes from currentAxes, not boxAxes - confirm
+    // Add the LRC for fractional molecule
+    for (uint i = 0; i < mols.GetKindsCount(); ++i) {
+      uint molNum = molLookup.NumKindInBox(i, box);
+      if (i == fk) {
+        --molNum; // We have one less molecule (it is fractional molecule)
+      }
+      double rhoDeltaIJ_2 = 2.0 * (double)(molNum)*currentAxes.volInv[box];
+      vir += lambdaVDW *
+             mols.pairVirCorrections[fk * mols.GetKindsCount() + i] *
+             rhoDeltaIJ_2;
+    }
+    // We already calculated part of the change for this type in the loop
+    vir += lambdaVDW * mols.pairVirCorrections[fk * mols.GetKindsCount() + fk] *
+           currentAxes.volInv[box];
+    virial.tailCorrection = vir;
+  }
+#endif // in other ensembles nothing is stored when freeEnergy is set
+}
+
+//! Calculate the torque of every molecule listed in moleculeIndex: the sum
+//! over its atoms of Cross(offset from the molecule's COM after MinImage,
+//! atomForce + atomForceRec). Only those molTorque entries are written.
+void CalculateEnergy::CalculateTorque(std::vector<uint> &moleculeIndex,
+                                      XYZArray const &coordinates,
+                                      XYZArray const &com,
+                                      XYZArray const &atomForce,
+                                      XYZArray const &atomForceRec,
+                                      XYZArray &molTorque, const uint box) {
+  if (multiParticleEnabled && (box < BOXES_WITH_U_NB)) {
+    GOMC_EVENT_START(1, GomcProfileEvent::BOX_TORQUE);
+    // make a pointer to mol torque for OpenMP
+    double *torquex = molTorque.x;
+    double *torquey = molTorque.y;
+    double *torquez = molTorque.z;
+
+#if defined _OPENMP
+#pragma omp parallel for default(none)                                         \
+    shared(atomForce, atomForceRec, com, coordinates, moleculeIndex, torquex,  \
+           torquey, torquez) firstprivate(box)
+#endif
+    for (int m = 0; m < (int)moleculeIndex.size(); m++) {
+      int mIndex = moleculeIndex[m];
+      int length = mols.GetKind(mIndex).NumAtoms();
+      int start = mols.MolStart(mIndex);
+      double tx = 0.0, ty = 0.0, tz = 0.0;
+      // atom iterator
+      for (int p = start; p < start + length; p++) {
+        XYZ distFromCOM = coordinates.Difference(p, com, mIndex);
+        distFromCOM = currentAxes.MinImage(distFromCOM, box);
+        XYZ tempTorque = Cross(distFromCOM, atomForce[p] + atomForceRec[p]);
+        tx += tempTorque.x;
+        ty += tempTorque.y;
+        tz += tempTorque.z;
+      }
+      torquex[mIndex] = tx;
+      torquey[mIndex] = ty;
+      torquez[mIndex] = tz;
+    }
+    // Stop the event only where it was started, keeping START/STOP paired.
+    GOMC_EVENT_STOP(1, GomcProfileEvent::BOX_TORQUE);
+  }
+}
+
+// Zero the forces stored for `box`: the molForce entry of every molecule in
+// the box and the atomForce entry of each of that molecule's atoms.
+// Does nothing unless multiParticleEnabled is set.
+void CalculateEnergy::ResetForce(XYZArray &atomForce, XYZArray &molForce,
+                                 uint box) {
+  if (!multiParticleEnabled)
+    return;
+
+  // Walk all molecules the lookup lists for this box.
+  MoleculeLookup::box_iterator molIt = molLookup.BoxBegin(box);
+  MoleculeLookup::box_iterator molEnd = molLookup.BoxEnd(box);
+  for (; molIt != molEnd; molIt++) {
+    const uint atomCount = mols.GetKind(*molIt).NumAtoms();
+    const uint firstAtom = mols.MolStart(*molIt);
+
+    molForce.Set(*molIt, 0.0, 0.0, 0.0);
+    // Atoms of the molecule occupy [firstAtom, firstAtom + atomCount).
+    for (uint atom = firstAtom; atom < firstAtom + atomCount; atom++)
+      atomForce.Set(atom, 0.0, 0.0, 0.0);
+  }
+}
+
+// Count the atoms in `box`: for each molecule kind, atoms per molecule times
+// the number of molecules of that kind in the box.
+uint CalculateEnergy::NumberOfParticlesInsideBox(uint box) {
+  uint atomTotal = 0;
+  const int kindTotal = (int)mols.GetKindsCount();
+  for (int kind = 0; kind < kindTotal; kind++)
+    atomTotal += mols.kinds[kind].NumAtoms() *
+                 molLookup.NumKindInBox(kind, box);
+  return atomTotal;
+}
+
+// Collect, per molecule kind, the molecules of `box` that lie inside a
+// cavity and may be swapped between boxes.
+//   mol      output; cleared, resized to molLookup.GetNumKind() lists, then
+//            mol[k] receives each qualifying molecule of kind k once
+//   center, cavDim, invCav
+//            cavity description passed to currentAxes.InCavity together
+//            with the molecule's entry in currentCOM
+//   box      box that is searched
+//   kind, exRatio
+//            the result is true when mol[kind] holds at least exRatio
+//            molecules
+bool CalculateEnergy::FindMolInCavity(std::vector<std::vector<uint>> &mol,
+                                      const XYZ &center, const XYZ &cavDim,
+                                      const XYZArray &invCav, const uint box,
+                                      const uint kind, const uint exRatio) {
+  mol.clear();
+  mol.resize(molLookup.GetNumKind());
+
+  // Append molIndex to the list of its kind if it is inside the cavity, is
+  // not flagged as no-swap, and is not in the list yet.
+  auto addIfEligible = [&](const uint molIndex) {
+    if (!currentAxes.InCavity(currentCOM.Get(molIndex), center, cavDim,
+                              invCav, box))
+      return;
+
+    // only molecules that can be transferred between boxes are kept
+    if (molLookup.IsNoSwap(molIndex))
+      return;
+
+    std::vector<uint> &sameKind = mol[mols.GetMolKind(molIndex)];
+    if (std::find(sameKind.begin(), sameKind.end(), molIndex) ==
+        sameKind.end())
+      sameKind.push_back(molIndex);
+  };
+
+  if (cavDim.Max() <= currentAxes.rCut[box]) {
+    // Largest cavity dimension does not exceed rCut: only visit the
+    // particles the cell list enumerates around the center.
+    CellList::Neighbors n = cellList.EnumerateLocal(center, box);
+    for (; !n.Done(); n.Next())
+      addIfEligible(particleMol[*n]);
+  } else {
+    // Otherwise test every molecule in the box.
+    MoleculeLookup::box_iterator it = molLookup.BoxBegin(box);
+    MoleculeLookup::box_iterator last = molLookup.BoxEnd(box);
+    for (; it != last; it++)
+      addIfEligible(*it);
+  }
+
+  // True when enough molecules of the requested kind are in the cavity.
+  return mol[kind].size() >= exRatio;
+}
+
+// Inter-molecular energy of molecule molIndex with neighboring atoms of other
+// molecules in `box`, evaluated at both the old and the new lambda values.
+void CalculateEnergy::SingleMoleculeInter(
+    Energy &interEnOld, Energy &interEnNew, const double lambdaOldVDW,
+    const double lambdaNewVDW, const double lambdaOldCoulomb,
+    const double lambdaNewCoulomb, const uint molIndex, const uint box) const {
+  double tempREnOld = 0.0, tempLJEnOld = 0.0;
+  double tempREnNew = 0.0, tempLJEnNew = 0.0;
+  if (box < BOXES_WITH_U_NB) {
+    uint length = mols.GetKind(molIndex).NumAtoms();
+    uint start = mols.MolStart(molIndex);
+    for (uint p = 0; p < length; ++p) {
+      uint atom = start + p;
+      CellList::Neighbors n = cellList.EnumerateLocal(currentCoords[atom], box);
+      std::vector<uint> nIndex;
+      // store neighbor atoms that belong to a different molecule
+      while (!n.Done()) {
+        if (particleMol[*n] != (int)molIndex) {
+          nIndex.push_back(*n);
+        }
+        n.Next();
+      }
+
+#ifdef _OPENMP
+#pragma omp parallel for default(none) shared(nIndex) \
+firstprivate(atom, box, lambdaNewCoulomb, lambdaOldCoulomb, lambdaOldVDW, \
+lambdaNewVDW, num::qqFact) reduction(+:tempREnOld, tempLJEnOld, tempREnNew, \
+tempLJEnNew)
+#endif
+      for (int i = 0; i < (int)nIndex.size(); i++) {
+        double distSq = 0.0;
+        XYZ virComponents;
+        if (currentAxes.InRcut(distSq, virComponents, currentCoords, atom,
+                               nIndex[i], box)) {
+          if (electrostatic) {
+            double qi_qj_fact =
+                particleCharge[atom] * particleCharge[nIndex[i]] * num::qqFact;
+            if (qi_qj_fact != 0.0) {
+              tempREnNew += forcefield.particles->CalcCoulomb(
+                  distSq, particleKind[atom], particleKind[nIndex[i]],
+                  qi_qj_fact, lambdaNewCoulomb, box);
+              tempREnOld += forcefield.particles->CalcCoulomb(
+                  distSq, particleKind[atom], particleKind[nIndex[i]],
+                  qi_qj_fact, lambdaOldCoulomb, box);
+            }
+          }
+          // CalcEn term for the new and the old VDW lambda (stored as .inter)
+          tempLJEnNew += forcefield.particles->CalcEn(
+              distSq, particleKind[atom], particleKind[nIndex[i]],
+              lambdaNewVDW);
+          tempLJEnOld += forcefield.particles->CalcEn(
+              distSq, particleKind[atom], particleKind[nIndex[i]],
+              lambdaOldVDW);
+        }
+      }
+    }
+  }
+  // Outputs are overwritten; all four are 0.0 when box >= BOXES_WITH_U_NB.
+  interEnNew.inter = tempLJEnNew;
+  interEnNew.real = tempREnNew;
+  interEnOld.inter = tempLJEnOld;
+  interEnOld.real = tempREnOld;
+}
+
+// Product of the VDW lambdas that lambdaRef reports for molA and molB in box.
+double CalculateEnergy::GetLambdaVDW(uint molA, uint molB, uint box) const {
+  const double lambdaA = lambdaRef.GetLambdaVDW(molA, box);
+  const double lambdaB = lambdaRef.GetLambdaVDW(molB, box);
+  return lambdaA * lambdaB;
+}
+
+// Product of the Coulomb lambdas lambdaRef reports for molA and molB in box.
+double CalculateEnergy::GetLambdaCoulomb(uint molA, uint molB, uint box) const {
+  const double lambdaA = lambdaRef.GetLambdaCoulomb(molA, box);
+  const double lambdaB = lambdaRef.GetLambdaCoulomb(molB, box);
+  // No square root is taken for the inter energy: one of the two molecules
+  // always has lambda 1.
+  return lambdaA * lambdaB;
+}
+
+// Change in the tail correction of `box` when the lambda of a molecule of
+// `kind` goes from lambdaOld to lambdaNew; kCount holds one count per
+// molecule kind. Returns 0.0 when box >= BOXES_WITH_U_NB.
+double CalculateEnergy::MoleculeTailChange(const uint box, const uint kind,
+                                           const std::vector<uint> &kCount,
+                                           const double lambdaOld,
+                                           const double lambdaNew) const {
+  if (box >= BOXES_WITH_U_NB)
+    return 0.0;
+
+  const double deltaLambda = lambdaNew - lambdaOld;
+  const uint kindTotal = mols.GetKindsCount();
+  const uint row = kind * kindTotal; // offset of `kind`'s row of corrections
+  double change = 0.0;
+  for (uint other = 0; other < kindTotal; ++other) {
+    // We should have only one molecule of fractional kind
+    const double rhoDeltaIJ_2 =
+        2.0 * (double)(kCount[other]) * currentAxes.volInv[box];
+    change += deltaLambda * mols.pairEnCorrections[row + other] * rhoDeltaIJ_2;
+  }
+  change += deltaLambda * mols.pairEnCorrections[row + kind] *
+            currentAxes.volInv[box];
+  return change;
+}
+
+// Calculate the change in energy due to lambda: for molecule molIndex in
+// `box`, adds to energyDiff[s] the energy difference between lambda state s
+// and the current state iState, and reports dU/dlambda at iState in dUdL_*.
+void CalculateEnergy::EnergyChange(Energy *energyDiff, Energy &dUdL_VDW,
+                                   Energy &dUdL_Coul,
+                                   const std::vector<double> &lambda_VDW,
+                                   const std::vector<double> &lambda_Coul,
+                                   const uint iState, const uint molIndex,
+                                   const uint box) const {
+  if (box >= BOXES_WITH_U_NB) {
+    return;
+  }
+
+  GOMC_EVENT_START(1, GomcProfileEvent::FREE_ENERGY);
+  uint length = mols.GetKind(molIndex).NumAtoms();
+  uint start = mols.MolStart(molIndex);
+  uint lambdaSize = lambda_VDW.size();
+  double *tempLJEnDiff = new double[lambdaSize];
+  double *tempREnDiff = new double[lambdaSize];
+  double dudl_VDW = 0.0, dudl_Coul = 0.0;
+  std::fill_n(tempLJEnDiff, lambdaSize, 0.0);
+  std::fill_n(tempREnDiff, lambdaSize, 0.0);
+  // Calculate the vdw, short range electrostatic energy
+  for (uint p = 0; p < length; ++p) {
+    uint atom = start + p;
+    CellList::Neighbors n = cellList.EnumerateLocal(currentCoords[atom], box);
+
+    std::vector<uint> nIndex;
+    // store neighbor atoms that belong to a different molecule
+    while (!n.Done()) {
+      if (particleMol[*n] != (int)molIndex) {
+        nIndex.push_back(*n);
+      }
+      n.Next();
+    }
+
+#if defined _OPENMP && _OPENMP >= 201511 // check if OpenMP version is 4.5
+#pragma omp parallel for default(none) shared(lambda_Coul, lambda_VDW, nIndex) \
+firstprivate(box, atom, iState, lambdaSize, num::qqFact) \
+reduction(+:dudl_VDW, dudl_Coul, tempREnDiff[:lambdaSize], tempLJEnDiff[:lambdaSize])
+#endif
+    for (int i = 0; i < (int)nIndex.size(); i++) {
+      double distSq = 0.0;
+      XYZ virComponents;
+      if (currentAxes.InRcut(distSq, virComponents, currentCoords, atom,
+                             nIndex[i], box)) {
+        double qi_qj_fact = 0.0, energyOldCoul = 0.0;
+        // Calculate the energy of current state
+        double energyOldVDW = forcefield.particles->CalcEn(
+            distSq, particleKind[atom], particleKind[nIndex[i]],
+            lambda_VDW[iState]);
+        // Calculate du/dl in VDW for current state
+        dudl_VDW += forcefield.particles->CalcdEndL(distSq, particleKind[atom],
+                                                    particleKind[nIndex[i]],
+                                                    lambda_VDW[iState]);
+        if (electrostatic) {
+          qi_qj_fact =
+              particleCharge[atom] * particleCharge[nIndex[i]] * num::qqFact;
+          if (qi_qj_fact != 0.0) {
+            energyOldCoul = forcefield.particles->CalcCoulomb(
+                distSq, particleKind[atom], particleKind[nIndex[i]], qi_qj_fact,
+                lambda_Coul[iState], box);
+            // Calculate du/dl in Coulomb for current state.
+            dudl_Coul += forcefield.particles->CalcCoulombdEndL(
+                distSq, particleKind[atom], particleKind[nIndex[i]], qi_qj_fact,
+                lambda_Coul[iState], box);
+          }
+        }
+        // NOTE(review): lambda_Coul is indexed with s < lambda_VDW.size().
+        for (int s = 0; s < (int)lambdaSize; s++) {
+          // Calculate the energy of other state
+          tempLJEnDiff[s] += forcefield.particles->CalcEn(
+              distSq, particleKind[atom], particleKind[nIndex[i]],
+              lambda_VDW[s]);
+          tempLJEnDiff[s] += -energyOldVDW;
+          if (electrostatic && qi_qj_fact != 0.0) {
+            tempREnDiff[s] += forcefield.particles->CalcCoulomb(
+                distSq, particleKind[atom], particleKind[nIndex[i]], qi_qj_fact,
+                lambda_Coul[s], box);
+            tempREnDiff[s] += -energyOldCoul;
+          }
+        }
+      }
+    }
+  }
+  // dUdL .inter/.real are overwritten; energyDiff entries are accumulated.
+  dUdL_VDW.inter = dudl_VDW;
+  dUdL_Coul.real = dudl_Coul;
+  for (int s = 0; s < (int)lambdaSize; s++) {
+    energyDiff[s].inter += tempLJEnDiff[s];
+    energyDiff[s].real += tempREnDiff[s];
+  }
+  delete[] tempLJEnDiff;
+  delete[] tempREnDiff;
+
+  if (forcefield.useLRC) {
+    // Need to calculate change in LRC
+    ChangeLRC(energyDiff, dUdL_VDW, lambda_VDW, iState, molIndex, box);
+  }
+  // Need to calculate change in self
+  calcEwald->ChangeSelf(energyDiff, dUdL_Coul, lambda_Coul, iState, molIndex,
+                        box);
+  // Need to calculate change in correction
+  calcEwald->ChangeCorrection(energyDiff, dUdL_Coul, lambda_Coul, iState,
+                              molIndex, box);
+  // Need to calculate change in Reciprocal
+  calcEwald->ChangeRecip(energyDiff, dUdL_Coul, lambda_Coul, iState, molIndex,
+                         box);
+  GOMC_EVENT_STOP(1, GomcProfileEvent::FREE_ENERGY);
+}
+
+// For every lambda state s, add to energyDiff[s].tailCorrection the change
+// in the LRC when the VDW lambda of molecule molIndex goes from
+// lambda_VDW[iState] to lambda_VDW[s]. For s == iState the same terms,
+// without the lambda difference, are added to dUdL_VDW.tailCorrection.
+void CalculateEnergy::ChangeLRC(Energy *energyDiff, Energy &dUdL_VDW,
+                                const std::vector<double> &lambda_VDW,
+                                const uint iState, const uint molIndex,
+                                const uint box) const {
+  // Kind of the molecule and lambda of the current state
+  const uint fk = mols.GetMolKind(molIndex);
+  const double lambdaNow = lambda_VDW[iState];
+
+  for (size_t s = 0; s < lambda_VDW.size(); s++) {
+    const double lambdaShift = lambda_VDW[s] - lambdaNow;
+    const bool isCurrent = (s == iState);
+
+    for (uint i = 0; i < mols.GetKindsCount(); ++i) {
+      uint molNum = molLookup.NumKindInBox(i, box);
+      // We have one less molecule (it is fractional molecule)
+      if (i == fk)
+        --molNum;
+      const double rhoDeltaIJ_2 =
+          2.0 * (double)(molNum)*currentAxes.volInv[box];
+      const double pairTerm =
+          mols.pairEnCorrections[fk * mols.GetKindsCount() + i] * rhoDeltaIJ_2;
+      energyDiff[s].tailCorrection += pairTerm * lambdaShift;
+      if (isCurrent)
+        dUdL_VDW.tailCorrection += pairTerm; // du/dl in VDW LRC
+    }
+
+    // Additional fk-fk term that is scaled by volInv only.
+    const double selfTerm =
+        mols.pairEnCorrections[fk * mols.GetKindsCount() + fk] *
+        currentAxes.volInv[box];
+    energyDiff[s].tailCorrection += selfTerm * lambdaShift;
+    if (isCurrent)
+      dUdL_VDW.tailCorrection += selfTerm; // du/dl in VDW LRC
+  }
+}
diff --git a/src/GPU/CalculateMinImageCUDAKernel.cuh b/src/GPU/CalculateMinImageCUDAKernel.cuh
index b5657afa4..a375e2db6 100644
--- a/src/GPU/CalculateMinImageCUDAKernel.cuh
+++ b/src/GPU/CalculateMinImageCUDAKernel.cuh
@@ -2,59 +2,63 @@
 GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
 Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
-along with this program, also can be found at <https://opensource.org/licenses/MIT>.
+along with this program, also can be found at
+<https://opensource.org/licenses/MIT>.
 ********************************************************************************/
 #pragma once
 #ifdef GOMC_CUDA
 
+#include "ConstantDefinitionsCUDAKernel.cuh"
 #include <cuda.h>
 #include <cuda_runtime.h>
-#include "ConstantDefinitionsCUDAKernel.cuh"
 
-__device__ inline double3 Difference3(const double *x, const double *y, const double *z,
-                                     uint i, uint j)
-{
+__device__ inline double3 Difference3(const double *x, const double *y,
+                                      const double *z, uint i, uint j) {
   return make_double3(x[i] - x[j], y[i] - y[j], z[i] - z[j]);
 }
 
 __device__ inline void TransformSlantGPU(double3 &dist, const double3 &slant,
-                                         const double *gpu_cell_x, const double *gpu_cell_y,
-                                         const double *gpu_cell_z)
-{
-  dist.x = slant.x * gpu_cell_x[0] + slant.y * gpu_cell_x[1] + slant.z * gpu_cell_x[2];
-  dist.y = slant.x * gpu_cell_y[0] + slant.y * gpu_cell_y[1] + slant.z * gpu_cell_y[2];
-  dist.z = slant.x * gpu_cell_z[0] + slant.y * gpu_cell_z[1] + slant.z * gpu_cell_z[2];
+                                         const double *gpu_cell_x,
+                                         const double *gpu_cell_y,
+                                         const double *gpu_cell_z) {
+  dist.x = slant.x * gpu_cell_x[0] + slant.y * gpu_cell_x[1] +
+           slant.z * gpu_cell_x[2];
+  dist.y = slant.x * gpu_cell_y[0] + slant.y * gpu_cell_y[1] +
+           slant.z * gpu_cell_y[2];
+  dist.z = slant.x * gpu_cell_z[0] + slant.y * gpu_cell_z[1] +
+           slant.z * gpu_cell_z[2];
 }
 
 __device__ inline void TransformUnSlantGPU(double3 &dist, const double3 &slant,
-                                           const double *gpu_Invcell_x, const double *gpu_Invcell_y,
-                                           const double *gpu_Invcell_z)
-{
-  dist.x = slant.x * gpu_Invcell_x[0] + slant.y * gpu_Invcell_x[1] + slant.z * gpu_Invcell_x[2];
-  dist.y = slant.x * gpu_Invcell_y[0] + slant.y * gpu_Invcell_y[1] + slant.z * gpu_Invcell_y[2];
-  dist.z = slant.x * gpu_Invcell_z[0] + slant.y * gpu_Invcell_z[1] + slant.z * gpu_Invcell_z[2];
+                                           const double *gpu_Invcell_x,
+                                           const double *gpu_Invcell_y,
+                                           const double *gpu_Invcell_z) {
+  dist.x = slant.x * gpu_Invcell_x[0] + slant.y * gpu_Invcell_x[1] +
+           slant.z * gpu_Invcell_x[2];
+  dist.y = slant.x * gpu_Invcell_y[0] + slant.y * gpu_Invcell_y[1] +
+           slant.z * gpu_Invcell_y[2];
+  dist.z = slant.x * gpu_Invcell_z[0] + slant.y * gpu_Invcell_z[1] +
+           slant.z * gpu_Invcell_z[2];
 }
 
-__device__ inline void WrapPBC(double &v, const double &ax)
-{
-  if(v >= ax)
+__device__ inline void WrapPBC(double &v, const double &ax) {
+  if (v >= ax)
     v -= ax;
-  else if(v < 0)
+  else if (v < 0)
     v += ax;
 }
 
-__device__ inline void WrapPBC3(double3 &v, const double3 &ax)
-{
+__device__ inline void WrapPBC3(double3 &v, const double3 &ax) {
   WrapPBC(v.x, ax.x);
   WrapPBC(v.y, ax.y);
   WrapPBC(v.z, ax.z);
 }
 
-__device__ inline void WrapPBCNonOrth3(double3 &v, const double3 &ax,
-                                       const double *gpu_cell_x, const double *gpu_cell_y,
-                                       const double *gpu_cell_z, const double *gpu_Invcell_x, 
-                                       const double *gpu_Invcell_y, const double *gpu_Invcell_z)
-{
+__device__ inline void
+WrapPBCNonOrth3(double3 &v, const double3 &ax, const double *gpu_cell_x,
+                const double *gpu_cell_y, const double *gpu_cell_z,
+                const double *gpu_Invcell_x, const double *gpu_Invcell_y,
+                const double *gpu_Invcell_z) {
   double3 t;
   TransformUnSlantGPU(t, v, gpu_Invcell_x, gpu_Invcell_y, gpu_Invcell_z);
   WrapPBC(t.x, ax.x);
@@ -63,31 +67,29 @@ __device__ inline void WrapPBCNonOrth3(double3 &v, const double3 &ax,
   TransformSlantGPU(v, t, gpu_cell_x, gpu_cell_y, gpu_cell_z);
 }
 
-__device__ inline void  UnwrapPBC(double &v, const double &ref, const double &ax,
-                                  const double &halfax)
-{
-  if(std::fabs(ref - v) > halfax) {
-    if(ref < halfax)
+__device__ inline void UnwrapPBC(double &v, const double &ref, const double &ax,
+                                 const double &halfax) {
+  if (std::fabs(ref - v) > halfax) {
+    if (ref < halfax)
       v -= ax;
     else
       v += ax;
   }
 }
 
-__device__ inline void UnwrapPBC3(double3 &v, const double3 &ref, const double3 &ax,
-                                  const double3 &halfax)
-{
+__device__ inline void UnwrapPBC3(double3 &v, const double3 &ref,
+                                  const double3 &ax, const double3 &halfax) {
   UnwrapPBC(v.x, ref.x, ax.x, halfax.x);
   UnwrapPBC(v.y, ref.y, ax.y, halfax.y);
   UnwrapPBC(v.z, ref.z, ax.z, halfax.z);
 }
 
-__device__ inline void UnwrapPBCNonOrth3(double3 &v, const double3 &ref, const double3 &ax,
-                                         const double3 &halfax,
-                                         const double *gpu_cell_x, const double *gpu_cell_y,
-                                         const double *gpu_cell_z, const double *gpu_Invcell_x, 
-                                         const double *gpu_Invcell_y, const double *gpu_Invcell_z)
-{
+__device__ inline void
+UnwrapPBCNonOrth3(double3 &v, const double3 &ref, const double3 &ax,
+                  const double3 &halfax, const double *gpu_cell_x,
+                  const double *gpu_cell_y, const double *gpu_cell_z,
+                  const double *gpu_Invcell_x, const double *gpu_Invcell_y,
+                  const double *gpu_Invcell_z) {
   double3 t, tref;
   TransformUnSlantGPU(t, v, gpu_Invcell_x, gpu_Invcell_y, gpu_Invcell_z);
   TransformUnSlantGPU(tref, ref, gpu_Invcell_x, gpu_Invcell_y, gpu_Invcell_z);
@@ -97,8 +99,8 @@ __device__ inline void UnwrapPBCNonOrth3(double3 &v, const double3 &ref, const d
   TransformSlantGPU(v, t, gpu_cell_x, gpu_cell_y, gpu_cell_z);
 }
 
-__device__ inline double MinImageSignedGPU(double raw, const double ax, const double halfAx)
-{
+__device__ inline double MinImageSignedGPU(double raw, const double ax,
+                                           const double halfAx) {
   if (raw > halfAx)
     raw -= ax;
   else if (raw < -halfAx)
@@ -106,19 +108,19 @@ __device__ inline double MinImageSignedGPU(double raw, const double ax, const do
   return raw;
 }
 
-__device__ inline double3 MinImageGPU(double3 rawVec, const double3 axis, const double3 halfAx)
-{
+__device__ inline double3 MinImageGPU(double3 rawVec, const double3 axis,
+                                      const double3 halfAx) {
   rawVec.x = MinImageSignedGPU(rawVec.x, axis.x, halfAx.x);
   rawVec.y = MinImageSignedGPU(rawVec.y, axis.y, halfAx.y);
   rawVec.z = MinImageSignedGPU(rawVec.z, axis.z, halfAx.z);
   return rawVec;
 }
 
-__device__ inline double3 MinImageNonOrthGPU(double3 rawVec, const double3 &axis, const double3 &halfAx,
-                                             const double *gpu_cell_x, const double *gpu_cell_y,
-                                             const double *gpu_cell_z, const double *gpu_Invcell_x, 
-                                             const double *gpu_Invcell_y, const double *gpu_Invcell_z)
-{
+__device__ inline double3
+MinImageNonOrthGPU(double3 rawVec, const double3 &axis, const double3 &halfAx,
+                   const double *gpu_cell_x, const double *gpu_cell_y,
+                   const double *gpu_cell_z, const double *gpu_Invcell_x,
+                   const double *gpu_Invcell_y, const double *gpu_Invcell_z) {
   double3 t;
   TransformUnSlantGPU(t, rawVec, gpu_Invcell_x, gpu_Invcell_y, gpu_Invcell_z);
   t = MinImageGPU(t, axis, halfAx);
@@ -126,29 +128,31 @@ __device__ inline double3 MinImageNonOrthGPU(double3 rawVec, const double3 &axis
   return rawVec;
 }
 
-
-__device__ inline void DeviceInRcut(double &distSq, double3 &dist, const double *gpu_x,
-    const double *gpu_y, const double *gpu_z, int particleID, int otherParticle, double axx,
-    double axy, double axz, int gpu_nonOrth, double *gpu_cell_x, double *gpu_cell_y,
-    double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z)
-{
+__device__ inline void
+DeviceInRcut(double &distSq, double3 &dist, const double *gpu_x,
+             const double *gpu_y, const double *gpu_z, int particleID,
+             int otherParticle, double axx, double axy, double axz,
+             int gpu_nonOrth, double *gpu_cell_x, double *gpu_cell_y,
+             double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
+             double *gpu_Invcell_z) {
   // calculate distance
   double3 axes, halfAx;
   dist.x = gpu_x[particleID] - gpu_x[otherParticle];
   dist.y = gpu_y[particleID] - gpu_y[otherParticle];
   dist.z = gpu_z[particleID] - gpu_z[otherParticle];
-  
+
   axes.x = axx;
   halfAx.x = axx * 0.5;
   axes.y = axy;
   halfAx.y = axy * 0.5;
   axes.z = axz;
   halfAx.z = axz * 0.5;
-  
+
   // minimum image
-  if(gpu_nonOrth) {
-    dist = MinImageNonOrthGPU(dist, axes, halfAx, gpu_cell_x, gpu_cell_y, gpu_cell_z,
-                              gpu_Invcell_x, gpu_Invcell_y, gpu_Invcell_z);
+  if (gpu_nonOrth) {
+    dist = MinImageNonOrthGPU(dist, axes, halfAx, gpu_cell_x, gpu_cell_y,
+                              gpu_cell_z, gpu_Invcell_x, gpu_Invcell_y,
+                              gpu_Invcell_z);
   } else {
     dist = MinImageGPU(dist, axes, halfAx);
   }
@@ -157,19 +161,20 @@ __device__ inline void DeviceInRcut(double &distSq, double3 &dist, const double
 }
 
 // Call by calculate energy whether it is in rCut
-__device__ inline bool InRcutGPU(double &distSq, const double *x, const double *y, const double *z,
-                                 uint i, uint j, const double3 &axis, const double3 &halfAx,
-                                 double gpu_rCut, int gpu_nonOrth,
-                                 const double *gpu_cell_x, const double *gpu_cell_y,
-                                 const double *gpu_cell_z, const double *gpu_Invcell_x,
-                                 const double *gpu_Invcell_y, const double *gpu_Invcell_z)
-{
+__device__ inline bool
+InRcutGPU(double &distSq, const double *x, const double *y, const double *z,
+          uint i, uint j, const double3 &axis, const double3 &halfAx,
+          double gpu_rCut, int gpu_nonOrth, const double *gpu_cell_x,
+          const double *gpu_cell_y, const double *gpu_cell_z,
+          const double *gpu_Invcell_x, const double *gpu_Invcell_y,
+          const double *gpu_Invcell_z) {
   double3 dist;
   dist = Difference3(x, y, z, i, j);
   // Do a binary print here of dist
-  if(gpu_nonOrth) {
-    dist = MinImageNonOrthGPU(dist, axis, halfAx, gpu_cell_x, gpu_cell_y, gpu_cell_z,
-                              gpu_Invcell_x, gpu_Invcell_y, gpu_Invcell_z);
+  if (gpu_nonOrth) {
+    dist = MinImageNonOrthGPU(dist, axis, halfAx, gpu_cell_x, gpu_cell_y,
+                              gpu_cell_z, gpu_Invcell_x, gpu_Invcell_y,
+                              gpu_Invcell_z);
   } else {
     dist = MinImageGPU(dist, axis, halfAx);
   }
@@ -180,18 +185,18 @@ __device__ inline bool InRcutGPU(double &distSq, const double *x, const double *
 }
 
 // Call by force calculate to return the distance and virial component
-__device__ inline bool InRcutGPU(double &distSq, double3 &dist,
-                                 const double *x, const double *y, const double *z,
-                                 uint i, uint j, const double3 &axis, const double3 &halfAx,
-                                 double gpu_rCut, int gpu_nonOrth,
-                                 const double *gpu_cell_x, const double *gpu_cell_y,
-                                 const double *gpu_cell_z, const double *gpu_Invcell_x,
-                                 const double *gpu_Invcell_y, const double *gpu_Invcell_z)
-{
+__device__ inline bool
+InRcutGPU(double &distSq, double3 &dist, const double *x, const double *y,
+          const double *z, uint i, uint j, const double3 &axis,
+          const double3 &halfAx, double gpu_rCut, int gpu_nonOrth,
+          const double *gpu_cell_x, const double *gpu_cell_y,
+          const double *gpu_cell_z, const double *gpu_Invcell_x,
+          const double *gpu_Invcell_y, const double *gpu_Invcell_z) {
   dist = Difference3(x, y, z, i, j);
-  if(gpu_nonOrth) {
-    dist = MinImageNonOrthGPU(dist, axis, halfAx, gpu_cell_x, gpu_cell_y, gpu_cell_z,
-                              gpu_Invcell_x, gpu_Invcell_y, gpu_Invcell_z);
+  if (gpu_nonOrth) {
+    dist = MinImageNonOrthGPU(dist, axis, halfAx, gpu_cell_x, gpu_cell_y,
+                              gpu_cell_z, gpu_Invcell_x, gpu_Invcell_y,
+                              gpu_Invcell_z);
   } else {
     dist = MinImageGPU(dist, axis, halfAx);
   }
@@ -201,56 +206,54 @@ __device__ inline bool InRcutGPU(double &distSq, double3 &dist,
   return ((gpu_rCut * gpu_rCut) > distSq);
 }
 
-__device__ inline int FlatIndexGPU(int i, int j, int gpu_count)
-{
+__device__ inline int FlatIndexGPU(int i, int j, int gpu_count) {
   return i + j * gpu_count;
 }
 
 __device__ inline double DotProductGPU(double kx, double ky, double kz,
-                                       double x, double y, double z)
-{
+                                       double x, double y, double z) {
   return (kx * x + ky * y + kz * z);
 }
 
-__device__ inline double DeviceGetLambdaVDW(int molA, int molB,
-    int box, const bool *gpu_isFraction, const int *gpu_molIndex,
-    const double *gpu_lambdaVDW)
-{
+__device__ inline double DeviceGetLambdaVDW(int molA, int molB, int box,
+                                            const bool *gpu_isFraction,
+                                            const int *gpu_molIndex,
+                                            const double *gpu_lambdaVDW) {
   double lambda = 1.0;
-  if(gpu_isFraction[box]) {
-    if(gpu_molIndex[box] == molA) {
+  if (gpu_isFraction[box]) {
+    if (gpu_molIndex[box] == molA) {
       lambda *= gpu_lambdaVDW[box];
     }
-    if(gpu_molIndex[box] == molB) {
+    if (gpu_molIndex[box] == molB) {
       lambda *= gpu_lambdaVDW[box];
     }
   }
   return lambda;
 }
 
-__device__ inline double DeviceGetLambdaCoulomb(int molA, int molB,
-    int box, const bool *gpu_isFraction, const int *gpu_molIndex,
-    const double *gpu_lambdaCoulomb)
-{
+__device__ inline double
+DeviceGetLambdaCoulomb(int molA, int molB, int box, const bool *gpu_isFraction,
+                       const int *gpu_molIndex,
+                       const double *gpu_lambdaCoulomb) {
   double lambda = 1.0;
-  if(gpu_isFraction[box]) {
-    if(gpu_molIndex[box] == molA) {
+  if (gpu_isFraction[box]) {
+    if (gpu_molIndex[box] == molA) {
       lambda *= gpu_lambdaCoulomb[box];
     }
-    if(gpu_molIndex[box] == molB) {
+    if (gpu_molIndex[box] == molB) {
       lambda *= gpu_lambdaCoulomb[box];
     }
   }
   return lambda;
 }
 
-__device__ inline double DeviceGetLambdaCoulomb(int mol, int box,
-    const bool *gpu_isFraction, const int *gpu_molIndex,
-    const double *gpu_lambdaCoulomb)
-{
+__device__ inline double
+DeviceGetLambdaCoulomb(int mol, int box, const bool *gpu_isFraction,
+                       const int *gpu_molIndex,
+                       const double *gpu_lambdaCoulomb) {
   double lambda = 1.0;
-  if(gpu_isFraction[box]) {
-    if(gpu_molIndex[box] == mol) {
+  if (gpu_isFraction[box]) {
+    if (gpu_molIndex[box] == mol) {
       lambda = gpu_lambdaCoulomb[box];
     }
   }
@@ -260,9 +263,8 @@ __device__ inline double DeviceGetLambdaCoulomb(int mol, int box,
 // Add atomic operations for GPUs that do not support it
 // atomicAdd and atomicSub only support double for Compute Capability >= 6.0
 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
-static __inline__ __device__ double atomicAdd(double *address, double val)
-{
-  unsigned long long int* address_as_ull = (unsigned long long int*)address;
+static __inline__ __device__ double atomicAdd(double *address, double val) {
+  unsigned long long int *address_as_ull = (unsigned long long int *)address;
   unsigned long long int old = *address_as_ull, assumed;
   if (val == 0.0)
     return __longlong_as_double(old);

From e31e3a995690b82815b1ca3bf9a63ba1e253a0cb Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Wed, 17 May 2023 14:39:15 -0400
Subject: [PATCH 06/42] Add support for Clang on Linux

---
 CMake/GOMCCPUSetup.cmake  | 24 ++++++++++++++++--------
 CMake/GOMCCUDASetup.cmake |  8 ++++++++
 CMakeLists.txt            | 17 ++++++++++-------
 metamake.sh               | 24 +++++++++++++++++++-----
 4 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/CMake/GOMCCPUSetup.cmake b/CMake/GOMCCPUSetup.cmake
index 684410575..a7a937905 100644
--- a/CMake/GOMCCPUSetup.cmake
+++ b/CMake/GOMCCPUSetup.cmake
@@ -24,10 +24,12 @@ if(ENSEMBLE_NVT)
    # Set Compiler and linker flags for each compiler
    target_compile_options(NVT
       PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>)
+             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
+             $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>)
    target_link_options(NVT
       PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>)
+             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+             $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>)
    set_target_properties(NVT PROPERTIES 
       OUTPUT_NAME ${NVT_name}
       COMPILE_FLAGS "${NVT_flags}")
@@ -45,10 +47,12 @@ if(ENSEMBLE_GEMC)
    # Set Compiler and linker flags for each compiler
    target_compile_options(GEMC
       PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>)
+             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
+             $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>)
    target_link_options(GEMC
       PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>)
+             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+             $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>)
    set_target_properties(GEMC PROPERTIES 
       OUTPUT_NAME ${GE_name}
       COMPILE_FLAGS "${GE_flags}")
@@ -66,10 +70,12 @@ if(ENSEMBLE_GCMC)
    # Set Compiler and linker flags for each compiler
    target_compile_options(GCMC
       PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>)
+             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
+             $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>)
    target_link_options(GCMC
       PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>)
+             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+             $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>)
    set_target_properties(GCMC PROPERTIES 
       OUTPUT_NAME ${GC_name}
       COMPILE_FLAGS "${GC_flags}")
@@ -87,10 +93,12 @@ if(ENSEMBLE_NPT)
    # Set Compiler and linker flags for each compiler
    target_compile_options(NPT
       PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>)
+             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
+             $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>)
    target_link_options(NPT
       PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>)
+             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+             $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>)
    set_target_properties(NPT PROPERTIES 
       OUTPUT_NAME ${NPT_name}
       COMPILE_FLAGS "${NPT_flags}")
diff --git a/CMake/GOMCCUDASetup.cmake b/CMake/GOMCCUDASetup.cmake
index f66a8bcb9..ef05adb8c 100644
--- a/CMake/GOMCCUDASetup.cmake
+++ b/CMake/GOMCCUDASetup.cmake
@@ -60,10 +60,12 @@ if(ENSEMBLE_GPU_NVT)
     target_compile_options(GPU_NVT
        PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
               $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
+              $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>
               $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS}>)
     target_link_options(GPU_NVT
        PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
               $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+              $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>
               $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
     set_target_properties(GPU_NVT PROPERTIES
         CUDA_SEPARABLE_COMPILATION ON
@@ -88,10 +90,12 @@ if(ENSEMBLE_GPU_GEMC)
     target_compile_options(GPU_GEMC
        PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
               $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
+              $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>
               $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS}>)
     target_link_options(GPU_GEMC
        PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
               $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+              $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>
               $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
     set_target_properties(GPU_GEMC PROPERTIES
         CUDA_SEPARABLE_COMPILATION ON
@@ -116,10 +120,12 @@ if(ENSEMBLE_GPU_GCMC)
     target_compile_options(GPU_GCMC
        PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
               $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
+              $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>
               $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS}>)
     target_link_options(GPU_GCMC
        PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
               $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+              $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>
               $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
     set_target_properties(GPU_GCMC PROPERTIES
         CUDA_SEPARABLE_COMPILATION ON
@@ -144,10 +150,12 @@ if(ENSEMBLE_GPU_NPT)
     target_compile_options(GPU_NPT
        PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
               $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
+              $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>
               $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS}>)
     target_link_options(GPU_NPT
        PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
               $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+              $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>
               $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
     set_target_properties(GPU_NPT PROPERTIES
         CUDA_SEPARABLE_COMPILATION ON
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 30680d5c2..d54269168 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,11 +33,15 @@ if(GOMC_OPT)
    set(CMAKE_INTEL_LINK_FLAGS -Ofast -ipo -xHost)
    set(CMAKE_GNU_COMP_FLAGS -flto -O3 -march=native)
    set(CMAKE_GNU_LINK_FLAGS -flto -O3 -march=native)
+   set(CMAKE_CLANG_COMP_FLAGS -flto -O3 -march=native)
+   set(CMAKE_CLANG_LINK_FLAGS -flto -O3 -march=native)
 endif()
 
 if(GOMC_ASAN)
    set(CMAKE_GNU_COMP_FLAGS ${CMAKE_GNU_COMP_FLAGS} --param=max-vartrack-size=100000000 -fsanitize=address -fno-omit-frame-pointer)
    set(CMAKE_GNU_LINK_FLAGS ${CMAKE_GNU_LINK_FLAGS} --param=max-vartrack-size=100000000 -fsanitize=address -fno-omit-frame-pointer)
+   set(CMAKE_CLANG_COMP_FLAGS ${CMAKE_CLANG_COMP_FLAGS} -fsanitize=address -fno-omit-frame-pointer)
+   set(CMAKE_CLANG_LINK_FLAGS ${CMAKE_CLANG_LINK_FLAGS} -fsanitize=address -fno-omit-frame-pointer)
 endif()
 
 # find OpenMP and set it up
@@ -45,10 +49,12 @@ endif()
 if(NOT GOMC_ASAN)
     find_package(OpenMP)
     if(OPENMP_FOUND)
-       set(CMAKE_GNU_COMP_FLAGS ${CMAKE_GNU_COMP_FLAGS} ${OpenMP_CXX_FLAGS})
-       set(CMAKE_GNU_LINK_FLAGS ${CMAKE_GNU_LINK_FLAGS} ${OpenMP_CXX_FLAGS})
        set(CMAKE_INTEL_COMP_FLAGS ${CMAKE_INTEL_COMP_FLAGS} ${OpenMP_CXX_FLAGS})
        set(CMAKE_INTEL_LINK_FLAGS ${CMAKE_INTEL_LINK_FLAGS} ${OpenMP_CXX_FLAGS})
+       set(CMAKE_GNU_COMP_FLAGS ${CMAKE_GNU_COMP_FLAGS} ${OpenMP_CXX_FLAGS})
+       set(CMAKE_GNU_LINK_FLAGS ${CMAKE_GNU_LINK_FLAGS} ${OpenMP_CXX_FLAGS})
+       set(CMAKE_CLANG_COMP_FLAGS ${CMAKE_CLANG_COMP_FLAGS} ${OpenMP_CXX_FLAGS})
+       set(CMAKE_CLANG_LINK_FLAGS ${CMAKE_CLANG_LINK_FLAGS} ${OpenMP_CXX_FLAGS})
     endif()
 endif()
 
@@ -65,11 +71,8 @@ include(${PROJECT_SOURCE_DIR}/CMake/GOMCMPI.cmake)
 
 include_directories("${PROJECT_BINARY_DIR}")
 
-# Additional flags for GNU and Intel compilers set elsewhere
-if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -stdlib=libc++") 
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -stdlib=libc++")
-elseif(MSVC)
+# Additional flags for Intel, GNU and Clang compilers set elsewhere
+if(MSVC)
     set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /D_DEBUG /MTd /Zi /Ob0 /Od /RTC1")
     set(CMAKE_CXX_FLAGS_DEBUG_INIT "${CMAKE_CXX_FLAGS_DEBUG_INIT} /D_DEBUG /MTd /Zi /Ob0 /Od /RTC1")
     set(CMAKE_CXX_FLAGS_MINSIZEREL_INIT "${CMAKE_CXX_FLAGS_MINSIZEREL_INIT} /MT /O1 /Ob1 /D NDEBUG")
diff --git a/metamake.sh b/metamake.sh
index 2210c570a..a6c342dac 100755
--- a/metamake.sh
+++ b/metamake.sh
@@ -5,6 +5,7 @@ use_cuda=0
 use_profiler=0
 use_gtest=0
 use_gcc=0
+use_clang=0
 use_mpi=0
 use_asan=0
 use_opt=1
@@ -70,7 +71,7 @@ then
 	fi
 fi
 
-while getopts 'acdgmnpt' opt; do
+while getopts 'acdglmnpt' opt; do
     case "$opt" in
         a)
             use_asan=1;;
@@ -81,6 +82,8 @@ while getopts 'acdgmnpt' opt; do
             use_debug=1;;
         g)
             use_gcc=1;;
+        l)
+            use_clang=1;;
         m)
             use_mpi=1
             CMAKEARGS+="-DGOMC_MPI=on ";;
@@ -96,7 +99,8 @@ while getopts 'acdgmnpt' opt; do
             echo "-a, enables address sanitizer runtime checking"
             echo "-c, enables clang-tidy source code checks"
             echo "-d, enables Debug Mode compilation"
-            echo "-g, use the gcc compiler"
+            echo "-g, use the GNU compiler"
+            echo "-l, use the Clang compiler"
             echo "-m, enables MPI support (Required for Parallel Tempering)"
             echo "-n, disables most optimizing compiler flags"
             echo "-p enables GPU code profiling (NVTX tags)"
@@ -136,8 +140,7 @@ mkdir -p bin
 cd bin
 
 if (( !use_gtest )); then
-    if (( !use_gcc )); 
-    then
+    if (( !use_gcc && !use_clang )); then
         ICC_PATH="$(which icx 2> /dev/null)"
         ICPC_PATH="$(which icpx 2> /dev/null)"
         if [ -z "$ICC_PATH" ]
@@ -157,10 +160,21 @@ if (( !use_gtest )); then
             export CC=${ICC_PATH}
             export CXX=${ICPC_PATH}
         fi
+    elif (( use_clang )); then
+        CLANG_PATH="$(which clang 2> /dev/null)"
+        CLANGXX_PATH="$(which clang++ 2> /dev/null)"
+        # Use Clang only if both drivers exist; otherwise fall back to GNU
+        if [ -z "$CLANG_PATH" ] || [ -z "$CLANGXX_PATH" ]; then
+            export CC="$(which gcc 2> /dev/null)"
+            export CXX="$(which g++ 2> /dev/null)"
+        else
+            export CC=${CLANG_PATH}
+            export CXX=${CLANGXX_PATH}
+        fi
     else
         export CC="$(which gcc 2> /dev/null)"
         export CXX="$(which g++ 2> /dev/null)"
     fi
 else
     if (( use_mpi )); 
     then

From 7fa1e74d3de410c2acbe3b3d70cb21ffd91231f1 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 22 May 2023 20:37:54 -0400
Subject: [PATCH 07/42] Restore to development version

---
 src/CalculateEnergy.cpp | 1744 ---------------------------------------
 1 file changed, 1744 deletions(-)

diff --git a/src/CalculateEnergy.cpp b/src/CalculateEnergy.cpp
index 560b16ed3..42bf4a0d2 100644
--- a/src/CalculateEnergy.cpp
+++ b/src/CalculateEnergy.cpp
@@ -1,1747 +1,3 @@
-/*******************************************************************************
-GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
-Copyright (C) 2022 GOMC Group
-A copy of the MIT License can be found in License.txt
-along with this program, also can be found at
-<https://opensource.org/licenses/MIT>.
-********************************************************************************/
-#include "CalculateEnergy.h" //header for this
-
-#include <cassert>
-
-#include "BasicTypes.h" //uint
-#include "BoxDimensions.h"
-#include "BoxDimensionsNonOrth.h"
-#include "Coordinates.h"
-#include "EnergyTypes.h"          //Energy structs
-#include "EnsemblePreprocessor.h" //Flags
-#include "Ewald.h"                //for ewald calculation
-#include "EwaldCached.h"          //for ewald calculation
-#include "Forcefield.h"           //
-#include "GeomLib.h"
-#include "MoleculeKind.h"
-#include "MoleculeLookup.h"
-#include "NoEwald.h" //for ewald calculation
-#include "NumLib.h"
-#include "StaticVals.h" //For init
-#include "System.h"     //For init
-#include "TrialMol.h"
-#ifdef GOMC_CUDA
-#include "CalculateEnergyCUDAKernel.cuh"
-#include "CalculateForceCUDAKernel.cuh"
-#include "ConstantDefinitionsCUDAKernel.cuh"
-#endif
-#include "GOMCEventsProfile.h"
-#define NUMBER_OF_NEIGHBOR_CELL 27
-
-//
-//    CalculateEnergy.cpp
-//    Energy Calculation functions for Monte Carlo simulation
-//    Calculates using const references to a particular Simulation's members
-//    Brock Jackman Sep. 2013
-//
-//    Updated to use radial-based intermolecular pressure
-//    Jason Mick    Feb. 2014
-//
-
-using namespace geom;
-
-CalculateEnergy::CalculateEnergy(StaticVals &stat, System &sys)
-    : forcefield(stat.forcefield), mols(stat.mol),
-      currentCoords(sys.coordinates), currentCOM(sys.com),
-      lambdaRef(sys.lambdaRef), atomForceRef(sys.atomForceRef),
-      molForceRef(sys.molForceRef),
-#ifdef VARIABLE_PARTICLE_NUMBER
-      molLookup(sys.molLookup),
-#else
-      molLookup(stat.molLookup),
-#endif
-      currentAxes(sys.boxDimRef), cellList(sys.cellList) {
-}
-
-void CalculateEnergy::Init(System &sys) {
-  uint maxAtomInMol = 0;
-  calcEwald = sys.GetEwald();
-  electrostatic = forcefield.electrostatic;
-  ewald = forcefield.ewald;
-  multiParticleEnabled = sys.statV.multiParticleEnabled;
-  for (uint m = 0; m < mols.count; ++m) {
-    const MoleculeKind &molKind = mols.GetKind(m);
-    if (molKind.NumAtoms() > maxAtomInMol)
-      maxAtomInMol = molKind.NumAtoms();
-    for (uint a = 0; a < molKind.NumAtoms(); ++a) {
-      particleKind.push_back(molKind.AtomKind(a));
-      particleMol.push_back(m);
-      particleCharge.push_back(molKind.AtomCharge(a));
-      particleIndex.push_back(int(a));
-    }
-  }
-#ifdef GOMC_CUDA
-  InitCoordinatesCUDA(forcefield.particles->getCUDAVars(),
-                      currentCoords.Count(), maxAtomInMol, currentCOM.Count());
-#endif
-}
-
-SystemPotential CalculateEnergy::SystemTotal() {
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_SYSTEM_TOTAL);
-  SystemPotential pot =
-      SystemInter(SystemPotential(), currentCoords, currentAxes);
-
-  // system intra
-  for (uint b = 0; b < BOX_TOTAL; ++b) {
-    GOMC_EVENT_START(1, GomcProfileEvent::EN_BOX_INTRA);
-    double bondEnergy[2] = {0};
-    double bondEn = 0.0, nonbondEn = 0.0, correction = 0.0;
-    MoleculeLookup::box_iterator thisMol = molLookup.BoxBegin(b);
-    MoleculeLookup::box_iterator end = molLookup.BoxEnd(b);
-    std::vector<uint> molID;
-
-    while (thisMol != end) {
-      molID.push_back(*thisMol);
-      ++thisMol;
-    }
-
-#ifdef _OPENMP
-#pragma omp parallel for default(none) private(bondEnergy) shared(b, molID) \
-    reduction(+:bondEn, nonbondEn, correction)
-#endif
-    for (int i = 0; i < (int)molID.size(); i++) {
-      // calculate nonbonded energy
-      MoleculeIntra(molID[i], b, bondEnergy);
-      bondEn += bondEnergy[0];
-      nonbondEn += bondEnergy[1];
-      // calculate correction term of electrostatic interaction
-      correction += calcEwald->MolCorrection(molID[i], b);
-    }
-
-    pot.boxEnergy[b].intraBond = bondEn;
-    pot.boxEnergy[b].intraNonbond = nonbondEn;
-    // calculate self term of electrostatic interaction
-    pot.boxEnergy[b].self = calcEwald->BoxSelf(b);
-    pot.boxEnergy[b].correction = correction;
-
-    GOMC_EVENT_STOP(1, GomcProfileEvent::EN_BOX_INTRA);
-    // Calculate Virial
-    pot.boxVirial[b] = VirialCalc(b);
-  }
-
-  pot.Total();
-
-  if (pot.totalEnergy.total > 1.0e12) {
-    std::cout << "\nWarning: Large energy detected due to the overlap in "
-                 "initial configuration.\n"
-                 "         The total energy will be recalculated at EqStep to "
-                 "ensure the accuracy \n"
-                 "         of the computed running energies.\n";
-  }
-
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_SYSTEM_TOTAL);
-  return pot;
-}
-
-SystemPotential CalculateEnergy::SystemInter(SystemPotential potential,
-                                             XYZArray const &coords,
-                                             BoxDimensions const &boxAxes) {
-  for (uint b = 0; b < BOXES_WITH_U_NB; ++b) {
-    // calculate LJ interaction and real term of electrostatic interaction
-    potential = BoxInter(potential, coords, boxAxes, b);
-    // calculate reciprocal term of electrostatic interaction
-    potential.boxEnergy[b].recip = calcEwald->BoxReciprocal(b, false);
-  }
-
-  potential.Total();
-
-  return potential;
-}
-
-// Calculate the inter energy for Box. Fractional molecule are not allowed in
-// this function. Need to implement the GPU function
-SystemPotential CalculateEnergy::BoxInter(SystemPotential potential,
-                                          XYZArray const &coords,
-                                          BoxDimensions const &boxAxes,
-                                          const uint box) {
-  // Handles reservoir box case, returning zeroed structure if
-  // interactions are off.
-  if (box >= BOXES_WITH_U_NB)
-    return potential;
-
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_BOX_INTER);
-  double tempREn = 0.0, tempLJEn = 0.0;
-
-  std::vector<int> cellVector, cellStartIndex, mapParticleToCell;
-  std::vector<std::vector<int>> neighborList;
-  cellList.GetCellListNeighbor(box, currentCoords.Count(), cellVector,
-                               cellStartIndex, mapParticleToCell);
-  neighborList = cellList.GetNeighborList(box);
-
-#ifdef GOMC_CUDA
-  // update unitcell in GPU
-  UpdateCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
-                      boxAxes.cellBasis[box].x, boxAxes.cellBasis[box].y,
-                      boxAxes.cellBasis[box].z);
-
-  if (!boxAxes.orthogonal[box]) {
-    // In this case, boxAxes is really an object of type BoxDimensionsNonOrth,
-    // so cast and copy the additional data to the GPU
-    const BoxDimensionsNonOrth *NonOrthAxes =
-        static_cast<const BoxDimensionsNonOrth *>(&boxAxes);
-    UpdateInvCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
-                           NonOrthAxes->cellBasis_Inv[box].x,
-                           NonOrthAxes->cellBasis_Inv[box].y,
-                           NonOrthAxes->cellBasis_Inv[box].z);
-  }
-
-  CallBoxInterGPU(forcefield.particles->getCUDAVars(), cellVector,
-                  cellStartIndex, neighborList, coords, boxAxes, electrostatic,
-                  particleCharge, particleKind, particleMol, tempREn, tempLJEn,
-                  forcefield.sc_coul, forcefield.sc_sigma_6,
-                  forcefield.sc_alpha, forcefield.sc_power, box);
-#else
-#if defined _OPENMP && _OPENMP >= 201511 // check if OpenMP version is 4.5
-#pragma omp parallel for default(none) shared(boxAxes, cellStartIndex, \
-  cellVector, coords, mapParticleToCell, neighborList) \
-reduction(+:tempREn, tempLJEn) firstprivate(box, num::qqFact)
-#endif
-  // loop over all particles
-  for (int currParticleIdx = 0; currParticleIdx < (int)cellVector.size();
-       currParticleIdx++) {
-    int currParticle = cellVector[currParticleIdx];
-    // find the which cell currParticle belong to
-    int currCell = mapParticleToCell[currParticle];
-    // loop over currCell neighboring cells
-    for (int nCellIndex = 0; nCellIndex < NUMBER_OF_NEIGHBOR_CELL;
-         nCellIndex++) {
-      // find the index of neighboring cell
-      int neighborCell = neighborList[currCell][nCellIndex];
-
-      // find the ending index in neighboring cell
-      int endIndex = cellStartIndex[neighborCell + 1];
-      // loop over particle inside neighboring cell
-      for (int nParticleIndex = cellStartIndex[neighborCell];
-           nParticleIndex < endIndex; nParticleIndex++) {
-        int nParticle = cellVector[nParticleIndex];
-
-        // avoid same particles and duplicate work
-        if (currParticle < nParticle &&
-            particleMol[currParticle] != particleMol[nParticle]) {
-          double distSq;
-          XYZ virComponents;
-          if (boxAxes.InRcut(distSq, virComponents, coords, currParticle,
-                             nParticle, box)) {
-            double lambdaVDW = GetLambdaVDW(particleMol[currParticle],
-                                            particleMol[nParticle], box);
-            if (electrostatic) {
-              double lambdaCoulomb = GetLambdaCoulomb(
-                  particleMol[currParticle], particleMol[nParticle], box);
-              double qi_qj_fact = particleCharge[currParticle] *
-                                  particleCharge[nParticle] * num::qqFact;
-              if (qi_qj_fact != 0.0) {
-                tempREn += forcefield.particles->CalcCoulomb(
-                    distSq, particleKind[currParticle], particleKind[nParticle],
-                    qi_qj_fact, lambdaCoulomb, box);
-              }
-            }
-            tempLJEn += forcefield.particles->CalcEn(
-                distSq, particleKind[currParticle], particleKind[nParticle],
-                lambdaVDW);
-          }
-        }
-      }
-    }
-  }
-#endif
-
-  // setting energy and virial of LJ interaction
-  potential.boxEnergy[box].inter = tempLJEn;
-  // setting energy and virial of coulomb interaction
-  potential.boxEnergy[box].real = tempREn;
-
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_BOX_INTER);
-  // set correction energy and virial
-  if (forcefield.useLRC) {
-    EnergyCorrection(potential, boxAxes, box);
-  }
-
-  potential.Total();
-  return potential;
-}
-
-SystemPotential
-CalculateEnergy::BoxForce(SystemPotential potential, XYZArray const &coords,
-                          XYZArray &atomForce, XYZArray &molForce,
-                          BoxDimensions const &boxAxes, const uint box) {
-  // Handles reservoir box case, returning zeroed structure if
-  // interactions are off.
-  if (box >= BOXES_WITH_U_NB)
-    return potential;
-
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_BOX_FORCE);
-
-  double tempREn = 0.0, tempLJEn = 0.0;
-  // make a pointer to atom force and mol force for OpenMP
-  double *aForcex = atomForce.x;
-  double *aForcey = atomForce.y;
-  double *aForcez = atomForce.z;
-  double *mForcex = molForce.x;
-  double *mForcey = molForce.y;
-  double *mForcez = molForce.z;
-  int atomCount = atomForce.Count();
-  int molCount = molForce.Count();
-
-  // Reset Force Arrays
-  ResetForce(atomForce, molForce, box);
-
-  std::vector<int> cellVector, cellStartIndex, mapParticleToCell;
-  std::vector<std::vector<int>> neighborList;
-  cellList.GetCellListNeighbor(box, coords.Count(), cellVector, cellStartIndex,
-                               mapParticleToCell);
-  neighborList = cellList.GetNeighborList(box);
-
-#ifdef GOMC_CUDA
-  // update unitcell in GPU
-  UpdateCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
-                      boxAxes.cellBasis[box].x, boxAxes.cellBasis[box].y,
-                      boxAxes.cellBasis[box].z);
-
-  if (!boxAxes.orthogonal[box]) {
-    // In this case, boxAxes is really an object of type BoxDimensionsNonOrth,
-    // so cast and copy the additional data to the GPU
-    const BoxDimensionsNonOrth *NonOrthAxes =
-        static_cast<const BoxDimensionsNonOrth *>(&boxAxes);
-    UpdateInvCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
-                           NonOrthAxes->cellBasis_Inv[box].x,
-                           NonOrthAxes->cellBasis_Inv[box].y,
-                           NonOrthAxes->cellBasis_Inv[box].z);
-  }
-
-  CallBoxForceGPU(forcefield.particles->getCUDAVars(), cellVector,
-                  cellStartIndex, neighborList, mapParticleToCell, coords,
-                  boxAxes, electrostatic, particleCharge, particleKind,
-                  particleMol, tempREn, tempLJEn, aForcex, aForcey, aForcez,
-                  mForcex, mForcey, mForcez, atomCount, molCount,
-                  forcefield.sc_coul, forcefield.sc_sigma_6,
-                  forcefield.sc_alpha, forcefield.sc_power, box);
-
-#else
-#if defined _OPENMP && _OPENMP >= 201511 // check if OpenMP version is 4.5
-#pragma omp parallel for default(none) shared(boxAxes, cellStartIndex, \
-  cellVector, coords, mapParticleToCell, neighborList) \
-  firstprivate(box, atomCount, molCount, num::qqFact) \
-  reduction(+:tempREn, tempLJEn, aForcex[:atomCount], aForcey[:atomCount], \
-            aForcez[:atomCount], mForcex[:molCount], mForcey[:molCount], \
-            mForcez[:molCount])
-#endif
-  for (int currParticleIdx = 0; currParticleIdx < (int)cellVector.size();
-       currParticleIdx++) {
-    int currParticle = cellVector[currParticleIdx];
-    int currCell = mapParticleToCell[currParticle];
-
-    for (int nCellIndex = 0; nCellIndex < NUMBER_OF_NEIGHBOR_CELL;
-         nCellIndex++) {
-      int neighborCell = neighborList[currCell][nCellIndex];
-
-      int endIndex = cellStartIndex[neighborCell + 1];
-      for (int nParticleIndex = cellStartIndex[neighborCell];
-           nParticleIndex < endIndex; nParticleIndex++) {
-        int nParticle = cellVector[nParticleIndex];
-
-        if (currParticle < nParticle &&
-            particleMol[currParticle] != particleMol[nParticle]) {
-          double distSq;
-          XYZ virComponents, forceLJ, forceReal;
-          if (boxAxes.InRcut(distSq, virComponents, coords, currParticle,
-                             nParticle, box)) {
-            double lambdaVDW = GetLambdaVDW(particleMol[currParticle],
-                                            particleMol[nParticle], box);
-            if (electrostatic) {
-              double lambdaCoulomb = GetLambdaCoulomb(
-                  particleMol[currParticle], particleMol[nParticle], box);
-              double qi_qj_fact = particleCharge[currParticle] *
-                                  particleCharge[nParticle] * num::qqFact;
-              if (qi_qj_fact != 0.0) {
-                tempREn += forcefield.particles->CalcCoulomb(
-                    distSq, particleKind[currParticle], particleKind[nParticle],
-                    qi_qj_fact, lambdaCoulomb, box);
-                // Calculating the force
-                forceReal =
-                    virComponents * forcefield.particles->CalcCoulombVir(
-                                        distSq, particleKind[currParticle],
-                                        particleKind[nParticle], qi_qj_fact,
-                                        lambdaCoulomb, box);
-              }
-            }
-            tempLJEn += forcefield.particles->CalcEn(
-                distSq, particleKind[currParticle], particleKind[nParticle],
-                lambdaVDW);
-            forceLJ = virComponents * forcefield.particles->CalcVir(
-                                          distSq, particleKind[currParticle],
-                                          particleKind[nParticle], lambdaVDW);
-            aForcex[currParticle] += forceLJ.x + forceReal.x;
-            aForcey[currParticle] += forceLJ.y + forceReal.y;
-            aForcez[currParticle] += forceLJ.z + forceReal.z;
-            aForcex[nParticle] += -(forceLJ.x + forceReal.x);
-            aForcey[nParticle] += -(forceLJ.y + forceReal.y);
-            aForcez[nParticle] += -(forceLJ.z + forceReal.z);
-            mForcex[particleMol[currParticle]] += (forceLJ.x + forceReal.x);
-            mForcey[particleMol[currParticle]] += (forceLJ.y + forceReal.y);
-            mForcez[particleMol[currParticle]] += (forceLJ.z + forceReal.z);
-            mForcex[particleMol[nParticle]] += -(forceLJ.x + forceReal.x);
-            mForcey[particleMol[nParticle]] += -(forceLJ.y + forceReal.y);
-            mForcez[particleMol[nParticle]] += -(forceLJ.z + forceReal.z);
-          }
-        }
-      }
-    }
-  }
-#endif
-
-  // setting energy and virial of LJ interaction
-  potential.boxEnergy[box].inter = tempLJEn;
-  // setting energy and virial of coulomb interaction
-  potential.boxEnergy[box].real = tempREn;
-
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_BOX_FORCE);
-  return potential;
-}
-
-// NOTE: The calculation of W12, W13, and W23 is expensive and would not be
-// required for pressure and surface tension calculation. So, they have been
-// commented out. If you need to calculate them, uncomment them.
-Virial CalculateEnergy::VirialCalc(const uint box) {
-  // store virial and energy of reference and modify the virial
-  Virial tempVir;
-  // no need to calculate the virial for reservoir
-  if (box >= BOXES_WITH_U_NB)
-    return tempVir;
-
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_BOX_VIRIAL);
-
-  // tensors for VDW and real part of electrostatic
-  double vT11 = 0.0, vT12 = 0.0, vT13 = 0.0;
-  double vT22 = 0.0, vT23 = 0.0, vT33 = 0.0;
-  double rT11 = 0.0, rT12 = 0.0, rT13 = 0.0;
-  double rT22 = 0.0, rT23 = 0.0, rT33 = 0.0;
-
-  std::vector<int> cellVector, cellStartIndex, mapParticleToCell;
-  std::vector<std::vector<int>> neighborList;
-  cellList.GetCellListNeighbor(box, currentCoords.Count(), cellVector,
-                               cellStartIndex, mapParticleToCell);
-  neighborList = cellList.GetNeighborList(box);
-
-#ifdef GOMC_CUDA
-  // update unitcell in GPU
-  UpdateCellBasisCUDA(
-      forcefield.particles->getCUDAVars(), box, currentAxes.cellBasis[box].x,
-      currentAxes.cellBasis[box].y, currentAxes.cellBasis[box].z);
-
-  if (!currentAxes.orthogonal[box]) {
-    // In this case, currentAxes is really an object of type
-    // BoxDimensionsNonOrth,
-    // so cast and copy the additional data to the GPU
-    const BoxDimensionsNonOrth *NonOrthAxes =
-        static_cast<const BoxDimensionsNonOrth *>(&currentAxes);
-    UpdateInvCellBasisCUDA(forcefield.particles->getCUDAVars(), box,
-                           NonOrthAxes->cellBasis_Inv[box].x,
-                           NonOrthAxes->cellBasis_Inv[box].y,
-                           NonOrthAxes->cellBasis_Inv[box].z);
-  }
-
-  CallBoxInterForceGPU(forcefield.particles->getCUDAVars(), cellVector,
-                       cellStartIndex, neighborList, mapParticleToCell,
-                       currentCoords, currentCOM, currentAxes, electrostatic,
-                       particleCharge, particleKind, particleMol, rT11, rT12,
-                       rT13, rT22, rT23, rT33, vT11, vT12, vT13, vT22, vT23,
-                       vT33, forcefield.sc_coul, forcefield.sc_sigma_6,
-                       forcefield.sc_alpha, forcefield.sc_power, box);
-#else
-#if defined _OPENMP && _OPENMP >= 201511 // check if OpenMP version is 4.5
-#pragma omp parallel for default(none) shared(cellStartIndex, cellVector, \
-  mapParticleToCell, neighborList) firstprivate(box) \
-reduction(+:vT11, vT12, vT13, vT22, vT23, vT33, rT11, rT12, rT13, rT22, rT23, rT33)
-#endif
-  for (int currParticleIdx = 0; currParticleIdx < (int)cellVector.size();
-       currParticleIdx++) {
-    int currParticle = cellVector[currParticleIdx];
-    int currCell = mapParticleToCell[currParticle];
-
-    for (int nCellIndex = 0; nCellIndex < NUMBER_OF_NEIGHBOR_CELL;
-         nCellIndex++) {
-      int neighborCell = neighborList[currCell][nCellIndex];
-
-      int endIndex = cellStartIndex[neighborCell + 1];
-      for (int nParticleIndex = cellStartIndex[neighborCell];
-           nParticleIndex < endIndex; nParticleIndex++) {
-        int nParticle = cellVector[nParticleIndex];
-
-        // make sure the pairs are unique and they belong to different molecules
-        if (currParticle < nParticle &&
-            particleMol[currParticle] != particleMol[nParticle]) {
-          double distSq;
-          XYZ virC;
-          if (currentAxes.InRcut(distSq, virC, currentCoords, currParticle,
-                                 nParticle, box)) {
-            // calculate the distance between com of two molecules
-            XYZ comC = currentCOM.Difference(particleMol[currParticle],
-                                             particleMol[nParticle]);
-            // calculate the minimum image between com of two molecules
-            comC = currentAxes.MinImage(comC, box);
-            double lambdaVDW = GetLambdaVDW(particleMol[currParticle],
-                                            particleMol[nParticle], box);
-
-            if (electrostatic) {
-              double lambdaCoulomb = GetLambdaCoulomb(
-                  particleMol[currParticle], particleMol[nParticle], box);
-              double qi_qj =
-                  particleCharge[currParticle] * particleCharge[nParticle];
-
-              // skip particle pairs with no charge
-              if (qi_qj != 0.0) {
-                double pRF = forcefield.particles->CalcCoulombVir(
-                    distSq, particleKind[currParticle], particleKind[nParticle],
-                    qi_qj, lambdaCoulomb, box);
-                // calculate the top diagonal of pressure tensor
-                rT11 += pRF * (virC.x * comC.x);
-                // rT12 += pRF * (0.5 * (virC.x * comC.y + virC.y * comC.x));
-                // rT13 += pRF * (0.5 * (virC.x * comC.z + virC.z * comC.x));
-
-                rT22 += pRF * (virC.y * comC.y);
-                // rT23 += pRF * (0.5 * (virC.y * comC.z + virC.z * comC.y));
-
-                rT33 += pRF * (virC.z * comC.z);
-              }
-            }
-
-            double pVF = forcefield.particles->CalcVir(
-                distSq, particleKind[currParticle], particleKind[nParticle],
-                lambdaVDW);
-            // calculate the top diagonal of pressure tensor
-            vT11 += pVF * (virC.x * comC.x);
-            // vT12 += pVF * (0.5 * (virC.x * comC.y + virC.y * comC.x));
-            // vT13 += pVF * (0.5 * (virC.x * comC.z + virC.z * comC.x));
-
-            vT22 += pVF * (virC.y * comC.y);
-            // vT23 += pVF * (0.5 * (virC.y * comC.z + virC.z * comC.y));
-
-            vT33 += pVF * (virC.z * comC.z);
-          }
-        }
-      }
-    }
-  }
-#endif
-
-  // set the all tensor values
-  tempVir.interTens[0][0] = vT11;
-  tempVir.interTens[0][1] = vT12;
-  tempVir.interTens[0][2] = vT13;
-
-  tempVir.interTens[1][0] = vT12;
-  tempVir.interTens[1][1] = vT22;
-  tempVir.interTens[1][2] = vT23;
-
-  tempVir.interTens[2][0] = vT13;
-  tempVir.interTens[2][1] = vT23;
-  tempVir.interTens[2][2] = vT33;
-
-  if (electrostatic) {
-    // real part of electrostatic
-    tempVir.realTens[0][0] = rT11 * num::qqFact;
-    tempVir.realTens[0][1] = rT12 * num::qqFact;
-    tempVir.realTens[0][2] = rT13 * num::qqFact;
-
-    tempVir.realTens[1][0] = rT12 * num::qqFact;
-    tempVir.realTens[1][1] = rT22 * num::qqFact;
-    tempVir.realTens[1][2] = rT23 * num::qqFact;
-
-    tempVir.realTens[2][0] = rT13 * num::qqFact;
-    tempVir.realTens[2][1] = rT23 * num::qqFact;
-    tempVir.realTens[2][2] = rT33 * num::qqFact;
-  }
-
-  // setting virial of LJ
-  tempVir.inter = vT11 + vT22 + vT33;
-  // setting virial of coulomb
-  tempVir.real = (rT11 + rT22 + rT33) * num::qqFact;
-
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_BOX_VIRIAL);
-
-  if (forcefield.useLRC || forcefield.useIPC) {
-    VirialCorrection(tempVir, currentAxes, box);
-  }
-
-  // calculate reciprocal term of force
-  tempVir = calcEwald->VirialReciprocal(tempVir, box);
-
-  tempVir.Total();
-  return tempVir;
-}
-
-bool CalculateEnergy::MoleculeInter(Intermolecular &inter_LJ,
-                                    Intermolecular &inter_coulomb,
-                                    XYZArray const &molCoords,
-                                    const uint molIndex, const uint box) const {
-  double tempREn = 0.0, tempLJEn = 0.0;
-  bool overlap = false;
-
-  if (box < BOXES_WITH_U_NB) {
-    GOMC_EVENT_START(1, GomcProfileEvent::EN_MOL_INTER);
-    uint length = mols.GetKind(molIndex).NumAtoms();
-    uint start = mols.MolStart(molIndex);
-
-    for (uint p = 0; p < length; ++p) {
-      uint atom = start + p;
-      CellList::Neighbors n = cellList.EnumerateLocal(currentCoords[atom], box);
-
-      std::vector<uint> nIndex;
-      // store atom index in neighboring cell
-      while (!n.Done()) {
-        nIndex.push_back(*n);
-        n.Next();
-      }
-
-#ifdef _OPENMP
-#pragma omp parallel for default(none) shared(nIndex) \
-firstprivate(atom, box, molIndex, num::qqFact) reduction(+:tempREn, tempLJEn)
-#endif
-      for (int i = 0; i < (int)nIndex.size(); i++) {
-        double distSq = 0.0;
-        XYZ virComponents;
-        // Subtract old energy
-        if (currentAxes.InRcut(distSq, virComponents, currentCoords, atom,
-                               nIndex[i], box)) {
-          double lambdaVDW =
-              GetLambdaVDW(molIndex, particleMol[nIndex[i]], box);
-
-          if (electrostatic) {
-            double lambdaCoulomb =
-                GetLambdaCoulomb(molIndex, particleMol[nIndex[i]], box);
-            double qi_qj_fact =
-                particleCharge[atom] * particleCharge[nIndex[i]] * num::qqFact;
-
-            if (qi_qj_fact != 0.0) {
-              tempREn += -forcefield.particles->CalcCoulomb(
-                  distSq, particleKind[atom], particleKind[nIndex[i]],
-                  qi_qj_fact, lambdaCoulomb, box);
-            }
-          }
-
-          tempLJEn += -forcefield.particles->CalcEn(
-              distSq, particleKind[atom], particleKind[nIndex[i]], lambdaVDW);
-        }
-      }
-
-      // add new energy
-      n = cellList.EnumerateLocal(molCoords[p], box);
-      // store atom index in neighboring cell
-      nIndex.clear();
-      while (!n.Done()) {
-        nIndex.push_back(*n);
-        n.Next();
-      }
-
-#ifdef _OPENMP
-#pragma omp parallel for default(none) shared(molCoords, nIndex, overlap) \
-firstprivate(atom, box, molIndex, p, num::qqFact) reduction(+:tempREn, tempLJEn)
-#endif
-      for (int i = 0; i < (int)nIndex.size(); i++) {
-        double distSq = 0.0;
-        XYZ virComponents;
-        if (currentAxes.InRcut(distSq, virComponents, molCoords, p,
-                               currentCoords, nIndex[i], box)) {
-          double lambdaVDW =
-              GetLambdaVDW(molIndex, particleMol[nIndex[i]], box);
-
-          if (distSq < forcefield.rCutLowSq) {
-            overlap |= true;
-          }
-
-          if (electrostatic) {
-            double lambdaCoulomb =
-                GetLambdaCoulomb(molIndex, particleMol[nIndex[i]], box);
-            double qi_qj_fact =
-                particleCharge[atom] * particleCharge[nIndex[i]] * num::qqFact;
-
-            if (qi_qj_fact != 0.0) {
-              tempREn += forcefield.particles->CalcCoulomb(
-                  distSq, particleKind[atom], particleKind[nIndex[i]],
-                  qi_qj_fact, lambdaCoulomb, box);
-            }
-          }
-
-          tempLJEn += forcefield.particles->CalcEn(
-              distSq, particleKind[atom], particleKind[nIndex[i]], lambdaVDW);
-        }
-      }
-    }
-    GOMC_EVENT_STOP(1, GomcProfileEvent::EN_MOL_INTER);
-  }
-
-  inter_LJ.energy = tempLJEn;
-  inter_coulomb.energy = tempREn;
-  return overlap;
-}
-
-// Calculate 1-N nonbonded intra energy
-void CalculateEnergy::ParticleNonbonded(double *inter,
-                                        cbmc::TrialMol const &trialMol,
-                                        XYZArray const &trialPos,
-                                        const uint partIndex, const uint box,
-                                        const uint trials) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_CBMC_INTRA_NB);
-  const MoleculeKind &kind = trialMol.GetKind();
-  // loop over all partners of the trial particle
-  const uint *partner = kind.sortedNB.Begin(partIndex);
-  const uint *end = kind.sortedNB.End(partIndex);
-  while (partner != end) {
-    if (trialMol.AtomExists(*partner)) {
-      for (uint t = 0; t < trials; ++t) {
-        double distSq;
-        if (currentAxes.InRcut(distSq, trialPos, t, trialMol.GetCoords(),
-                               *partner, box)) {
-          inter[t] += forcefield.particles->CalcEn(
-              distSq, kind.AtomKind(partIndex), kind.AtomKind(*partner), 1.0);
-          if (electrostatic) {
-            double qi_qj_fact = kind.AtomCharge(partIndex) *
-                                kind.AtomCharge(*partner) * num::qqFact;
-
-            if (qi_qj_fact != 0.0) {
-              forcefield.particles->CalcCoulombAdd_1_4(inter[t], distSq,
-                                                       qi_qj_fact, true);
-            }
-          }
-        }
-      }
-    }
-    ++partner;
-  }
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_CBMC_INTRA_NB);
-}
-
-void CalculateEnergy::ParticleInter(double *en, double *real,
-                                    XYZArray const &trialPos, bool *overlap,
-                                    const uint partIndex, const uint molIndex,
-                                    const uint box, const uint trials) const {
-  if (box >= BOXES_WITH_U_NB)
-    return;
-
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_CBMC_INTER);
-  double tempLJ, tempReal;
-  MoleculeKind const &thisKind = mols.GetKind(molIndex);
-  uint kindI = thisKind.AtomKind(partIndex);
-  double kindICharge = thisKind.AtomCharge(partIndex);
-  std::vector<uint> nIndex;
-
-  for (uint t = 0; t < trials; ++t) {
-    nIndex.clear();
-    tempReal = 0.0;
-    tempLJ = 0.0;
-    CellList::Neighbors n = cellList.EnumerateLocal(trialPos[t], box);
-    while (!n.Done()) {
-      nIndex.push_back(*n);
-      n.Next();
-    }
-
-#ifdef _OPENMP
-#pragma omp parallel for default(none) shared(nIndex, overlap, trialPos) \
-firstprivate(kindICharge, kindI, t, box, molIndex, num::qqFact) \
-reduction(+:tempLJ, tempReal)
-#endif
-    for (int i = 0; i < (int)nIndex.size(); i++) {
-      double distSq = 0.0;
-      if (currentAxes.InRcut(distSq, trialPos, t, currentCoords, nIndex[i],
-                             box)) {
-        double lambdaVDW = GetLambdaVDW(molIndex, particleMol[nIndex[i]], box);
-
-        if (distSq < forcefield.rCutLowSq) {
-          overlap[t] |= true;
-        }
-        tempLJ += forcefield.particles->CalcEn(
-            distSq, kindI, particleKind[nIndex[i]], lambdaVDW);
-        if (electrostatic) {
-          double lambdaCoulomb =
-              GetLambdaCoulomb(molIndex, particleMol[nIndex[i]], box);
-          double qi_qj_fact =
-              particleCharge[nIndex[i]] * kindICharge * num::qqFact;
-
-          if (qi_qj_fact != 0.0) {
-            tempReal += forcefield.particles->CalcCoulomb(
-                distSq, kindI, particleKind[nIndex[i]], qi_qj_fact,
-                lambdaCoulomb, box);
-          }
-        }
-      }
-    }
-    en[t] += tempLJ;
-    real[t] += tempReal;
-  }
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_CBMC_INTER);
-}
-
-// Calculates the change in the TC from adding numChange atoms of a kind
-Intermolecular CalculateEnergy::MoleculeTailChange(const uint box,
-                                                   const uint kind,
-                                                   const bool add) const {
-  Intermolecular delta;
-
-  if (box < BOXES_WITH_U_NB) {
-    double sign = (add ? 1.0 : -1.0);
-    uint mkIdxII = kind * mols.GetKindsCount() + kind;
-    for (uint j = 0; j < mols.GetKindsCount(); ++j) {
-      uint mkIdxIJ = j * mols.GetKindsCount() + kind;
-      double rhoDeltaIJ_2 = sign * 2.0 *
-                            (double)(molLookup.NumKindInBox(j, box)) *
-                            currentAxes.volInv[box];
-      delta.energy += mols.pairEnCorrections[mkIdxIJ] * rhoDeltaIJ_2;
-    }
-
-    // We already calculated part of the change for this type in the loop
-    delta.energy += mols.pairEnCorrections[mkIdxII] * currentAxes.volInv[box];
-  }
-  return delta;
-}
-
-// Calculates the change in the Virial TC from adding numChange atoms of a kind
-Intermolecular CalculateEnergy::MoleculeTailVirChange(const uint box,
-                                                      const uint kind,
-                                                      const bool add) const {
-  Intermolecular delta;
-
-  if (box < BOXES_WITH_U_NB) {
-    double sign = (add ? 1.0 : -1.0);
-    uint mkIdxII = kind * mols.GetKindsCount() + kind;
-    for (uint j = 0; j < mols.GetKindsCount(); ++j) {
-      uint mkIdxIJ = j * mols.GetKindsCount() + kind;
-      double rhoDeltaIJ_2 = sign * 2.0 *
-                            (double)(molLookup.NumKindInBox(j, box)) *
-                            currentAxes.volInv[box];
-      delta.virial += mols.pairVirCorrections[mkIdxIJ] * rhoDeltaIJ_2;
-    }
-
-    // We already calculated part of the change for this type in the loop
-    delta.virial += mols.pairVirCorrections[mkIdxII] * currentAxes.volInv[box];
-  }
-  return delta;
-}
-
-// Calculates intramolecular energy of a full molecule
-void CalculateEnergy::MoleculeIntra(const uint molIndex, const uint box,
-                                    double *bondEn) const {
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_MOL_INTRA);
-  bondEn[0] = 0.0, bondEn[1] = 0.0;
-
-  MoleculeKind &molKind = mols.kinds[mols.kIndex[molIndex]];
-  // *2 because we'll be storing inverse bond vectors
-  XYZArray bondVec(molKind.bondList.count * 2);
-
-  BondVectors(bondVec, molKind, molIndex, box);
-  MolBond(bondEn[0], molKind, bondVec, molIndex, box);
-  MolAngle(bondEn[0], molKind, bondVec, box);
-  MolDihedral(bondEn[0], molKind, bondVec, box);
-  MolNonbond(bondEn[1], molKind, molIndex, box);
-  MolNonbond_1_4(bondEn[1], molKind, molIndex, box);
-  MolNonbond_1_3(bondEn[1], molKind, molIndex, box);
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_MOL_INTRA);
-}
-
-// used in molecule exchange for calculating bonded and intraNonbonded energy
-Energy CalculateEnergy::MoleculeIntra(cbmc::TrialMol const &mol) const {
-  GOMC_EVENT_START(1, GomcProfileEvent::EN_MOL_INTRA);
-  double bondEn = 0.0, intraNonbondEn = 0.0;
-  // *2 because we'll be storing inverse bond vectors
-  const MoleculeKind &molKind = mol.GetKind();
-  uint count = molKind.bondList.count;
-  XYZArray bondVec(count * 2);
-  std::vector<bool> bondExist(count * 2, false);
-
-  BondVectors(bondVec, mol, bondExist, molKind);
-  MolBond(bondEn, mol, bondVec, bondExist, molKind);
-  MolAngle(bondEn, mol, bondVec, bondExist, molKind);
-  MolDihedral(bondEn, mol, bondVec, bondExist, molKind);
-  MolNonbond(intraNonbondEn, mol, molKind);
-  MolNonbond_1_4(intraNonbondEn, mol, molKind);
-  MolNonbond_1_3(intraNonbondEn, mol, molKind);
-  GOMC_EVENT_STOP(1, GomcProfileEvent::EN_MOL_INTRA);
-  return Energy(bondEn, intraNonbondEn, 0.0, 0.0, 0.0, 0.0, 0.0);
-}
-
-void CalculateEnergy::BondVectors(XYZArray &vecs, MoleculeKind const &molKind,
-                                  const uint molIndex, const uint box) const {
-  for (uint i = 0; i < molKind.bondList.count; ++i) {
-    uint p1 = mols.start[molIndex] + molKind.bondList.part1[i];
-    uint p2 = mols.start[molIndex] + molKind.bondList.part2[i];
-    XYZ dist = currentCoords.Difference(p2, p1);
-    dist = currentAxes.MinImage(dist, box);
-
-    // store inverse vectors at i+count
-    vecs.Set(i, dist);
-    vecs.Set(i + molKind.bondList.count, -dist.x, -dist.y, -dist.z);
-  }
-}
-
-void CalculateEnergy::BondVectors(XYZArray &vecs, cbmc::TrialMol const &mol,
-                                  std::vector<bool> &bondExist,
-                                  MoleculeKind const &molKind) const {
-  uint box = mol.GetBox();
-  uint count = molKind.bondList.count;
-  for (uint i = 0; i < count; ++i) {
-    uint p1 = molKind.bondList.part1[i];
-    uint p2 = molKind.bondList.part2[i];
-    if (mol.AtomExists(p1) && mol.AtomExists(p2)) {
-      bondExist[i] = true;
-      bondExist[i + count] = true;
-      XYZ dist = mol.GetCoords().Difference(p2, p1);
-      dist = currentAxes.MinImage(dist, box);
-      // store inverse vectors at i+count
-      vecs.Set(i, dist);
-      vecs.Set(i + count, -dist.x, -dist.y, -dist.z);
-    }
-  }
-}
-
-void CalculateEnergy::MolBond(double &energy, MoleculeKind const &molKind,
-                              XYZArray const &vecs, const uint molIndex,
-                              const uint box) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-
-  for (uint b = 0; b < molKind.bondList.count; ++b) {
-    double molLength = vecs.Get(b).Length();
-    energy += forcefield.bonds.Calc(molKind.bondList.kinds[b], molLength);
-    /*if(std::abs(molLength - eqLength) > 0.02) {
-      uint p1 = molKind.bondList.part1[b];
-      uint p2 = molKind.bondList.part2[b];
-      double eqLength = forcefield.bonds.Length(molKind.bondList.kinds[b]);
-      printf("Warning: Box%d, %6d %4s,", box, molIndex, molKind.name.c_str());
-      printf("%3s-%-3s bond: Par-file ", molKind.atomNames[p1].c_str(),
-          molKind.atomNames[p2].c_str());
-      printf("%2.3f A, PDB file %2.3f A!\n", eqLength, molLength);
-    }*/
-  }
-}
-
-void CalculateEnergy::MolBond(double &energy, cbmc::TrialMol const &mol,
-                              XYZArray const &vecs,
-                              std::vector<bool> const &bondExist,
-                              MoleculeKind const &molKind) const {
-  if (mol.GetBox() >= BOXES_WITH_U_B)
-    return;
-
-  uint count = molKind.bondList.count;
-  for (uint b = 0; b < count; ++b) {
-    if (bondExist[b]) {
-      energy += forcefield.bonds.Calc(molKind.bondList.kinds[b],
-                                      vecs.Get(b).Length());
-    }
-  }
-}
-
-void CalculateEnergy::MolAngle(double &energy, MoleculeKind const &molKind,
-                               XYZArray const &vecs, const uint box) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-  for (uint a = 0; a < molKind.angles.Count(); ++a) {
-    // Note: need to reverse the second bond to get angle properly.
-    double theta = Theta(vecs.Get(molKind.angles.GetBond(a, 0)),
-                         -vecs.Get(molKind.angles.GetBond(a, 1)));
-    energy += forcefield.angles->Calc(molKind.angles.GetKind(a), theta);
-  }
-}
-
-void CalculateEnergy::MolAngle(double &energy, cbmc::TrialMol const &mol,
-                               XYZArray const &vecs,
-                               std::vector<bool> const &bondExist,
-                               MoleculeKind const &molKind) const {
-  if (mol.GetBox() >= BOXES_WITH_U_B)
-    return;
-
-  uint count = molKind.angles.Count();
-  for (uint a = 0; a < count; ++a) {
-    if (bondExist[molKind.angles.GetBond(a, 0)] &&
-        bondExist[molKind.angles.GetBond(a, 1)]) {
-      // Note: need to reverse the second bond to get angle properly.
-      double theta = Theta(vecs.Get(molKind.angles.GetBond(a, 0)),
-                           -vecs.Get(molKind.angles.GetBond(a, 1)));
-      energy += forcefield.angles->Calc(molKind.angles.GetKind(a), theta);
-    }
-  }
-}
-
-void CalculateEnergy::MolDihedral(double &energy, MoleculeKind const &molKind,
-                                  XYZArray const &vecs, const uint box) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-  for (uint d = 0; d < molKind.dihedrals.Count(); ++d) {
-    double phi = Phi(vecs.Get(molKind.dihedrals.GetBond(d, 0)),
-                     vecs.Get(molKind.dihedrals.GetBond(d, 1)),
-                     vecs.Get(molKind.dihedrals.GetBond(d, 2)));
-    energy += forcefield.dihedrals.Calc(molKind.dihedrals.GetKind(d), phi);
-  }
-}
-
-void CalculateEnergy::MolDihedral(double &energy, cbmc::TrialMol const &mol,
-                                  XYZArray const &vecs,
-                                  std::vector<bool> const &bondExist,
-                                  MoleculeKind const &molKind) const {
-  if (mol.GetBox() >= BOXES_WITH_U_B)
-    return;
-
-  uint count = molKind.dihedrals.Count();
-  for (uint d = 0; d < count; ++d) {
-    if (bondExist[molKind.dihedrals.GetBond(d, 0)] &&
-        bondExist[molKind.dihedrals.GetBond(d, 1)] &&
-        bondExist[molKind.dihedrals.GetBond(d, 2)]) {
-      double phi = Phi(vecs.Get(molKind.dihedrals.GetBond(d, 0)),
-                       vecs.Get(molKind.dihedrals.GetBond(d, 1)),
-                       vecs.Get(molKind.dihedrals.GetBond(d, 2)));
-      energy += forcefield.dihedrals.Calc(molKind.dihedrals.GetKind(d), phi);
-    }
-  }
-}
-
-// Calculate 1-N nonbonded intra energy
-void CalculateEnergy::MolNonbond(double &energy, MoleculeKind const &molKind,
-                                 const uint molIndex, const uint box) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-
-  double distSq;
-  double qi_qj_fact;
-
-  for (uint i = 0; i < molKind.nonBonded.count; ++i) {
-    uint p1 = mols.start[molIndex] + molKind.nonBonded.part1[i];
-    uint p2 = mols.start[molIndex] + molKind.nonBonded.part2[i];
-    if (currentAxes.InRcut(distSq, currentCoords, p1, p2, box)) {
-      energy += forcefield.particles->CalcEn(
-          distSq, molKind.AtomKind(molKind.nonBonded.part1[i]),
-          molKind.AtomKind(molKind.nonBonded.part2[i]), 1.0);
-      if (electrostatic) {
-        qi_qj_fact = num::qqFact *
-                     molKind.AtomCharge(molKind.nonBonded.part1[i]) *
-                     molKind.AtomCharge(molKind.nonBonded.part2[i]);
-
-        if (qi_qj_fact != 0.0) {
-          forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
-                                                   true);
-        }
-      }
-    }
-  }
-}
-
-// Calculate 1-N nonbonded intra energy using pos
-void CalculateEnergy::MolNonbond(double &energy, cbmc::TrialMol const &mol,
-                                 MoleculeKind const &molKind) const {
-  if (mol.GetBox() >= BOXES_WITH_U_B)
-    return;
-
-  double distSq;
-  double qi_qj_fact;
-  uint count = molKind.nonBonded.count;
-
-  for (uint i = 0; i < count; ++i) {
-    uint p1 = molKind.nonBonded.part1[i];
-    uint p2 = molKind.nonBonded.part2[i];
-    if (mol.AtomExists(p1) && mol.AtomExists(p2)) {
-      if (currentAxes.InRcut(distSq, mol.GetCoords(), p1, p2, mol.GetBox())) {
-        energy += forcefield.particles->CalcEn(distSq, molKind.AtomKind(p1),
-                                               molKind.AtomKind(p2), 1.0);
-        if (electrostatic) {
-          qi_qj_fact =
-              num::qqFact * molKind.AtomCharge(1) * molKind.AtomCharge(p2);
-
-          if (qi_qj_fact != 0.0) {
-            forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
-                                                     true);
-          }
-        }
-      }
-    }
-  }
-}
-
-// Calculate 1-4 nonbonded intra energy
-void CalculateEnergy::MolNonbond_1_4(double &energy,
-                                     MoleculeKind const &molKind,
-                                     const uint molIndex,
-                                     const uint box) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-
-  double distSq;
-  double qi_qj_fact;
-
-  for (uint i = 0; i < molKind.nonBonded_1_4.count; ++i) {
-    uint p1 = mols.start[molIndex] + molKind.nonBonded_1_4.part1[i];
-    uint p2 = mols.start[molIndex] + molKind.nonBonded_1_4.part2[i];
-    if (currentAxes.InRcut(distSq, currentCoords, p1, p2, box)) {
-      forcefield.particles->CalcAdd_1_4(
-          energy, distSq, molKind.AtomKind(molKind.nonBonded_1_4.part1[i]),
-          molKind.AtomKind(molKind.nonBonded_1_4.part2[i]));
-      if (electrostatic) {
-        qi_qj_fact = num::qqFact *
-                     molKind.AtomCharge(molKind.nonBonded_1_4.part1[i]) *
-                     molKind.AtomCharge(molKind.nonBonded_1_4.part2[i]);
-
-        if (qi_qj_fact != 0.0) {
-          forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
-                                                   false);
-        }
-      }
-    }
-  }
-}
-
-// Calculate 1-4 nonbonded intra energy using pos
-void CalculateEnergy::MolNonbond_1_4(double &energy, cbmc::TrialMol const &mol,
-                                     MoleculeKind const &molKind) const {
-  if (mol.GetBox() >= BOXES_WITH_U_B)
-    return;
-
-  double distSq;
-  double qi_qj_fact;
-  uint count = molKind.nonBonded_1_4.count;
-
-  for (uint i = 0; i < count; ++i) {
-    uint p1 = molKind.nonBonded_1_4.part1[i];
-    uint p2 = molKind.nonBonded_1_4.part2[i];
-    if (mol.AtomExists(p1) && mol.AtomExists(p2)) {
-      if (currentAxes.InRcut(distSq, mol.GetCoords(), p1, p2, mol.GetBox())) {
-        forcefield.particles->CalcAdd_1_4(energy, distSq, molKind.AtomKind(p1),
-                                          molKind.AtomKind(p2));
-        if (electrostatic) {
-          qi_qj_fact =
-              num::qqFact * molKind.AtomCharge(p1) * molKind.AtomCharge(p2);
-
-          if (qi_qj_fact != 0.0) {
-            forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
-                                                     false);
-          }
-        }
-      }
-    }
-  }
-}
-
-// Calculate 1-3 nonbonded intra energy
-void CalculateEnergy::MolNonbond_1_3(double &energy,
-                                     MoleculeKind const &molKind,
-                                     const uint molIndex,
-                                     const uint box) const {
-  if (box >= BOXES_WITH_U_B)
-    return;
-
-  double distSq;
-  double qi_qj_fact;
-
-  for (uint i = 0; i < molKind.nonBonded_1_3.count; ++i) {
-    uint p1 = mols.start[molIndex] + molKind.nonBonded_1_3.part1[i];
-    uint p2 = mols.start[molIndex] + molKind.nonBonded_1_3.part2[i];
-    if (currentAxes.InRcut(distSq, currentCoords, p1, p2, box)) {
-      forcefield.particles->CalcAdd_1_4(
-          energy, distSq, molKind.AtomKind(molKind.nonBonded_1_3.part1[i]),
-          molKind.AtomKind(molKind.nonBonded_1_3.part2[i]));
-      if (electrostatic) {
-        qi_qj_fact = num::qqFact *
-                     molKind.AtomCharge(molKind.nonBonded_1_3.part1[i]) *
-                     molKind.AtomCharge(molKind.nonBonded_1_3.part2[i]);
-
-        if (qi_qj_fact != 0.0) {
-          forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
-                                                   false);
-        }
-      }
-    }
-  }
-}
-
-// Calculate 1-3 nonbonded intra energy
-void CalculateEnergy::MolNonbond_1_3(double &energy, cbmc::TrialMol const &mol,
-                                     MoleculeKind const &molKind) const {
-  if (mol.GetBox() >= BOXES_WITH_U_B)
-    return;
-
-  double distSq;
-  double qi_qj_fact;
-  uint count = molKind.nonBonded_1_3.count;
-
-  for (uint i = 0; i < count; ++i) {
-    uint p1 = molKind.nonBonded_1_3.part1[i];
-    uint p2 = molKind.nonBonded_1_3.part2[i];
-    if (mol.AtomExists(p1) && mol.AtomExists(p2)) {
-      if (currentAxes.InRcut(distSq, mol.GetCoords(), p1, p2, mol.GetBox())) {
-        forcefield.particles->CalcAdd_1_4(energy, distSq, molKind.AtomKind(p1),
-                                          molKind.AtomKind(p2));
-        if (electrostatic) {
-          qi_qj_fact =
-              num::qqFact * molKind.AtomCharge(p1) * molKind.AtomCharge(p2);
-
-          if (qi_qj_fact != 0.0) {
-            forcefield.particles->CalcCoulombAdd_1_4(energy, distSq, qi_qj_fact,
-                                                     false);
-          }
-        }
-      }
-    }
-  }
-}
-
-// Calculate 1-3 nonbonded intra energy
-double CalculateEnergy::IntraEnergy_1_3(const double distSq, const uint atom1,
-                                        const uint atom2,
-                                        const uint molIndex) const {
-  if (!forcefield.OneThree)
-    return 0.0;
-
-  double eng = 0.0;
-
-  MoleculeKind const &thisKind = mols.GetKind(molIndex);
-  uint kind1 = thisKind.AtomKind(atom1);
-  uint kind2 = thisKind.AtomKind(atom2);
-
-  if (electrostatic) {
-    double qi_qj_fact =
-        num::qqFact * thisKind.AtomCharge(atom1) * thisKind.AtomCharge(atom2);
-
-    if (qi_qj_fact != 0.0) {
-      forcefield.particles->CalcCoulombAdd_1_4(eng, distSq, qi_qj_fact, false);
-    }
-  }
-  forcefield.particles->CalcAdd_1_4(eng, distSq, kind1, kind2);
-
-  if (std::isnan(eng))
-    eng = num::BIGNUM;
-
-  return eng;
-}
-
-// Calculate 1-4 nonbonded intra energy
-double CalculateEnergy::IntraEnergy_1_4(const double distSq, const uint atom1,
-                                        const uint atom2,
-                                        const uint molIndex) const {
-  if (!forcefield.OneFour)
-    return 0.0;
-
-  double eng = 0.0;
-
-  MoleculeKind const &thisKind = mols.GetKind(molIndex);
-  uint kind1 = thisKind.AtomKind(atom1);
-  uint kind2 = thisKind.AtomKind(atom2);
-
-  if (electrostatic) {
-    double qi_qj_fact =
-        num::qqFact * thisKind.AtomCharge(atom1) * thisKind.AtomCharge(atom2);
-
-    if (qi_qj_fact != 0.0) {
-      forcefield.particles->CalcCoulombAdd_1_4(eng, distSq, qi_qj_fact, false);
-    }
-  }
-  forcefield.particles->CalcAdd_1_4(eng, distSq, kind1, kind2);
-
-  if (std::isnan(eng))
-    eng = num::BIGNUM;
-
-  return eng;
-}
-
-//! Calculates energy tail corrections for the box
-void CalculateEnergy::EnergyCorrection(SystemPotential &pot,
-                                       BoxDimensions const &boxAxes,
-                                       const uint box) const {
-  if (box >= BOXES_WITH_U_NB) {
-    return;
-  }
-
-  double en = 0.0;
-  for (uint i = 0; i < mols.GetKindsCount(); ++i) {
-    uint numI = molLookup.NumKindInBox(i, box);
-    for (uint j = 0; j < mols.GetKindsCount(); ++j) {
-      uint numJ = molLookup.NumKindInBox(j, box);
-      en += mols.pairEnCorrections[i * mols.GetKindsCount() + j] * numI * numJ *
-            boxAxes.volInv[box];
-    }
-  }
-
-  if (!forcefield.freeEnergy) {
-    pot.boxEnergy[box].tailCorrection = en;
-  }
-#if ENSEMBLE == NVT || ENSEMBLE == NPT
-  else {
-    // Get the kind and lambda value
-    uint fk = mols.GetMolKind(lambdaRef.GetMolIndex(box));
-    double lambdaVDW = lambdaRef.GetLambdaVDW(lambdaRef.GetMolIndex(box), box);
-    // remove the LRC for one molecule with lambda = 1
-    en += MoleculeTailChange(box, fk, false).energy;
-
-    // Add the LRC for fractional molecule
-    for (uint i = 0; i < mols.GetKindsCount(); ++i) {
-      uint molNum = molLookup.NumKindInBox(i, box);
-      if (i == fk) {
-        --molNum; // We have one less molecule (it is fractional molecule)
-      }
-      double rhoDeltaIJ_2 = 2.0 * (double)(molNum)*currentAxes.volInv[box];
-      en += lambdaVDW * mols.pairEnCorrections[fk * mols.GetKindsCount() + i] *
-            rhoDeltaIJ_2;
-    }
-    // We already calculated part of the change for this type in the loop
-    en += lambdaVDW * mols.pairEnCorrections[fk * mols.GetKindsCount() + fk] *
-          currentAxes.volInv[box];
-    pot.boxEnergy[box].tailCorrection = en;
-  }
-#endif
-}
-
-//! Calculates energy corrections for the box
-double CalculateEnergy::EnergyCorrection(const uint box,
-                                         const uint *kCount) const {
-  if (box >= BOXES_WITH_U_NB) {
-    return 0.0;
-  }
-
-  double tailCorrection = 0.0;
-  for (uint i = 0; i < mols.kindsCount; ++i) {
-    for (uint j = 0; j < mols.kindsCount; ++j) {
-      tailCorrection += mols.pairEnCorrections[i * mols.kindsCount + j] *
-                        kCount[i] * kCount[j] * currentAxes.volInv[box];
-    }
-  }
-  return tailCorrection;
-}
-
-void CalculateEnergy::VirialCorrection(Virial &virial,
-                                       BoxDimensions const &boxAxes,
-                                       const uint box) const {
-  if (box >= BOXES_WITH_U_NB) {
-    return;
-  }
-  double vir = 0.0;
-
-  for (uint i = 0; i < mols.GetKindsCount(); ++i) {
-    uint numI = molLookup.NumKindInBox(i, box);
-    for (uint j = 0; j < mols.GetKindsCount(); ++j) {
-      uint numJ = molLookup.NumKindInBox(j, box);
-      vir += mols.pairVirCorrections[i * mols.GetKindsCount() + j] * numI *
-             numJ * boxAxes.volInv[box];
-    }
-  }
-
-  if (!forcefield.freeEnergy) {
-    virial.tailCorrection = vir;
-  }
-#if ENSEMBLE == NVT || ENSEMBLE == NPT
-  else {
-    // Get the kind and lambda value
-    uint fk = mols.GetMolKind(lambdaRef.GetMolIndex(box));
-    double lambdaVDW = lambdaRef.GetLambdaVDW(lambdaRef.GetMolIndex(box), box);
-    // remove the LRC for one molecule with lambda = 1
-    vir += MoleculeTailVirChange(box, fk, false).virial;
-
-    // Add the LRC for fractional molecule
-    for (uint i = 0; i < mols.GetKindsCount(); ++i) {
-      uint molNum = molLookup.NumKindInBox(i, box);
-      if (i == fk) {
-        --molNum; // We have one less molecule (it is fractional molecule)
-      }
-      double rhoDeltaIJ_2 = 2.0 * (double)(molNum)*currentAxes.volInv[box];
-      vir += lambdaVDW *
-             mols.pairVirCorrections[fk * mols.GetKindsCount() + i] *
-             rhoDeltaIJ_2;
-    }
-    // We already calculated part of the change for this type in the loop
-    vir += lambdaVDW * mols.pairVirCorrections[fk * mols.GetKindsCount() + fk] *
-           currentAxes.volInv[box];
-    virial.tailCorrection = vir;
-  }
-#endif
-}
-
-//! Calculate Torque
-void CalculateEnergy::CalculateTorque(std::vector<uint> &moleculeIndex,
-                                      XYZArray const &coordinates,
-                                      XYZArray const &com,
-                                      XYZArray const &atomForce,
-                                      XYZArray const &atomForceRec,
-                                      XYZArray &molTorque, const uint box) {
-  if (multiParticleEnabled && (box < BOXES_WITH_U_NB)) {
-    GOMC_EVENT_START(1, GomcProfileEvent::BOX_TORQUE);
-    // make a pointer to mol torque for OpenMP
-    double *torquex = molTorque.x;
-    double *torquey = molTorque.y;
-    double *torquez = molTorque.z;
-
-#if defined _OPENMP
-#pragma omp parallel for default(none)                                         \
-    shared(atomForce, atomForceRec, com, coordinates, moleculeIndex, torquex,  \
-           torquey, torquez) firstprivate(box)
-#endif
-    for (int m = 0; m < (int)moleculeIndex.size(); m++) {
-      int mIndex = moleculeIndex[m];
-      int length = mols.GetKind(mIndex).NumAtoms();
-      int start = mols.MolStart(mIndex);
-      double tx = 0.0;
-      double ty = 0.0;
-      double tz = 0.0;
-      // atom iterator
-      for (int p = start; p < start + length; p++) {
-        XYZ distFromCOM = coordinates.Difference(p, com, mIndex);
-        distFromCOM = currentAxes.MinImage(distFromCOM, box);
-        XYZ tempTorque = Cross(distFromCOM, atomForce[p] + atomForceRec[p]);
-
-        tx += tempTorque.x;
-        ty += tempTorque.y;
-        tz += tempTorque.z;
-      }
-      torquex[mIndex] = tx;
-      torquey[mIndex] = ty;
-      torquez[mIndex] = tz;
-    }
-  }
-  GOMC_EVENT_STOP(1, GomcProfileEvent::BOX_TORQUE);
-}
-
-void CalculateEnergy::ResetForce(XYZArray &atomForce, XYZArray &molForce,
-                                 uint box) {
-  if (multiParticleEnabled) {
-    uint length, start;
-
-    // molecule iterator
-    MoleculeLookup::box_iterator thisMol = molLookup.BoxBegin(box);
-    MoleculeLookup::box_iterator end = molLookup.BoxEnd(box);
-
-    while (thisMol != end) {
-      length = mols.GetKind(*thisMol).NumAtoms();
-      start = mols.MolStart(*thisMol);
-
-      molForce.Set(*thisMol, 0.0, 0.0, 0.0);
-      for (uint p = start; p < start + length; p++) {
-        atomForce.Set(p, 0.0, 0.0, 0.0);
-      }
-      thisMol++;
-    }
-  }
-}
-
-uint CalculateEnergy::NumberOfParticlesInsideBox(uint box) {
-  uint numberOfAtoms = 0;
-
-  for (int k = 0; k < (int)mols.GetKindsCount(); k++) {
-    MoleculeKind const &thisKind = mols.kinds[k];
-    numberOfAtoms += thisKind.NumAtoms() * molLookup.NumKindInBox(k, box);
-  }
-
-  return numberOfAtoms;
-}
-
-bool CalculateEnergy::FindMolInCavity(std::vector<std::vector<uint>> &mol,
-                                      const XYZ &center, const XYZ &cavDim,
-                                      const XYZArray &invCav, const uint box,
-                                      const uint kind, const uint exRatio) {
-  uint k;
-  mol.clear();
-  mol.resize(molLookup.GetNumKind());
-  double maxLength = cavDim.Max();
-
-  if (maxLength <= currentAxes.rCut[box]) {
-    CellList::Neighbors n = cellList.EnumerateLocal(center, box);
-    while (!n.Done()) {
-      if (currentAxes.InCavity(currentCOM.Get(particleMol[*n]), center, cavDim,
-                               invCav, box)) {
-        uint molIndex = particleMol[*n];
-        // if molecule can be transfer between boxes
-        if (!molLookup.IsNoSwap(molIndex)) {
-          k = mols.GetMolKind(molIndex);
-          bool exist =
-              std::find(mol[k].begin(), mol[k].end(), molIndex) != mol[k].end();
-          if (!exist)
-            mol[k].push_back(molIndex);
-        }
-      }
-      n.Next();
-    }
-  } else {
-    MoleculeLookup::box_iterator n = molLookup.BoxBegin(box);
-    MoleculeLookup::box_iterator end = molLookup.BoxEnd(box);
-    while (n != end) {
-      if (currentAxes.InCavity(currentCOM.Get(*n), center, cavDim, invCav,
-                               box)) {
-        uint molIndex = *n;
-        // if molecule can be transfer between boxes
-        if (!molLookup.IsNoSwap(molIndex)) {
-          k = mols.GetMolKind(molIndex);
-          bool exist =
-              std::find(mol[k].begin(), mol[k].end(), molIndex) != mol[k].end();
-          if (!exist)
-            mol[k].push_back(molIndex);
-        }
-      }
-      n++;
-    }
-  }
-
-  // If the is exRate and more molecule kind in cavity, return true.
-  if (mol[kind].size() >= exRatio)
-    return true;
-  else
-    return false;
-}
-
-void CalculateEnergy::SingleMoleculeInter(
-    Energy &interEnOld, Energy &interEnNew, const double lambdaOldVDW,
-    const double lambdaNewVDW, const double lambdaOldCoulomb,
-    const double lambdaNewCoulomb, const uint molIndex, const uint box) const {
-  double tempREnOld = 0.0, tempLJEnOld = 0.0;
-  double tempREnNew = 0.0, tempLJEnNew = 0.0;
-  if (box < BOXES_WITH_U_NB) {
-    uint length = mols.GetKind(molIndex).NumAtoms();
-    uint start = mols.MolStart(molIndex);
-
-    for (uint p = 0; p < length; ++p) {
-      uint atom = start + p;
-      CellList::Neighbors n = cellList.EnumerateLocal(currentCoords[atom], box);
-
-      std::vector<uint> nIndex;
-      // store atom index in neighboring cell
-      while (!n.Done()) {
-        if (particleMol[*n] != (int)molIndex) {
-          nIndex.push_back(*n);
-        }
-        n.Next();
-      }
-
-#ifdef _OPENMP
-#pragma omp parallel for default(none) shared(nIndex) \
-firstprivate(atom, box, lambdaNewCoulomb, lambdaOldCoulomb, lambdaOldVDW, \
-lambdaNewVDW, num::qqFact) reduction(+:tempREnOld, tempLJEnOld, tempREnNew, \
-tempLJEnNew)
-#endif
-      for (int i = 0; i < (int)nIndex.size(); i++) {
-        double distSq = 0.0;
-        XYZ virComponents;
-        if (currentAxes.InRcut(distSq, virComponents, currentCoords, atom,
-                               nIndex[i], box)) {
-          if (electrostatic) {
-            double qi_qj_fact =
-                particleCharge[atom] * particleCharge[nIndex[i]] * num::qqFact;
-            if (qi_qj_fact != 0.0) {
-              tempREnNew += forcefield.particles->CalcCoulomb(
-                  distSq, particleKind[atom], particleKind[nIndex[i]],
-                  qi_qj_fact, lambdaNewCoulomb, box);
-              tempREnOld += forcefield.particles->CalcCoulomb(
-                  distSq, particleKind[atom], particleKind[nIndex[i]],
-                  qi_qj_fact, lambdaOldCoulomb, box);
-            }
-          }
-
-          tempLJEnNew += forcefield.particles->CalcEn(
-              distSq, particleKind[atom], particleKind[nIndex[i]],
-              lambdaNewVDW);
-          tempLJEnOld += forcefield.particles->CalcEn(
-              distSq, particleKind[atom], particleKind[nIndex[i]],
-              lambdaOldVDW);
-        }
-      }
-    }
-  }
-
-  interEnNew.inter = tempLJEnNew;
-  interEnNew.real = tempREnNew;
-  interEnOld.inter = tempLJEnOld;
-  interEnOld.real = tempREnOld;
-}
-
-double CalculateEnergy::GetLambdaVDW(uint molA, uint molB, uint box) const {
-  double lambda = 1.0;
-  lambda *= lambdaRef.GetLambdaVDW(molA, box);
-  lambda *= lambdaRef.GetLambdaVDW(molB, box);
-  return lambda;
-}
-
-double CalculateEnergy::GetLambdaCoulomb(uint molA, uint molB, uint box) const {
-  double lambda = 1.0;
-  lambda *= lambdaRef.GetLambdaCoulomb(molA, box);
-  lambda *= lambdaRef.GetLambdaCoulomb(molB, box);
-  // no need for sq root for inter energy. Always one of the molecules has
-  // lambda 1
-  return lambda;
-}
-
-// Calculates the change in the TC from adding numChange atoms of a kind
-double CalculateEnergy::MoleculeTailChange(const uint box, const uint kind,
-                                           const std::vector<uint> &kCount,
-                                           const double lambdaOld,
-                                           const double lambdaNew) const {
-  if (box >= BOXES_WITH_U_NB) {
-    return 0.0;
-  }
-
-  double tcDiff = 0.0;
-  uint ktot = mols.GetKindsCount();
-  for (uint i = 0; i < ktot; ++i) {
-    // We should have only one molecule of fractional kind
-    double rhoDeltaIJ_2 = 2.0 * (double)(kCount[i]) * currentAxes.volInv[box];
-    uint index = kind * ktot + i;
-    tcDiff +=
-        (lambdaNew - lambdaOld) * mols.pairEnCorrections[index] * rhoDeltaIJ_2;
-  }
-  uint index = kind * ktot + kind;
-  tcDiff += (lambdaNew - lambdaOld) * mols.pairEnCorrections[index] *
-            currentAxes.volInv[box];
-
-  return tcDiff;
-}
-
-// Calculate the change in energy due to lambda
-void CalculateEnergy::EnergyChange(Energy *energyDiff, Energy &dUdL_VDW,
-                                   Energy &dUdL_Coul,
-                                   const std::vector<double> &lambda_VDW,
-                                   const std::vector<double> &lambda_Coul,
-                                   const uint iState, const uint molIndex,
-                                   const uint box) const {
-  if (box >= BOXES_WITH_U_NB) {
-    return;
-  }
-
-  GOMC_EVENT_START(1, GomcProfileEvent::FREE_ENERGY);
-  uint length = mols.GetKind(molIndex).NumAtoms();
-  uint start = mols.MolStart(molIndex);
-  uint lambdaSize = lambda_VDW.size();
-  double *tempLJEnDiff = new double[lambdaSize];
-  double *tempREnDiff = new double[lambdaSize];
-  double dudl_VDW = 0.0, dudl_Coul = 0.0;
-  std::fill_n(tempLJEnDiff, lambdaSize, 0.0);
-  std::fill_n(tempREnDiff, lambdaSize, 0.0);
-
-  // Calculate the vdw, short range electrostatic energy
-  for (uint p = 0; p < length; ++p) {
-    uint atom = start + p;
-    CellList::Neighbors n = cellList.EnumerateLocal(currentCoords[atom], box);
-
-    std::vector<uint> nIndex;
-    // store atom index in neighboring cell
-    while (!n.Done()) {
-      if (particleMol[*n] != (int)molIndex) {
-        nIndex.push_back(*n);
-      }
-      n.Next();
-    }
-
-#if defined _OPENMP && _OPENMP >= 201511 // check if OpenMP version is 4.5
-#pragma omp parallel for default(none) shared(lambda_Coul, lambda_VDW, nIndex) \
-firstprivate(box, atom, iState, lambdaSize, num::qqFact) \
-reduction(+:dudl_VDW, dudl_Coul, tempREnDiff[:lambdaSize], tempLJEnDiff[:lambdaSize])
-#endif
-    for (int i = 0; i < (int)nIndex.size(); i++) {
-      double distSq = 0.0;
-      XYZ virComponents;
-      if (currentAxes.InRcut(distSq, virComponents, currentCoords, atom,
-                             nIndex[i], box)) {
-        double qi_qj_fact = 0.0, energyOldCoul = 0.0;
-        // Calculate the energy of current state
-        double energyOldVDW = forcefield.particles->CalcEn(
-            distSq, particleKind[atom], particleKind[nIndex[i]],
-            lambda_VDW[iState]);
-        // Calculate du/dl in VDW for current state
-        dudl_VDW += forcefield.particles->CalcdEndL(distSq, particleKind[atom],
-                                                    particleKind[nIndex[i]],
-                                                    lambda_VDW[iState]);
-
-        if (electrostatic) {
-          qi_qj_fact =
-              particleCharge[atom] * particleCharge[nIndex[i]] * num::qqFact;
-          if (qi_qj_fact != 0.0) {
-            energyOldCoul = forcefield.particles->CalcCoulomb(
-                distSq, particleKind[atom], particleKind[nIndex[i]], qi_qj_fact,
-                lambda_Coul[iState], box);
-            // Calculate du/dl in Coulomb for current state.
-            dudl_Coul += forcefield.particles->CalcCoulombdEndL(
-                distSq, particleKind[atom], particleKind[nIndex[i]], qi_qj_fact,
-                lambda_Coul[iState], box);
-          }
-        }
-
-        for (int s = 0; s < (int)lambdaSize; s++) {
-          // Calculate the energy of other state
-          tempLJEnDiff[s] += forcefield.particles->CalcEn(
-              distSq, particleKind[atom], particleKind[nIndex[i]],
-              lambda_VDW[s]);
-          tempLJEnDiff[s] += -energyOldVDW;
-          if (electrostatic && qi_qj_fact != 0.0) {
-            tempREnDiff[s] += forcefield.particles->CalcCoulomb(
-                distSq, particleKind[atom], particleKind[nIndex[i]], qi_qj_fact,
-                lambda_Coul[s], box);
-            tempREnDiff[s] += -energyOldCoul;
-          }
-        }
-      }
-    }
-  }
-
-  dUdL_VDW.inter = dudl_VDW;
-  dUdL_Coul.real = dudl_Coul;
-  for (int s = 0; s < (int)lambdaSize; s++) {
-    energyDiff[s].inter += tempLJEnDiff[s];
-    energyDiff[s].real += tempREnDiff[s];
-  }
-  delete[] tempLJEnDiff;
-  delete[] tempREnDiff;
-
-  if (forcefield.useLRC) {
-    // Need to calculate change in LRC
-    ChangeLRC(energyDiff, dUdL_VDW, lambda_VDW, iState, molIndex, box);
-  }
-  // Need to calculate change in self
-  calcEwald->ChangeSelf(energyDiff, dUdL_Coul, lambda_Coul, iState, molIndex,
-                        box);
-  // Need to calculate change in correction
-  calcEwald->ChangeCorrection(energyDiff, dUdL_Coul, lambda_Coul, iState,
-                              molIndex, box);
-  // Need to calculate change in Reciprocal
-  calcEwald->ChangeRecip(energyDiff, dUdL_Coul, lambda_Coul, iState, molIndex,
-                         box);
-  GOMC_EVENT_STOP(1, GomcProfileEvent::FREE_ENERGY);
-}
-
-// Calculate the change in LRC for each state
-void CalculateEnergy::ChangeLRC(Energy *energyDiff, Energy &dUdL_VDW,
-                                const std::vector<double> &lambda_VDW,
-                                const uint iState, const uint molIndex,
-                                const uint box) const {
-  // Get the kind and lambda value
-  uint fk = mols.GetMolKind(molIndex);
-  double lambda_istate = lambda_VDW[iState];
-
-  // Add the LRC for fractional molecule
-  for (size_t s = 0; s < lambda_VDW.size(); s++) {
-    double lambdaVDW = lambda_VDW[s];
-    for (uint i = 0; i < mols.GetKindsCount(); ++i) {
-      uint molNum = molLookup.NumKindInBox(i, box);
-      if (i == fk) {
-        --molNum; // We have one less molecule (it is fractional molecule)
-      }
-      double rhoDeltaIJ_2 = 2.0 * (double)(molNum)*currentAxes.volInv[box];
-      energyDiff[s].tailCorrection +=
-          mols.pairEnCorrections[fk * mols.GetKindsCount() + i] * rhoDeltaIJ_2 *
-          (lambdaVDW - lambda_istate);
-      if (s == iState) {
-        // Calculate du/dl in VDW LRC for current state
-        dUdL_VDW.tailCorrection +=
-            mols.pairEnCorrections[fk * mols.GetKindsCount() + i] *
-            rhoDeltaIJ_2;
-      }
-    }
-    energyDiff[s].tailCorrection +=
-        mols.pairEnCorrections[fk * mols.GetKindsCount() + fk] *
-        currentAxes.volInv[box] * (lambdaVDW - lambda_istate);
-    if (s == iState) {
-      // Calculate du/dl in VDW LRC for current state
-      dUdL_VDW.tailCorrection +=
-          mols.pairEnCorrections[fk * mols.GetKindsCount() + fk] *
-          currentAxes.volInv[box];
-    }
-  }
-}
-=======
 /*******************************************************************************
 GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
 Copyright (C) 2022 GOMC Group

From 414a644beab547430b28e4e26e023cd84d027e4c Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Fri, 11 Aug 2023 22:59:45 -0400
Subject: [PATCH 08/42] Fixes the gcc compiler issues but not linker

---
 CMake/GOMCCPUSetup.cmake  |  57 +++++++-----------
 CMake/GOMCCUDASetup.cmake | 121 +++++++++++++++++++-------------------
 CMakeLists.txt            |  34 +++++++++--
 README.md                 |   4 +-
 src/FFSetup.cpp           |   3 +-
 5 files changed, 111 insertions(+), 108 deletions(-)

diff --git a/CMake/GOMCCPUSetup.cmake b/CMake/GOMCCPUSetup.cmake
index a7a937905..d96bb8e1b 100644
--- a/CMake/GOMCCPUSetup.cmake
+++ b/CMake/GOMCCPUSetup.cmake
@@ -21,15 +21,11 @@ set(CMAKE_CXX_STANDARD_REQUIRED true)
 
 if(ENSEMBLE_NVT)
    add_executable(NVT ${sources} ${headers} ${libHeaders} ${libSources})
-   # Set Compiler and linker flags for each compiler
-   target_compile_options(NVT
-      PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>)
-   target_link_options(NVT
-      PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
-             $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>)
+   # Apply the shared compile and link flags to C++ compilation/linking only.
+   target_compile_options(NVT
+      PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>")
+   target_link_options(NVT
+      PRIVATE "$<$<LINK_LANGUAGE:CXX>:${CMAKE_LINK_FLAGS}>")
    set_target_properties(NVT PROPERTIES 
       OUTPUT_NAME ${NVT_name}
       COMPILE_FLAGS "${NVT_flags}")
@@ -44,15 +40,11 @@ endif()
 
 if(ENSEMBLE_GEMC)
    add_executable(GEMC ${sources} ${headers} ${libHeaders} ${libSources})
-   # Set Compiler and linker flags for each compiler
-   target_compile_options(GEMC
-      PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>)
-   target_link_options(GEMC
-      PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
-             $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>)
+   # Apply the shared compile and link flags to C++ compilation/linking only.
+   target_compile_options(GEMC
+      PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>")
+   target_link_options(GEMC
+      PRIVATE "$<$<LINK_LANGUAGE:CXX>:${CMAKE_LINK_FLAGS}>")
    set_target_properties(GEMC PROPERTIES 
       OUTPUT_NAME ${GE_name}
       COMPILE_FLAGS "${GE_flags}")
@@ -67,15 +59,11 @@ endif()
 
 if(ENSEMBLE_GCMC)
    add_executable(GCMC ${sources} ${headers} ${libHeaders} ${libSources})
-   # Set Compiler and linker flags for each compiler
-   target_compile_options(GCMC
-      PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>)
-   target_link_options(GCMC
-      PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
-             $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>)
+   # Set compiler and linker flags for each compiler
+    target_compile_options(GCMC
+       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>)
+    target_link_options(GCMC
+       PUBLIC $<$<LINK_LANGUAGE:CXX>:${CMAKE_LINK_FLAGS}>)
    set_target_properties(GCMC PROPERTIES 
       OUTPUT_NAME ${GC_name}
       COMPILE_FLAGS "${GC_flags}")
@@ -90,15 +78,11 @@ endif()
 
 if(ENSEMBLE_NPT)
    add_executable(NPT ${sources} ${headers} ${libHeaders} ${libSources})
-   # Set Compiler and linker flags for each compiler
-   target_compile_options(NPT
-      PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>)
-   target_link_options(NPT
-      PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-             $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
-             $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>)
+   # Set compiler and linker flags for each compiler
+    target_compile_options(NPT
+       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>)
+    target_link_options(NPT
+       PUBLIC $<$<LINK_LANGUAGE:CXX>:${CMAKE_LINK_FLAGS}>)
    set_target_properties(NPT PROPERTIES 
       OUTPUT_NAME ${NPT_name}
       COMPILE_FLAGS "${NPT_flags}")
@@ -110,4 +94,3 @@ if(ENSEMBLE_NPT)
       target_link_libraries(NPT ${MPI_LIBRARIES})
    endif()
 endif()
-
diff --git a/CMake/GOMCCUDASetup.cmake b/CMake/GOMCCUDASetup.cmake
index ef05adb8c..dd537ea43 100644
--- a/CMake/GOMCCUDASetup.cmake
+++ b/CMake/GOMCCUDASetup.cmake
@@ -3,26 +3,31 @@
 set(CMAKE_CUDA_COMP_FLAGS -DGOMC_CUDA -DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT)
 
 if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-	message("-- Debug build type detected, passing '-g -G --keep' to nvcc")
-	set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -g -G --keep)
-endif()
-
-if(GOMC_OPT)
-	set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -O3)
+    message("-- Debug build type detected, passing '-g -G --keep' to nvcc")
+    set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -g -G --keep)
 endif()
 
 if(GOMC_NVTX_ENABLED)
-	message("-- Enabling profiling with NVTX for GPU")
-	set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -DGOMC_NVTX_ENABLED)
+    message("-- Enabling profiling with NVTX for GPU")
+    set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -DGOMC_NVTX_ENABLED)
 endif()
 
 # Set architecture flags based on the CMake version
 # Once CMake 3.23 has been available for a while, we should just use
 # set(CMAKE_CUDA_ARCHITECTURES all) and remove the if block
-if (CMAKE_MAJOR_VERSION VERSION_GREATER 3 OR CMAKE_MINOR_VERSION VERSION_GREATER_EQUAL 23)
-    set(CMAKE_CUDA_ARCHITECTURES all)
+# Can't get CUDA link time optimization enabled for all architectures directly, so need to do one-by-one.
+if(NOT GOMC_OPT)
+   if (CMAKE_MAJOR_VERSION VERSION_GREATER 3 OR CMAKE_MINOR_VERSION VERSION_GREATER_EQUAL 23)
+       set(CMAKE_CUDA_ARCHITECTURES all)
+   else()
+       set(CMAKE_CUDA_ARCHITECTURES 50;60;70;75;80)
+   endif()
 else()
-    set(CMAKE_CUDA_ARCHITECTURES 50;60;70;80)
+    set(CMAKE_CUDA_ARCHITECTURES OFF)
+    set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_50,code=lto_50")
+    set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_60,code=lto_60")
+    set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_70,code=lto_70")
+    set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_80,code=lto_80")
 endif()
 
 include_directories(src/GPU)
@@ -42,12 +47,11 @@ set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_CXX_STANDARD_REQUIRED true)
 
 # Turn off warning that CUDA files were not compiled with the -ipo flag
-if(GOMC_OPT)
-   set(CMAKE_INTEL_LINK_FLAGS ${CMAKE_INTEL_LINK_FLAGS} -diag-disable=11003)
-endif()
+# if(GOMC_OPT)
+   # set(CMAKE_INTEL_LINK_FLAGS ${CMAKE_INTEL_LINK_FLAGS} -diag-disable=11003)
+# endif()
 
-# Only disable the warning on deprecated GPU targets when compiling, not linking
-set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_COMP_FLAGS})
+# Disable the warning on deprecated GPU targets
 set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -Wno-deprecated-gpu-targets)
 
 include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
@@ -58,28 +62,27 @@ if(ENSEMBLE_GPU_NVT)
     ${sources} ${headers} ${libHeaders} ${libSources})
     # Set compiler and linker flags for each compiler
     target_compile_options(GPU_NVT
-       PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
-              $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
-             $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>
-              $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS}>)
-    target_link_options(GPU_NVT
-       PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-              $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
-              $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>
-              $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
+       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
+              $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
+    # target_link_options(GPU_NVT
+       # PUBLIC $<$<LINK_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
+              # $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+              # $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>
+              # $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
+              # $<$<LINK_LANG_AND_ID:CUDA,NVIDIA>: -dlto>)
     set_target_properties(GPU_NVT PROPERTIES
         CUDA_SEPARABLE_COMPILATION ON
         OUTPUT_NAME ${GPU_NVT_name}
         COMPILE_FLAGS "${GPU_NVT_flags}")
-	if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-		message("-- Debug build type detected, GPU_NVT setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
-    	set_property(TARGET GPU_NVT PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
-	endif()
+    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+        message("-- Debug build type detected, GPU_NVT setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
+        set_property(TARGET GPU_NVT PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+    endif()
     if(WIN32)
         target_link_libraries(GPU_NVT ws2_32)
     endif()
     if(MPI_FOUND)
-	    target_link_libraries(GPU_NVT ${MPI_LIBRARIES})
+        target_link_libraries(GPU_NVT ${MPI_LIBRARIES})
     endif()
 endif()
 
@@ -88,28 +91,26 @@ if(ENSEMBLE_GPU_GEMC)
     ${headers} ${libHeaders} ${libSources})
     # Set compiler and linker flags for each compiler
     target_compile_options(GPU_GEMC
-       PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
-              $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
-              $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>
-              $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS}>)
-    target_link_options(GPU_GEMC
-       PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-              $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
-              $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>
-              $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
+       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
+              $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
+    # target_link_options(GPU_GEMC
+       # PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
+              # $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
+              # $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>
+              # $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
     set_target_properties(GPU_GEMC PROPERTIES
         CUDA_SEPARABLE_COMPILATION ON
         OUTPUT_NAME ${GPU_GE_name}
         COMPILE_FLAGS "${GPU_GE_flags}")
-	if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-		message("-- Debug build type detected, GPU_GEMC setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
-    	set_property(TARGET GPU_GEMC PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
-	endif()
+    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+        message("-- Debug build type detected, GPU_GEMC setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
+        set_property(TARGET GPU_GEMC PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+    endif()
     if(WIN32)
         target_link_libraries(GPU_GEMC ws2_32)
     endif()
     if(MPI_FOUND)
-	    target_link_libraries(GPU_GEMC ${MPI_LIBRARIES})
+        target_link_libraries(GPU_GEMC ${MPI_LIBRARIES})
     endif()
 endif()
 
@@ -118,10 +119,8 @@ if(ENSEMBLE_GPU_GCMC)
     ${headers} ${libHeaders} ${libSources})
     # Set compiler and linker flags for each compiler
     target_compile_options(GPU_GCMC
-       PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
-              $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
-              $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>
-              $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS}>)
+       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
+              $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
     target_link_options(GPU_GCMC
        PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
               $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
@@ -131,15 +130,15 @@ if(ENSEMBLE_GPU_GCMC)
         CUDA_SEPARABLE_COMPILATION ON
         OUTPUT_NAME ${GPU_GC_name}
         COMPILE_FLAGS "${GPU_GC_flags}")
-	if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-		message("-- Debug build type detected, GPU_GCMC setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
-    	set_property(TARGET GPU_GCMC PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
-	endif()
+    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+        message("-- Debug build type detected, GPU_GCMC setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
+        set_property(TARGET GPU_GCMC PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+    endif()
     if(WIN32)
         target_link_libraries(GPU_GCMC ws2_32)
     endif()
     if(MPI_FOUND)
-	    target_link_libraries(GPU_GCMC ${MPI_LIBRARIES})
+        target_link_libraries(GPU_GCMC ${MPI_LIBRARIES})
     endif()
 endif()
 
@@ -148,10 +147,8 @@ if(ENSEMBLE_GPU_NPT)
     ${headers} ${libHeaders} ${libSources})
     # Set compiler and linker flags for each compiler
     target_compile_options(GPU_NPT
-       PUBLIC $<$<COMPILE_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_COMP_FLAGS}>
-              $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_COMP_FLAGS}>
-              $<$<COMPILE_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_COMP_FLAGS}>
-              $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS}>)
+       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
+              $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
     target_link_options(GPU_NPT
        PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
               $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
@@ -161,14 +158,14 @@ if(ENSEMBLE_GPU_NPT)
         CUDA_SEPARABLE_COMPILATION ON
         OUTPUT_NAME ${GPU_NPT_name}
         COMPILE_FLAGS "${GPU_NPT_flags}")
-	if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-		message("-- Debug build type detected, GPU_NPT setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
-    	set_property(TARGET GPU_NPT PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
-	endif()
+    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+        message("-- Debug build type detected, GPU_NPT setting CUDA_RESOLVE_DEVICE_SYMBOLS ON")
+        set_property(TARGET GPU_NPT PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
+    endif()
     if(WIN32)
         target_link_libraries(GPU_NPT ws2_32)
     endif()
     if(MPI_FOUND)
-	    target_link_libraries(GPU_NPT ${MPI_LIBRARIES})
+        target_link_libraries(GPU_NPT ${MPI_LIBRARIES})
     endif()
 endif()
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d54269168..33e4aa71c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,7 @@
 cmake_minimum_required(VERSION 3.18)
 
 project(GOMC)
+set(CMAKE_VERBOSE_MAKEFILE ON)
 
 include_directories(lib)
 include_directories(src)
@@ -30,17 +31,23 @@ endif(NOT CMAKE_BUILD_TYPE)
 #the source compiler flags to NVCC properly.
 if(GOMC_OPT)
    set(CMAKE_INTEL_COMP_FLAGS -Ofast -ipo -xHost)
+   set(CMAKE_INTEL_CUDA_COMP_FLAGS "SHELL:-Xcompiler -Ofast" "SHELL:-Xcompiler -ipo" "SHELL:-Xcompiler -xHost")
    set(CMAKE_INTEL_LINK_FLAGS -Ofast -ipo -xHost)
-   set(CMAKE_GNU_COMP_FLAGS -flto -O3 -march=native)
-   set(CMAKE_GNU_LINK_FLAGS -flto -O3 -march=native)
-   set(CMAKE_CLANG_COMP_FLAGS -flto -Ofast3 -march=native)
-   set(CMAKE_CLANG_LINK_FLAGS -flto -Ofast3 -march=native)
+   set(CMAKE_GNU_COMP_FLAGS -flto -fno-fat-lto-objects -march=native)
+   set(CMAKE_GNU_CUDA_COMP_FLAGS "SHELL:-Xcompiler -O3" "SHELL:-Xcompiler -flto" "SHELL:-Xcompiler -fno-fat-lto-objects" "SHELL:-Xcompiler -march=native")
+   set(CMAKE_GNU_LINK_FLAGS -flto -fno-fat-lto-objects -march=native)
+   set(CMAKE_CLANG_COMP_FLAGS -O3 -flto -march=native)
+   set(CMAKE_CLANG_CUDA_COMP_FLAGS "SHELL:-Xcompiler -O3" "SHELL:-Xcompiler -flto" "SHELL:-Xcompiler -march=native")
+   set(CMAKE_CLANG_LINK_FLAGS -O3 -flto -march=native)
 endif()
 
 if(GOMC_ASAN)
    set(CMAKE_GNU_COMP_FLAGS ${CMAKE_GNU_COMP_FLAGS} --param=max-vartrack-size=100000000 -fsanitize=address -fno-omit-frame-pointer)
+   set(CMAKE_GNU_CUDA_COMP_FLAGS ${CMAKE_GNU_CUDA_COMP_FLAGS} "SHELL:-Xcompiler --param=max-vartrack-size=100000000"
+                                                              "SHELL:-Xcompiler -fsanitize=address" "SHELL:-Xcompiler -fno-omit-frame-pointer")
    set(CMAKE_GNU_LINK_FLAGS ${CMAKE_GNU_LINK_FLAGS} --param=max-vartrack-size=100000000 -fsanitize=address -fno-omit-frame-pointer)
    set(CMAKE_CLANG_COMP_FLAGS ${CMAKE_CLANG_COMP_FLAGS} -fsanitize=address -fno-omit-frame-pointer)
+   set(CMAKE_CLANG_CUDA_COMP_FLAGS ${CMAKE_CLANG_CUDA_COMP_FLAGS} "SHELL:-Xcompiler -fsanitize=address" "SHELL:-Xcompiler -fno-omit-frame-pointer")
    set(CMAKE_CLANG_LINK_FLAGS ${CMAKE_CLANG_LINK_FLAGS} -fsanitize=address -fno-omit-frame-pointer)
 endif()
 
@@ -50,10 +57,13 @@ if(NOT GOMC_ASAN)
     find_package(OpenMP)
     if(OPENMP_FOUND)
        set(CMAKE_INTEL_COMP_FLAGS ${CMAKE_INTEL_COMP_FLAGS} ${OpenMP_CXX_FLAGS})
+       set(CMAKE_INTEL_CUDA_COMP_FLAGS ${CMAKE_INTEL_CUDA_COMP_FLAGS} "SHELL:-Xcompiler ${OpenMP_CXX_FLAGS}")
        set(CMAKE_INTEL_LINK_FLAGS ${CMAKE_INTEL_LINK_FLAGS} ${OpenMP_CXX_FLAGS})
        set(CMAKE_GNU_COMP_FLAGS ${CMAKE_GNU_COMP_FLAGS} ${OpenMP_CXX_FLAGS})
+       set(CMAKE_GNU_CUDA_COMP_FLAGS ${CMAKE_GNU_CUDA_COMP_FLAGS} "SHELL:-Xcompiler ${OpenMP_CXX_FLAGS}")
        set(CMAKE_GNU_LINK_FLAGS ${CMAKE_GNU_LINK_FLAGS} ${OpenMP_CXX_FLAGS})
        set(CMAKE_CLANG_COMP_FLAGS ${CMAKE_CLANG_COMP_FLAGS} ${OpenMP_CXX_FLAGS})
+       set(CMAKE_CLANG_CUDA_COMP_FLAGS ${CMAKE_CLANG_CUDA_COMP_FLAGS} "SHELL:-Xcompiler ${OpenMP_CXX_FLAGS}")
        set(CMAKE_CLANG_LINK_FLAGS ${CMAKE_CLANG_LINK_FLAGS} ${OpenMP_CXX_FLAGS})
     endif()
 endif()
@@ -71,8 +81,20 @@ include(${PROJECT_SOURCE_DIR}/CMake/GOMCMPI.cmake)
 
 include_directories("${PROJECT_BINARY_DIR}")
 
-# Additional flags for Intel, GNU and Clang compilers set elsewhere
-if(MSVC)
+# Set compiler flags now that all the options have been specified
+if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM")
+    set(CMAKE_COMP_FLAGS "${CMAKE_INTEL_COMP_FLAGS}")
+    set(CMAKE_GPU_COMP_FLAGS "${CMAKE_INTEL_CUDA_COMP_FLAGS}")
+    set(CMAKE_LINK_FLAGS "${CMAKE_INTEL_LINK_FLAGS}")
+elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
+    set(CMAKE_COMP_FLAGS "${CMAKE_GNU_COMP_FLAGS}")
+    set(CMAKE_GPU_COMP_FLAGS "${CMAKE_GNU_CUDA_COMP_FLAGS}")
+    set(CMAKE_LINK_FLAGS "${CMAKE_GNU_LINK_FLAGS}")
+elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+    set(CMAKE_COMP_FLAGS "${CMAKE_CLANG_COMP_FLAGS}")
+    set(CMAKE_GPU_COMP_FLAGS "${CMAKE_CLANG_CUDA_COMP_FLAGS}")
+    set(CMAKE_LINK_FLAGS "${CMAKE_CLANG_LINK_FLAGS}")
+elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC")
     set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /D_DEBUG /MTd /Zi /Ob0 /Od /RTC1")
     set(CMAKE_CXX_FLAGS_DEBUG_INIT "${CMAKE_CXX_FLAGS_DEBUG_INIT} /D_DEBUG /MTd /Zi /Ob0 /Od /RTC1")
     set(CMAKE_CXX_FLAGS_MINSIZEREL_INIT "${CMAKE_CXX_FLAGS_MINSIZEREL_INIT} /MT /O1 /Ob1 /D NDEBUG")
diff --git a/README.md b/README.md
index 5fcfe19c3..17ebf0295 100644
--- a/README.md
+++ b/README.md
@@ -51,11 +51,11 @@ To cite GOMC project, please cite the following papers:
 > NOTES: You can also use CMake from the Windows command line if its directory is added to the PATH environment variable.
 
 ## Executing GOMC:
-  You can set the number of the threads using the +pN argument, where N is the number of threads.
+  You can set the number of CPU threads using the +pN argument, where N is the number of threads.
   For example:
   ```bash
   ./GOMC_GPU_GEMC +p4 in.conf
   ```
 
-  will run a simulation with the Gibbs ensemble on the GPU using 4 threads and loads configuration settings from the file "in.conf".
+  will run a simulation with the Gibbs ensemble on the GPU using 4 CPU threads and load configuration settings from the file "in.conf".
 
diff --git a/src/FFSetup.cpp b/src/FFSetup.cpp
index 17794908b..f0d40ddb4 100644
--- a/src/FFSetup.cpp
+++ b/src/FFSetup.cpp
@@ -380,7 +380,8 @@ void Dihedral::Read(Reader &param, std::string const &firstVar) {
   if (index == 0) {
     // set phase shift for n=0 to 90 degree
     // We will have C0 = Kchi (1 + cos(0 * phi + 90)) = Kchi
-    //this avoids double counting the C0 (constant offset) term, which is used force fields like TraPPE
+    // this avoids double counting the C0 (constant offset) term, which is used
+    // in force fields like TraPPE
     def = 90.00;
   }
   Add(merged, coeff, index, def);

From b43e807de27838a01a034548622c40bf1a147618 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 14 Aug 2023 13:14:38 -0400
Subject: [PATCH 09/42] Final changes to resolve GPU linking issues

---
 CMake/GOMCCUDASetup.cmake | 63 +++++++++++++++++++--------------------
 CMakeLists.txt            | 12 ++++----
 2 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/CMake/GOMCCUDASetup.cmake b/CMake/GOMCCUDASetup.cmake
index dd537ea43..b704cec3b 100644
--- a/CMake/GOMCCUDASetup.cmake
+++ b/CMake/GOMCCUDASetup.cmake
@@ -1,10 +1,10 @@
 # Find CUDA is enabled, set it up
-
 set(CMAKE_CUDA_COMP_FLAGS -DGOMC_CUDA -DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT)
 
 if(CMAKE_BUILD_TYPE STREQUAL "Debug")
     message("-- Debug build type detected, passing '-g -G --keep' to nvcc")
     set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -g -G --keep)
+    set(CMAKE_CUDA_LINK_FLAGS -g -G --keep)
 endif()
 
 if(GOMC_NVTX_ENABLED)
@@ -16,18 +16,23 @@ endif()
 # Once CMake 3.23 has been available for a while, we should just use
 # set(CMAKE_CUDA_ARCHITECTURES all) and remove the if block
 # Can't get CUDA link time optimization enabled for all architectures directly, so need to do one-by-one.
-if(NOT GOMC_OPT)
+if(NOT GOMC_OPT OR (CMAKE_BUILD_TYPE STREQUAL "Debug"))
    if (CMAKE_MAJOR_VERSION VERSION_GREATER 3 OR CMAKE_MINOR_VERSION VERSION_GREATER_EQUAL 23)
        set(CMAKE_CUDA_ARCHITECTURES all)
    else()
-       set(CMAKE_CUDA_ARCHITECTURES 50;60;70;75;80)
+       set(CMAKE_CUDA_ARCHITECTURES 60;70;75;80)
    endif()
 else()
     set(CMAKE_CUDA_ARCHITECTURES OFF)
-    set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_50,code=lto_50")
     set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_60,code=lto_60")
     set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_70,code=lto_70")
+    set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_75,code=lto_75")
     set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_80,code=lto_80")
+    # set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_LINK_FLAGS} "SHELL:-arch=sm_60")
+    set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_LINK_FLAGS} "SHELL:-arch=sm_70")
+    # set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_LINK_FLAGS} "SHELL:-arch=sm_75")
+    # set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_LINK_FLAGS} "SHELL:-arch=sm_80")
+    set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_LINK_FLAGS} -dlto)
 endif()
 
 include_directories(src/GPU)
@@ -47,29 +52,29 @@ set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_CXX_STANDARD_REQUIRED true)
 
 # Turn off warning that CUDA files were not compiled with the -ipo flag
-# if(GOMC_OPT)
-   # set(CMAKE_INTEL_LINK_FLAGS ${CMAKE_INTEL_LINK_FLAGS} -diag-disable=11003)
-# endif()
+if(GOMC_OPT)
+    if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM")
+        set(CMAKE_LINK_FLAGS ${CMAKE_LINK_FLAGS} -diag-disable=11003)
+    endif()
+endif()
 
 # Disable the warning on deprecated GPU targets
 set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -Wno-deprecated-gpu-targets)
+set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_LINK_FLAGS} -Wno-deprecated-gpu-targets)
 
 include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
 
 #####################################
 if(ENSEMBLE_GPU_NVT)
-    add_executable(GPU_NVT ${cudaSources} ${cudaHeaders}
-    ${sources} ${headers} ${libHeaders} ${libSources})
-    # Set compiler and linker flags for each compiler
+    add_executable(GPU_NVT ${cudaSources} ${cudaHeaders} ${sources}
+    ${headers} ${libHeaders} ${libSources})
+    # Set compiler and linker flags for NVCC and the host compiler
     target_compile_options(GPU_NVT
        PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
               $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
-    # target_link_options(GPU_NVT
-       # PUBLIC $<$<LINK_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
-              # $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
-              # $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>
-              # $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
-              # $<$<LINK_LANG_AND_ID:CUDA,NVIDIA>: -dlto>)
+    target_link_options(GPU_NVT
+       PUBLIC $<HOST_LINK:${CMAKE_LINK_FLAGS}>
+              $<DEVICE_LINK:${CMAKE_CUDA_LINK_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
     set_target_properties(GPU_NVT PROPERTIES
         CUDA_SEPARABLE_COMPILATION ON
         OUTPUT_NAME ${GPU_NVT_name}
@@ -89,15 +94,13 @@ endif()
 if(ENSEMBLE_GPU_GEMC)
     add_executable(GPU_GEMC ${cudaSources} ${cudaHeaders} ${sources}
     ${headers} ${libHeaders} ${libSources})
-    # Set compiler and linker flags for each compiler
+    # Set compiler and linker flags for NVCC and the host compiler
     target_compile_options(GPU_GEMC
        PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
               $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
-    # target_link_options(GPU_GEMC
-       # PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-              # $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
-              # $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>
-              # $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
+    target_link_options(GPU_GEMC
+       PUBLIC $<HOST_LINK:${CMAKE_LINK_FLAGS}>
+              $<DEVICE_LINK:${CMAKE_CUDA_LINK_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
     set_target_properties(GPU_GEMC PROPERTIES
         CUDA_SEPARABLE_COMPILATION ON
         OUTPUT_NAME ${GPU_GE_name}
@@ -117,15 +120,13 @@ endif()
 if(ENSEMBLE_GPU_GCMC)
     add_executable(GPU_GCMC ${cudaSources} ${cudaHeaders} ${sources}
     ${headers} ${libHeaders} ${libSources})
-    # Set compiler and linker flags for each compiler
+    # Set compiler and linker flags for NVCC and the host compiler
     target_compile_options(GPU_GCMC
        PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
               $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
     target_link_options(GPU_GCMC
-       PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-              $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
-              $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>
-              $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
+       PUBLIC $<HOST_LINK:${CMAKE_LINK_FLAGS}>
+              $<DEVICE_LINK:${CMAKE_CUDA_LINK_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
     set_target_properties(GPU_GCMC PROPERTIES
         CUDA_SEPARABLE_COMPILATION ON
         OUTPUT_NAME ${GPU_GC_name}
@@ -145,15 +146,13 @@ endif()
 if(ENSEMBLE_GPU_NPT)
     add_executable(GPU_NPT ${cudaSources} ${cudaHeaders} ${sources}
     ${headers} ${libHeaders} ${libSources})
-    # Set compiler and linker flags for each compiler
+    # Set compiler and linker flags for NVCC and the host compiler
     target_compile_options(GPU_NPT
        PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
               $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
     target_link_options(GPU_NPT
-       PUBLIC $<$<LINK_LANG_AND_ID:CXX,IntelLLVM,Intel>:${CMAKE_INTEL_LINK_FLAGS}>
-              $<$<LINK_LANG_AND_ID:CXX,GNU>:${CMAKE_GNU_LINK_FLAGS}>
-              $<$<LINK_LANG_AND_ID:CXX,Clang>:${CMAKE_CLANG_LINK_FLAGS}>
-              $<$<LINK_LANGUAGE:CUDA>:${CMAKE_CUDA_LINK_FLAGS}>)
+       PUBLIC $<HOST_LINK:${CMAKE_LINK_FLAGS}>
+              $<DEVICE_LINK:${CMAKE_CUDA_LINK_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
     set_target_properties(GPU_NPT PROPERTIES
         CUDA_SEPARABLE_COMPILATION ON
         OUTPUT_NAME ${GPU_NPT_name}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 33e4aa71c..3349d2c4e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,6 @@
 cmake_minimum_required(VERSION 3.18)
 
 project(GOMC)
-set(CMAKE_VERBOSE_MAKEFILE ON)
 
 include_directories(lib)
 include_directories(src)
@@ -33,9 +32,10 @@ if(GOMC_OPT)
    set(CMAKE_INTEL_COMP_FLAGS -Ofast -ipo -xHost)
    set(CMAKE_INTEL_CUDA_COMP_FLAGS "SHELL:-Xcompiler -Ofast" "SHELL:-Xcompiler -ipo" "SHELL:-Xcompiler -xHost")
    set(CMAKE_INTEL_LINK_FLAGS -Ofast -ipo -xHost)
-   set(CMAKE_GNU_COMP_FLAGS -flto -fno-fat-lto-objects -march=native)
-   set(CMAKE_GNU_CUDA_COMP_FLAGS "SHELL:-Xcompiler -O3" "SHELL:-Xcompiler -flto" "SHELL:-Xcompiler -fno-fat-lto-objects" "SHELL:-Xcompiler -march=native")
-   set(CMAKE_GNU_LINK_FLAGS -flto -fno-fat-lto-objects -march=native)
+   set(CMAKE_GNU_COMP_FLAGS -flto -fno-fat-lto-objects -m64 -march=native)
+   set(CMAKE_GNU_CUDA_COMP_FLAGS "SHELL:-Xcompiler -O3" "SHELL:-Xcompiler -m64" "SHELL:-Xcompiler -march=native")
+   # set(CMAKE_GNU_CUDA_COMP_FLAGS "SHELL:-Xcompiler -O3" "SHELL:-Xcompiler -flto" "SHELL:-Xcompiler -fno-fat-lto-objects" "SHELL:-Xcompiler -march=native")
+   set(CMAKE_GNU_LINK_FLAGS -flto -fno-fat-lto-objects -m64 -march=native)
    set(CMAKE_CLANG_COMP_FLAGS -O3 -flto -march=native)
    set(CMAKE_CLANG_CUDA_COMP_FLAGS "SHELL:-Xcompiler -O3" "SHELL:-Xcompiler -flto" "SHELL:-Xcompiler -march=native")
    set(CMAKE_CLANG_LINK_FLAGS -O3 -flto -march=native)
@@ -114,8 +114,8 @@ option(GOMC_GTEST_MPI    "Build unit tests for GOMC - MPI Enabled" OFF)
 
 #enable config header
 configure_file(
-	"${PROJECT_SOURCE_DIR}/GOMC_Config.h.in"
-	"${PROJECT_BINARY_DIR}/GOMC_Config.h"
+    "${PROJECT_SOURCE_DIR}/GOMC_Config.h.in"
+    "${PROJECT_BINARY_DIR}/GOMC_Config.h"
 )
 
 # Enable google test

From d3342d8edbd3ef3f3b4af97ee51dd9255c997e0d Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 14 Aug 2023 13:54:56 -0400
Subject: [PATCH 10/42] Update GOMC Website in Readme file

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 17ebf0295..94d0a0bd4 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ Current Release: 2.75 (6/21/2022)
 [![Gitter chat](https://badges.gitter.im/gitterHQ/gitter.png)](https://gitter.im/GOMC_WSU/Lobby?utm_source=share-link&utm_medium=link&utm_campaign=share-link)
 [![Build Status](https://travis-ci.org/GOMC-WSU/GOMC.svg?branch=master)](https://travis-ci.org/GOMC-WSU/GOMC)
 
-We recommend the [GOMC Project Website](http://gomc.eng.wayne.edu/ "GOMC Website") and the [user manual](https://gomc-wsu.github.io/Manual/ "User Manual") for further information and examples.
+We recommend the [GOMC Project Website](https://gomc-wsu.org/ "GOMC Website") and the [user manual](https://gomc-wsu.github.io/Manual/ "User Manual") for further information and examples.
 
 To cite GOMC project, please cite the following papers:
 1.  [Y. Nejahi, M. Soroush Barhaghi,  G. Schwing, L. Schwiebert, J. Potoff. SoftwareX, 13, 100627 (2021). doi: 10.1016/j.softx.2020.100627.](https://www.sciencedirect.com/science/article/pii/S235271102030340X)

From 523b9f3e11d79ff9aec482ab9bae10bfe3b59a2f Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 14 Aug 2023 17:32:35 -0400
Subject: [PATCH 11/42] Update web link for CMake

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 94d0a0bd4..6641f2e77 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ To cite GOMC project, please cite the following papers:
   `./metamake.sh` accepts flags which indicate which ensembles to compile. Default behavior with no flags will compile all CPU ensembles and, if CUDA is available, all GPU ensembles. Multiple flags must be separated by spaces. Current accepted flags are: `CPU` to compile all CPU ensembles, `GPU` to compile all GPU ensembles, or you can compile ensembles individually by using any of the following flags:
   `NVT`, `NPT`, `GCMC`, `GEMC`, `GPU_NVT`, `GPU_NPT`, `GPU_GCMC`, `GPU_GEMC`.
 
-> NOTES: Building GOMC requires CMake, available at http://www.cmake.org and in most Linux package repositories (as cmake). If you wish to utilize NVIDIA graphics cards you will need to install the NVIDIA toolkit before compiling. The metamake file will automatically detect the location of your CUDA installation. (More detailed info can be found in the [user manual](https://gomc-wsu.github.io/Manual/ "User Manual".)
+> NOTES: Building GOMC requires CMake, available at https://cmake.org/ and in most Linux package repositories (as cmake). If you wish to utilize NVIDIA graphics cards you will need to install the NVIDIA toolkit before compiling. The metamake file will automatically detect the location of your CUDA installation. (More detailed info can be found in the [user manual](https://gomc-wsu.github.io/Manual/ "User Manual").)
 
 ## Building GOMC on Windows:
   1. Open the Windows-compatible CMake GUI.

From 2bc9c6c61e49cf0d608f5884905a23595cdea26b Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 14 Aug 2023 17:35:42 -0400
Subject: [PATCH 12/42] Remove unused use_tidy variable. Just set CMake flag

---
 metamake.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/metamake.sh b/metamake.sh
index a6c342dac..eecc34875 100755
--- a/metamake.sh
+++ b/metamake.sh
@@ -9,7 +9,6 @@ use_clang=0
 use_mpi=0
 use_asan=0
 use_opt=1
-use_tidy=0
 use_debug=0
 ENSEMBLES=""
 CMAKEARGS=""
@@ -76,7 +75,6 @@ while getopts 'acdglmnpt' opt; do
         a)
             use_asan=1;;
         c)
-            use_tidy=1
             CMAKEARGS+="-DGOMC_TIDY=on ";;
         d)
             use_debug=1;;

From 6b287029d509024eb32cf719fda7076bad60cc99 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 14 Aug 2023 19:07:51 -0400
Subject: [PATCH 13/42] Clean up README.md per Codacy style

---
 README.md | 60 +++++++++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/README.md b/README.md
index 6641f2e77..b7b15b219 100644
--- a/README.md
+++ b/README.md
@@ -13,40 +13,40 @@ To cite GOMC project, please cite the following papers:
 
 ## Building GOMC on GNU/Linux, macOS, or Cygwin:
 
-  1. Clone or download our code from GitHub:
-      ```bash
-      git clone https://github.com/GOMC-WSU/GOMC.git
-      ```
-  2. Go into the GOMC directory: 
-      ```bash
-      cd GOMC
-      ```
-  3. Give execution permission: 
-      ```bash
-      chmod u+x metamake.sh
-      ```
-  4. Run metamake file:
-      ```bash
-      ./metamake.sh
-      ```
-  5. Step 4 will place all the executables in ```bin``` directory.
-
-  `./metamake.sh` accepts flags which indicate which ensembles to compile. Default behavior with no flags will compile all CPU ensembles and, if CUDA is available, all GPU ensembles. Multiple flags must be separated by spaces. Current accepted flags are: `CPU` to compile all CPU ensembles, `GPU` to compile all GPU ensembles, or you can compile ensembles individually by using any of the following flags:
+1.  Clone or download our code from GitHub:
+     ```bash
+     git clone https://github.com/GOMC-WSU/GOMC.git
+     ```
+2.  Go into the GOMC directory: 
+     ```bash
+     cd GOMC
+     ```
+3.  Give execution permission: 
+     ```bash
+     chmod u+x metamake.sh
+     ```
+4.  Run metamake file:
+     ```bash
+     ./metamake.sh
+     ```
+5.  Step 4 will place all the executables in the ```bin``` directory.
+
+  `./metamake.sh` accepts a list of ensembles to compile. Default behavior, listing no ensembles, is to compile all CPU ensembles and, if CUDA is available, all GPU ensembles. Multiple ensemble names must be separated by spaces. Currently accepted values are: `CPU` to compile all CPU ensembles, `GPU` to compile all GPU ensembles, or you can compile ensembles individually by using any of the following keywords:
   `NVT`, `NPT`, `GCMC`, `GEMC`, `GPU_NVT`, `GPU_NPT`, `GPU_GCMC`, `GPU_GEMC`.
 
-> NOTES: Building GOMC requires CMake, available at https://cmake.org/ and in most Linux package repositories (as cmake). If you wish to utilize NVIDIA graphics cards you will need to install the NVIDIA toolkit before compiling. The metamake file will automatically detect the location of your CUDA installation. (More detailed info can be found in the [user manual](https://gomc-wsu.github.io/Manual/ "User Manual".)
+> NOTES: Building GOMC requires [CMake](https://cmake.org/) version 3.18 or newer. CMake is available in most Linux package repositories (as cmake). If you wish to utilize NVIDIA graphics cards you will need to install the NVIDIA toolkit before compiling. The metamake file will automatically detect the location of your CUDA installation. More detailed info can be found in the [user manual](https://gomc-wsu.github.io/Manual/ "User Manual").
 
 ## Building GOMC on Windows:
-  1. Open the Windows-compatible CMake GUI.
-  2. Set the Source Folder to the GOMC root folder.
-  3. Set the Build Folder to your build folder.
-  4. Click Configure, select your compiler/environment.
-  5. Wait for CMake to finish the configuration.
-  6. Click Configure again and click Generate.
-  7. If your version of CUDA is older than CUDA 11, download the [CUB library](https://nvlabs.github.io/cub/download_cub.html).
-  8. If your version of CUDA is older than CUDA 11, extract the CUB library and copy the "cub" folder from the CUB library into the "lib" folder inside the GOMC directory.
-  9. Open the CMake-generated project/solution etc. in the desired IDE (e.g., Visual Studio).
-  10. Using the solution in the IDE, build GOMC per the IDE's standard release compilation/executable generation methods.
+1.  Open the Windows-compatible CMake GUI.
+2.  Set the Source Folder to the GOMC root folder.
+3.  Set the Build Folder to your build folder.
+4.  Click Configure, select your compiler/environment.
+5.  Wait for CMake to finish the configuration.
+6.  Click Configure again and click Generate.
+7.  If your version of CUDA is older than CUDA 11, download the [CUB library](https://nvlabs.github.io/cub/download_cub.html).
+8.  If your version of CUDA is older than CUDA 11, extract the CUB library and copy the "cub" folder from the CUB library into the "lib" folder inside the GOMC directory.
+9.  Open the CMake-generated project/solution etc. in the desired IDE (e.g., Visual Studio).
+10.  Using the solution in the IDE, build GOMC per the IDE's standard release compilation/executable generation methods.
 
 > NOTES: You can also use CMake from the Windows command line if its directory is added to the PATH environment variable.
 

From 4f9d4fa502b2d10113fa06f020749027be629869 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 14 Aug 2023 19:35:34 -0400
Subject: [PATCH 14/42] README.md clean up per Codacy Style guidelines

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b7b15b219..01fbe9d9d 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ To cite GOMC project, please cite the following papers:
 7.  If your version of CUDA is older than CUDA 11, download the [CUB library](https://nvlabs.github.io/cub/download_cub.html).
 8.  If your version of CUDA is older than CUDA 11, extract the CUB library and copy the "cub" folder from the CUB library into the "lib" folder inside the GOMC directory.
 9.  Open the CMake-generated project/solution etc. in the desired IDE (e.g., Visual Studio).
-10.  Using the solution in the IDE, build GOMC per the IDE's standard release compilation/executable generation methods.
+10. Using the solution in the IDE, build GOMC per the IDE's standard release compilation/executable generation methods.
 
 > NOTES: You can also use CMake from the Windows command line if its directory is added to the PATH environment variable.
 

From dde489239d5888034ce0815cef02b285cc830929 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 14 Aug 2023 19:36:30 -0400
Subject: [PATCH 15/42] README.md clean up per Codacy Style guidelines

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 01fbe9d9d..a3fa1e5bb 100644
--- a/README.md
+++ b/README.md
@@ -58,4 +58,3 @@ To cite GOMC project, please cite the following papers:
   ```
 
   will run a simulation with the Gibbs ensemble on the GPU using 4 CPU threads and loads configuration settings from the file "in.conf".
-

From 3f21ee930cddf4b23c3a51311357be2c70c71010 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Sun, 27 Aug 2023 15:47:28 -0400
Subject: [PATCH 16/42] Change OpenMP pragma for compatibility with old Intel
 compilers

---
 metamake.sh               | 1 -
 src/moves/MultiParticle.h | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/metamake.sh b/metamake.sh
index eecc34875..d2e13e767 100755
--- a/metamake.sh
+++ b/metamake.sh
@@ -13,7 +13,6 @@ use_debug=0
 ENSEMBLES=""
 CMAKEARGS=""
 
-
 # Check if nvcc is available
 if command -v nvcc &> /dev/null
 then
diff --git a/src/moves/MultiParticle.h b/src/moves/MultiParticle.h
index 7e0f31c0e..4d17b28f7 100644
--- a/src/moves/MultiParticle.h
+++ b/src/moves/MultiParticle.h
@@ -541,7 +541,7 @@ inline void MultiParticle::CalculateTrialDistRot() {
     double *y = r_k.y;
     double *z = r_k.z;
 #ifdef _OPENMP
-#pragma omp parallel for default(none) shared(lambda, r_max, x, y, z)
+#pragma omp parallel for default(none) shared(r_max, x, y, z)
 #endif
     for (uint m = 0; m < moleculeIndex.size(); m++) {
       uint molIndex = moleculeIndex[m];
@@ -563,7 +563,7 @@ inline void MultiParticle::CalculateTrialDistRot() {
     double *y = t_k.y;
     double *z = t_k.z;
 #ifdef _OPENMP
-#pragma omp parallel for default(none) shared(lambda, t_max, x, y, z)
+#pragma omp parallel for default(none) shared(t_max, x, y, z)
 #endif
     for (uint m = 0; m < moleculeIndex.size(); m++) {
       uint molIndex = moleculeIndex[m];

From 1587a197d726766b57d7e7fa2db50b77a1ce58b4 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Sun, 27 Aug 2023 15:48:26 -0400
Subject: [PATCH 17/42] Change OpenMP pragma for compatibility with old Intel
 compilers

---
 metamake.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/metamake.sh b/metamake.sh
index d2e13e767..eecc34875 100755
--- a/metamake.sh
+++ b/metamake.sh
@@ -13,6 +13,7 @@ use_debug=0
 ENSEMBLES=""
 CMAKEARGS=""
 
+
 # Check if nvcc is available
 if command -v nvcc &> /dev/null
 then

From 8d16bf071a09db7ebeca8d083961f27c8a15ca89 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Thu, 21 Sep 2023 10:48:02 -0400
Subject: [PATCH 18/42] Remove Endian Tests since Endian.h is no longer used in
 GOMC

---
 test/BuildCPUTests.cmake |  4 ----
 test/FileList.cmake      |  2 --
 test/src/EndianTest.cpp  | 19 -------------------
 3 files changed, 25 deletions(-)
 delete mode 100644 test/src/EndianTest.cpp

diff --git a/test/BuildCPUTests.cmake b/test/BuildCPUTests.cmake
index babf4dad8..1e9da8db7 100644
--- a/test/BuildCPUTests.cmake
+++ b/test/BuildCPUTests.cmake
@@ -11,7 +11,6 @@ function(add_NVT_test name)
       #add_test(NAME CircuitTester_NVT COMMAND DialaTest)
       add_test(NAME MolLookupTest_NVT COMMAND CheckConsensusBeta)
       #add_test(NAME PSFParserTest_NVT COMMAND CheckProtAndWaterTest)
-      add_test(NAME EndianTest_NVT COMMAND TestBitSwap)
 endfunction(add_NVT_test)
 
 function(add_NPT_test name)
@@ -25,7 +24,6 @@ function(add_NPT_test name)
       #add_test(NAME CircuitTester_NPT COMMAND DialaTest)
       add_test(NAME MolLookupTest_NPT COMMAND CheckConsensusBeta)
       #add_test(NAME PSFParserTest_NPT COMMAND CheckProtAndWaterTest)
-      add_test(NAME EndianTest_NPT COMMAND TestBitSwap)
 endfunction(add_NPT_test)
 
 function(add_GCMC_test name)
@@ -41,7 +39,6 @@ function(add_GCMC_test name)
       #add_test(NAME PSFParserTest_GCMC COMMAND CheckProtAndWaterTest)
       add_test(NAME ConsistentTrajectoryTest_GCMC COMMAND CheckPDBTrajCoordinates)
       #add_test(NAME CheckpointTest_GCMC COMMAND CheckMollookup)
-      add_test(NAME EndianTest_GCMC COMMAND TestBitSwap)
 endfunction(add_GCMC_test)
 
 function(add_GEMC_test name)
@@ -57,7 +54,6 @@ function(add_GEMC_test name)
       #add_test(NAME PSFParserTest_GEMC COMMAND CheckProtAndWaterTest)
       add_test(NAME ConsistentTrajectoryTest_GEMC COMMAND CheckPDBTrajCoordinates)
       #add_test(NAME CheckpointTest_GEMC COMMAND CheckMollookup)
-      add_test(NAME EndianTest_GEMC COMMAND TestBitSwap)
 endfunction(add_GEMC_test)
 
 add_NVT_test(GOMC_NVT_Test)
diff --git a/test/FileList.cmake b/test/FileList.cmake
index b16a7652e..472c0aae3 100644
--- a/test/FileList.cmake
+++ b/test/FileList.cmake
@@ -1,7 +1,6 @@
 set(TestSources
     test/src/BasicTypesTest.cpp
     test/src/BitLibTest.cpp
-    test/src/EndianTest.cpp
     test/src/MolLookupTest.cpp
     #test/src/CircuitTester.cpp
     #test/src/PSFParserTest.cpp
@@ -206,7 +205,6 @@ set(libHeaders
    lib/AlphaNum.h
    lib/BasicTypes.h
    lib/BitLib.h
-   lib/Endian.h
    lib/GeomLib.h
    lib/Lambda.h
    lib/NumLib.h
diff --git a/test/src/EndianTest.cpp b/test/src/EndianTest.cpp
deleted file mode 100644
index 1c060234b..000000000
--- a/test/src/EndianTest.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-#include "Endian.h"
-#include <gtest/gtest.h>
-
-TEST(EndianTest, TestBitSwap) {
-  uint64_t x = 0x0123456789abcdefull;
-  uint64_t rev_x = 0xefcdab8967452301ull;
-  EXPECT_EQ(rev_x, bswap_64(x));
-
-  uint32_t y = 0x01234567;
-  uint32_t rev_y = 0x67452301;
-  EXPECT_EQ(rev_y, bswap_32(y));
-
-  uint16_t z = 0x0123;
-  uint16_t rev_z = 0x2301;
-  EXPECT_EQ(rev_z, bswap_16(z));
-
-  uint64_t w = 0x0123456789abcdefull;
-  EXPECT_EQ(w, bswap_64(bswap_64(w)));
-}
\ No newline at end of file

From 411ed83b205fedc53690f99fb530e97946cd443d Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Thu, 21 Sep 2023 14:06:28 -0400
Subject: [PATCH 19/42] Standardize guards on header files. Add some missing
 ones.

---
 lib/AlphaNum.h                            | 8 ++++++--
 lib/FloydWarshallCycle.h                  | 6 +++++-
 src/BondAdjacencyList.h                   | 6 +++---
 src/CBMC.h                                | 2 +-
 src/CalculateEnergy.h                     | 6 +++---
 src/CellList.h                            | 2 +-
 src/Checkpoint.h                          | 2 +-
 src/CheckpointOutput.h                    | 2 +-
 src/CheckpointSetup.h                     | 2 +-
 src/ConfigSetup.h                         | 6 +++---
 src/CoordinateSetup.h                     | 2 +-
 src/DCDlib.h                              | 2 +-
 src/EnergyTypes.h                         | 2 +-
 src/EwaldCached.h                         | 6 +++---
 src/ExtendedSystem.h                      | 6 +++---
 src/ExtendedSystemOutput.h                | 6 +++---
 src/FreeEnergyOutput.h                    | 6 +++---
 src/FxdWidthWrtr.h                        | 6 +++---
 src/GOMCEventsProfileDef.h                | 5 +++++
 src/GPU/CUDAMemoryManager.cuh             | 5 ++++-
 src/GPU/CalculateEnergyCUDAKernel.cuh     | 5 ++++-
 src/GPU/CalculateEwaldCUDAKernel.cuh      | 6 +++---
 src/GPU/CalculateForceCUDAKernel.cuh      | 6 +++---
 src/GPU/CalculateMinImageCUDAKernel.cuh   | 5 ++++-
 src/GPU/ConstantDefinitionsCUDAKernel.cuh | 6 +++---
 src/GPU/TransformParticlesCUDAKernel.cuh  | 5 ++++-
 src/GPU/VariablesCUDA.cuh                 | 6 ++++--
 src/Geometry.h                            | 2 +-
 src/InputFileReader.h                     | 6 +++++-
 src/MolSetup.h                            | 2 +-
 src/MoleculeKind.h                        | 6 +++---
 src/MoleculeLookup.h                      | 2 +-
 src/PSFOutput.h                           | 6 +++---
 src/Random123Wrapper.h                    | 7 +++++--
 src/Setup.h                               | 2 +-
 src/SubdividedArray.h                     | 6 +++---
 src/TransformMatrix.h                     | 6 +++---
 src/Velocity.h                            | 2 +-
 src/cbmc/DCRotateOnAtom.h                 | 4 ++--
 39 files changed, 106 insertions(+), 72 deletions(-)

diff --git a/lib/AlphaNum.h b/lib/AlphaNum.h
index 527b67aed..f5f3294f1 100644
--- a/lib/AlphaNum.h
+++ b/lib/AlphaNum.h
@@ -5,7 +5,9 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#pragma once
+#ifndef ALPHA_NUM_H
+#define ALPHA_NUM_H
+
 #include "BasicTypes.h" // uint
 #include <algorithm>
 #include <cassert>
@@ -40,4 +42,6 @@ class AlphaNum {
   uint string2Uint(std::string stringSuffix);
   struct icompare_char;
   struct compare;
-};
\ No newline at end of file
+};
+
+#endif /*ALPHA_NUM_H*/
diff --git a/lib/FloydWarshallCycle.h b/lib/FloydWarshallCycle.h
index 59b22632d..a00b31543 100644
--- a/lib/FloydWarshallCycle.h
+++ b/lib/FloydWarshallCycle.h
@@ -5,7 +5,9 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#pragma once
+#ifndef FLOYD_WARSHALL_CYCLE_H
+#define FLOYD_WARSHALL_CYCLE_H
+
 #include <algorithm>
 #include <cassert>
 #include <vector>
@@ -86,3 +88,5 @@ class FloydWarshallCycle {
   std::vector<int> returnCombinedSet(const std::vector<int> &first,
                                      const std::vector<int> &second);
 };
+
+#endif /*FLOYD_WARSHALL_CYCLE_H*/
diff --git a/src/BondAdjacencyList.h b/src/BondAdjacencyList.h
index 3590555a2..5297022fc 100644
--- a/src/BondAdjacencyList.h
+++ b/src/BondAdjacencyList.h
@@ -8,8 +8,8 @@ along with this program, also can be found at
 
 /* Courtesy of https://www.softwaretestinghelp.com/graph-implementation-cpp/ */
 
-#ifndef BONDADJACENCYLIST_H
-#define BONDADJACENCYLIST_H
+#ifndef BOND_ADJACENCY_LIST_H
+#define BOND_ADJACENCY_LIST_H
 
 #include <limits.h>
 
@@ -48,4 +48,4 @@ class BondAdjacencyList {
 
   graphEdge *edges;
 };
-#endif
\ No newline at end of file
+#endif /*BOND_ADJACENCY_LIST_H*/
\ No newline at end of file
diff --git a/src/CBMC.h b/src/CBMC.h
index 72590e36f..a3cf4d4ad 100644
--- a/src/CBMC.h
+++ b/src/CBMC.h
@@ -66,4 +66,4 @@ CBMC *MakeCBMC(System &sys, const Forcefield &ff, const MoleculeKind &kind,
                const Setup &set);
 } // namespace cbmc
 
-#endif
+#endif /*CBMC_H*/
diff --git a/src/CalculateEnergy.h b/src/CalculateEnergy.h
index 42ca18fb8..f96106c78 100644
--- a/src/CalculateEnergy.h
+++ b/src/CalculateEnergy.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef CALCULATEENERGY_H
-#define CALCULATEENERGY_H
+#ifndef CALCULATE_ENERGY_H
+#define CALCULATE_ENERGY_H
 
 #include <vector>
 
@@ -294,4 +294,4 @@ class CalculateEnergy {
   const CellList &cellList;
 };
 
-#endif /*ENERGY_H*/
+#endif /*CALCULATE_ENERGY_H*/
diff --git a/src/CellList.h b/src/CellList.h
index 982efd424..ec50951e5 100644
--- a/src/CellList.h
+++ b/src/CellList.h
@@ -254,4 +254,4 @@ inline void CellList::Pairs::Next() {
     // skip over doubles
   } while (First() >= Second());
 }
-#endif
+#endif /*CELLLIST_H*/
diff --git a/src/Checkpoint.h b/src/Checkpoint.h
index d638faab1..38b205f0d 100644
--- a/src/Checkpoint.h
+++ b/src/Checkpoint.h
@@ -169,4 +169,4 @@ class Checkpoint {
   }
 };
 
-#endif
\ No newline at end of file
+#endif /*CHECKPOINT_H*/
\ No newline at end of file
diff --git a/src/CheckpointOutput.h b/src/CheckpointOutput.h
index 5581a5ae9..bcc889239 100644
--- a/src/CheckpointOutput.h
+++ b/src/CheckpointOutput.h
@@ -65,4 +65,4 @@ class CheckpointOutput : public OutputableBase {
   ulong stepsPerCheckpoint;
 };
 
-#endif
+#endif /*CHECKPOINT_OUTPUT_H*/
diff --git a/src/CheckpointSetup.h b/src/CheckpointSetup.h
index 6f8cb3e6a..ffa6f12ac 100644
--- a/src/CheckpointSetup.h
+++ b/src/CheckpointSetup.h
@@ -97,4 +97,4 @@ class CheckpointSetup {
   friend class CheckpointOutput;
 };
 
-#endif
+#endif /*CHECKPOINT_SETUP_H*/
diff --git a/src/ConfigSetup.h b/src/ConfigSetup.h
index c91b1ab56..3558d69e2 100644
--- a/src/ConfigSetup.h
+++ b/src/ConfigSetup.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef CONFIGSETUP_H
-#define CONFIGSETUP_H
+#ifndef CONFIG_SETUP_H
+#define CONFIG_SETUP_H
 
 #include <iostream> //for cerr, cout;
 #include <map>      //for function handle storage.
@@ -918,4 +918,4 @@ class ConfigSetup {
   InputFileReader reader;
 };
 
-#endif
+#endif /*CONFIG_SETUP_H*/
diff --git a/src/CoordinateSetup.h b/src/CoordinateSetup.h
index 87addef50..019042abb 100644
--- a/src/CoordinateSetup.h
+++ b/src/CoordinateSetup.h
@@ -21,4 +21,4 @@ struct CoordinateSetup {
   void SetCOM(const MolSetupData &molData);
 }
 
-#endif
+#endif /*COORDINATESETUP_H*/
diff --git a/src/DCDlib.h b/src/DCDlib.h
index ba9219dcb..492f30161 100644
--- a/src/DCDlib.h
+++ b/src/DCDlib.h
@@ -112,4 +112,4 @@ void NAMD_bug(const char *err_msg);
 int NAMD_file_exists(const char *filename);
 void NAMD_backup_file(const char *filename, const char *extension);
 
-#endif /* ! DCDLIB_H */
+#endif /*DCDLIB_H*/
diff --git a/src/EnergyTypes.h b/src/EnergyTypes.h
index 9c9ef7742..3ec3b0682 100644
--- a/src/EnergyTypes.h
+++ b/src/EnergyTypes.h
@@ -539,4 +539,4 @@ inline std::ostream &operator<<(std::ostream &out, Energy &en) {
 }
 #endif
 
-#endif
+#endif /*ENERGYTYPES_H*/
diff --git a/src/EwaldCached.h b/src/EwaldCached.h
index f62b758c7..33498ba19 100644
--- a/src/EwaldCached.h
+++ b/src/EwaldCached.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef EWALDCACHED_H
-#define EWALDCACHED_H
+#ifndef EWALD_CACHED_H
+#define EWALD_CACHED_H
 
 #include "Ewald.h"
 
@@ -81,4 +81,4 @@ class EwaldCached : public Ewald {
 #endif
 };
 
-#endif /*EWALDCACHED_H*/
+#endif /*EWALD_CACHED_H*/
diff --git a/src/ExtendedSystem.h b/src/ExtendedSystem.h
index c896bb331..d7ca001df 100644
--- a/src/ExtendedSystem.h
+++ b/src/ExtendedSystem.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef EXTENDED_SYSTEM
-#define EXTENDED_SYSTEM
+#ifndef EXTENDED_SYSTEM_H
+#define EXTENDED_SYSTEM_H
 
 #include <vector>
 
@@ -83,4 +83,4 @@ class ExtendedSystem {
   std::vector<XYZ> binaryVeloc;
 };
 
-#endif /*EXTENDED_SYSTEM*/
+#endif /*EXTENDED_SYSTEM_H*/
diff --git a/src/ExtendedSystemOutput.h b/src/ExtendedSystemOutput.h
index 3d3788efd..3f5d0886c 100644
--- a/src/ExtendedSystemOutput.h
+++ b/src/ExtendedSystemOutput.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef DCD_OUTPUT_H
-#define DCD_OUTPUT_H
+#ifndef EXTENDED_SYSTEM_OUTPUT_H
+#define EXTENDED_SYSTEM_OUTPUT_H
 
 #include <cstring>
 #include <iostream>
@@ -124,4 +124,4 @@ struct ExtendedSystemOutput : OutputableBase {
   Writer xscFile[BOX_TOTAL];
 };
 
-#endif /*DCD_OUTPUT_H*/
+#endif /*EXTENDED_SYSTEM_OUTPUT_H*/
diff --git a/src/FreeEnergyOutput.h b/src/FreeEnergyOutput.h
index aead3ff28..9f25b5d4b 100644
--- a/src/FreeEnergyOutput.h
+++ b/src/FreeEnergyOutput.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef FREEENERGY_OUTPUT_H
-#define FREEENERGY_OUTPUT_H
+#ifndef FREE_ENERGY_OUTPUT_H
+#define FREE_ENERGY_OUTPUT_H
 
 #include <fstream>
 #include <string>
@@ -59,4 +59,4 @@ struct FreeEnergyOutput : OutputableBase {
 #endif
 };
 
-#endif /*HIST_OUTPUT_H*/
+#endif /*FREE_ENERGY_OUTPUT_H*/
diff --git a/src/FxdWidthWrtr.h b/src/FxdWidthWrtr.h
index e2087677a..30a9e457e 100644
--- a/src/FxdWidthWrtr.h
+++ b/src/FxdWidthWrtr.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef FXD_WIDTH_WRTR
-#define FXD_WIDTH_WRTR
+#ifndef FXD_WIDTH_WRTR_H
+#define FXD_WIDTH_WRTR_H
 
 struct FxdWidthWrtr : Writer {
   FxdWidthWrtr(std::string const &nm, std::string const &als, const bool crit,
@@ -23,4 +23,4 @@ struct FxdWidthWrtr : Writer {
   // Align left by default, but accept right alignment if necessary.
 }
 
-#endif /*FXD_WIDTH_WRTR*/
+#endif /*FXD_WIDTH_WRTR_H*/
diff --git a/src/GOMCEventsProfileDef.h b/src/GOMCEventsProfileDef.h
index b228191a0..a4a0437e6 100644
--- a/src/GOMCEventsProfileDef.h
+++ b/src/GOMCEventsProfileDef.h
@@ -5,6 +5,9 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
+#ifndef GOMC_EVENTS_PROFILE_DEF_H
+#define GOMC_EVENTS_PROFILE_DEF_H
+
 GOMC_PROFILE_EVENT(INITIALIZE, "initialization")
 GOMC_PROFILE_EVENT(MC_RUN, "MC_run")
 GOMC_PROFILE_EVENT(DESTRUCTION, "destruction")
@@ -152,3 +155,5 @@ GOMC_PROFILE_EVENT(DUMMY_EVENT19, "Dummy Event19")
 GOMC_PROFILE_EVENT(DUMMY_EVENT20, "Dummy Event20")
 GOMC_PROFILE_EVENT(DUMMY_EVENT21, "Dummy Event21")
 GOMC_PROFILE_EVENT(DUMMY_EVENT22, "Dummy Event22")
+
+#endif /*GOMC_EVENTS_PROFILE_DEF_H*/
\ No newline at end of file
diff --git a/src/GPU/CUDAMemoryManager.cuh b/src/GPU/CUDAMemoryManager.cuh
index 0b5b52dcf..6f85af098 100644
--- a/src/GPU/CUDAMemoryManager.cuh
+++ b/src/GPU/CUDAMemoryManager.cuh
@@ -4,7 +4,9 @@ Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
 along with this program, also can be found at <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#pragma once
+#ifndef CUDA_MEMORY_MANAGER_H
+#define CUDA_MEMORY_MANAGER_H
+
 #ifdef GOMC_CUDA
 #include <cuda.h>
 #include <cuda_runtime.h>
@@ -27,3 +29,4 @@ private:
 };
 
 #endif
+#endif /*CUDA_MEMORY_MANAGER_H*/
diff --git a/src/GPU/CalculateEnergyCUDAKernel.cuh b/src/GPU/CalculateEnergyCUDAKernel.cuh
index 9f1366400..4f38e8141 100644
--- a/src/GPU/CalculateEnergyCUDAKernel.cuh
+++ b/src/GPU/CalculateEnergyCUDAKernel.cuh
@@ -4,7 +4,9 @@ Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
 along with this program, also can be found at <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#pragma once
+#ifndef CALCULATE_ENERGY_CUDA_KERNEL_H
+#define CALCULATE_ENERGY_CUDA_KERNEL_H
+
 #ifdef GOMC_CUDA
 #include <cuda.h>
 #include <cuda_runtime.h>
@@ -206,3 +208,4 @@ __device__ double CalcEnSwitchGPUNoLambda(double distSq, int index,
     double gpu_rCut, double gpu_rOn);
 
 #endif /*GOMC_CUDA*/
+#endif /*CALCULATE_ENERGY_CUDA_KERNEL_H*/
diff --git a/src/GPU/CalculateEwaldCUDAKernel.cuh b/src/GPU/CalculateEwaldCUDAKernel.cuh
index 16a3fc532..e9ad84326 100644
--- a/src/GPU/CalculateEwaldCUDAKernel.cuh
+++ b/src/GPU/CalculateEwaldCUDAKernel.cuh
@@ -4,8 +4,8 @@ Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
 along with this program, also can be found at <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef CALCULATE_EWALD_CUDA_KERNEL
-#define CALCULATE_EWALD_CUDA_KERNEL
+#ifndef CALCULATE_EWALD_CUDA_KERNEL_H
+#define CALCULATE_EWALD_CUDA_KERNEL_H
 
 #ifdef GOMC_CUDA
 #include <cuda.h>
@@ -190,4 +190,4 @@ __global__ void BoxReciprocalGPU(double *gpu_prefact,
                                  int imageSize);
 
 #endif /*GOMC_CUDA*/
-#endif /*CALCULATE_EWALD_CUDA_KERNEL*/
+#endif /*CALCULATE_EWALD_CUDA_KERNEL_H*/
diff --git a/src/GPU/CalculateForceCUDAKernel.cuh b/src/GPU/CalculateForceCUDAKernel.cuh
index 29d7953db..4c8773307 100644
--- a/src/GPU/CalculateForceCUDAKernel.cuh
+++ b/src/GPU/CalculateForceCUDAKernel.cuh
@@ -4,8 +4,8 @@ Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
 along with this program, also can be found at <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef CALCULATE_FORCE_CUDA_KERNEL
-#define CALCULATE_FORCE_CUDA_KERNEL
+#ifndef CALCULATE_FORCE_CUDA_KERNEL_H
+#define CALCULATE_FORCE_CUDA_KERNEL_H
 
 #ifdef GOMC_CUDA
 #include <vector>
@@ -403,4 +403,4 @@ __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj,
 
 
 #endif /*GOMC_CUDA*/
-#endif /*CALCULATE_FORCE_CUDA_KERNEL*/
+#endif /*CALCULATE_FORCE_CUDA_KERNEL_H*/
diff --git a/src/GPU/CalculateMinImageCUDAKernel.cuh b/src/GPU/CalculateMinImageCUDAKernel.cuh
index a375e2db6..a3dd5ce74 100644
--- a/src/GPU/CalculateMinImageCUDAKernel.cuh
+++ b/src/GPU/CalculateMinImageCUDAKernel.cuh
@@ -5,7 +5,9 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#pragma once
+#ifndef CALCULATE_MIN_IMAGE_CUDA_KERNEL_H
+#define CALCULATE_MIN_IMAGE_CUDA_KERNEL_H
+
 #ifdef GOMC_CUDA
 
 #include "ConstantDefinitionsCUDAKernel.cuh"
@@ -278,3 +280,4 @@ static __inline__ __device__ double atomicAdd(double *address, double val) {
 #endif
 
 #endif /*GOMC_CUDA*/
+#endif /*CALCULATE_MIN_IMAGE_CUDA_KERNEL_H*/
diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cuh b/src/GPU/ConstantDefinitionsCUDAKernel.cuh
index 68eafb71b..52a2a4693 100644
--- a/src/GPU/ConstantDefinitionsCUDAKernel.cuh
+++ b/src/GPU/ConstantDefinitionsCUDAKernel.cuh
@@ -4,8 +4,8 @@ Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
 along with this program, also can be found at <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef CONSTANT_DEFINITIONS_CUDA_KERNEL
-#define CONSTANT_DEFINITIONS_CUDA_KERNEL
+#ifndef CONSTANT_DEFINITIONS_CUDA_KERNEL_H
+#define CONSTANT_DEFINITIONS_CUDA_KERNEL_H
 
 #ifdef GOMC_CUDA
 #include <cuda.h>
@@ -47,4 +47,4 @@ void DestroyExp6CUDAVars(VariablesCUDA *vars);
 void DestroyCUDAVars(VariablesCUDA *vars);
 
 #endif /*GOMC_CUDA*/
-#endif /*CONSTANT_DEFINITIONS_CUDA_KERNEL*/
+#endif /*CONSTANT_DEFINITIONS_CUDA_KERNEL_H*/
diff --git a/src/GPU/TransformParticlesCUDAKernel.cuh b/src/GPU/TransformParticlesCUDAKernel.cuh
index 01064af64..3ef9e34c6 100644
--- a/src/GPU/TransformParticlesCUDAKernel.cuh
+++ b/src/GPU/TransformParticlesCUDAKernel.cuh
@@ -5,7 +5,9 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#pragma once
+#ifndef TRANSFORM_PARTICLES_CUDA_KERNEL_H
+#define TRANSFORM_PARTICLES_CUDA_KERNEL_H
+
 #ifdef GOMC_CUDA
 #include "Random123/philox.h"
 #include <vector>
@@ -95,3 +97,4 @@ __global__ void BrownianMotionTranslateKernel(
     unsigned int key, ulong seed, double BETA);
 
 #endif
+#endif /*TRANSFORM_PARTICLES_CUDA_KERNEL_H*/
diff --git a/src/GPU/VariablesCUDA.cuh b/src/GPU/VariablesCUDA.cuh
index 687b6e2c6..8e95ca554 100644
--- a/src/GPU/VariablesCUDA.cuh
+++ b/src/GPU/VariablesCUDA.cuh
@@ -4,9 +4,10 @@ Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
 along with this program, also can be found at <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#pragma once
-#ifdef GOMC_CUDA
+#ifndef VARIABLES_CUDA_H
+#define VARIABLES_CUDA_H
 
+#ifdef GOMC_CUDA
 #include <cuda.h>
 #include <stdio.h>
 #include <cuda_runtime.h>
@@ -135,3 +136,4 @@ public:
   int *gpu_cellVector, *gpu_mapParticleToCell;
 };
 #endif
+#endif /*VARIABLES_CUDA_H*/
diff --git a/src/Geometry.h b/src/Geometry.h
index 751155712..a69106da0 100644
--- a/src/Geometry.h
+++ b/src/Geometry.h
@@ -138,4 +138,4 @@ class SortedNonbond {
   SubdividedArray subdiv;
 };
 
-#endif
+#endif /*GEOMETRY_H*/
diff --git a/src/InputFileReader.h b/src/InputFileReader.h
index 7f46d9ae7..51c7f1054 100644
--- a/src/InputFileReader.h
+++ b/src/InputFileReader.h
@@ -5,7 +5,9 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#pragma once
+#ifndef INPUT_FILE_READER_H
+#define INPUT_FILE_READER_H
+
 #include <fstream>
 #include <iostream>
 #include <vector>
@@ -23,3 +25,5 @@ class InputFileReader {
   InputFileReader(void);
   ~InputFileReader();
 };
+
+#endif /*INPUT_FILE_READER_H*/
diff --git a/src/MolSetup.h b/src/MolSetup.h
index b3ad9f92d..6b0184809 100644
--- a/src/MolSetup.h
+++ b/src/MolSetup.h
@@ -337,4 +337,4 @@ class MolSetup {
     ar &molVars;
   }
 };
-#endif
+#endif /*MOLSETUP_H*/
diff --git a/src/MoleculeKind.h b/src/MoleculeKind.h
index 1c92f46f6..31bb8485f 100644
--- a/src/MoleculeKind.h
+++ b/src/MoleculeKind.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef FF_MOLECULE_H
-#define FF_MOLECULE_H
+#ifndef MOLECULE_KIND_H
+#define MOLECULE_KIND_H
 
 #include <cassert>
 #include <string>
@@ -174,4 +174,4 @@ class MoleculeKind {
   double *atomCharge;
 };
 
-#endif /*FF_MOLECULE_H*/
+#endif /*MOLECULE_KIND_H*/
diff --git a/src/MoleculeLookup.h b/src/MoleculeLookup.h
index 311afd7c6..a0ac3e3f5 100644
--- a/src/MoleculeLookup.h
+++ b/src/MoleculeLookup.h
@@ -248,4 +248,4 @@ class MoleculeLookup::box_iterator {
   uint const *pIt;
 };
 
-#endif
+#endif /*MOLECULELOOKUP_H*/
diff --git a/src/PSFOutput.h b/src/PSFOutput.h
index 6158aa634..07ee8a1ea 100644
--- a/src/PSFOutput.h
+++ b/src/PSFOutput.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef PSFOUTPUT_H
-#define PSFOUTPUT_H
+#ifndef PSF_OUTPUT_H
+#define PSF_OUTPUT_H
 
 #include <string>
 #include <vector>
@@ -105,4 +105,4 @@ class PSFOutput : public OutputableBase {
   std::vector<std::string> moleculeSegmentNames;
 };
 
-#endif
+#endif /*PSF_OUTPUT_H*/
diff --git a/src/Random123Wrapper.h b/src/Random123Wrapper.h
index 50bcf4651..e80fba456 100644
--- a/src/Random123Wrapper.h
+++ b/src/Random123Wrapper.h
@@ -5,7 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#pragma once
+#ifndef RANDOM123_WRAPPER_H
+#define RANDOM123_WRAPPER_H
 
 #include "BasicTypes.h"
 #include "Random123/philox.h"
@@ -41,4 +42,6 @@ class Random123Wrapper {
   RNG::ctr_type c;
   RNG::key_type uk;
   RNG rng;
-};
\ No newline at end of file
+};
+
+#endif /*RANDOM123_WRAPPER_H*/
diff --git a/src/Setup.h b/src/Setup.h
index db554c004..772480604 100644
--- a/src/Setup.h
+++ b/src/Setup.h
@@ -57,4 +57,4 @@ class Setup {
   }
 };
 
-#endif
+#endif /*SETUP_H*/
diff --git a/src/SubdividedArray.h b/src/SubdividedArray.h
index 366766f3b..9e427f2d4 100644
--- a/src/SubdividedArray.h
+++ b/src/SubdividedArray.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef SUBDIV_ARRAY
-#define SUBDIV_ARRAY
+#ifndef SUB_DIVIDED_ARRAY_H
+#define SUB_DIVIDED_ARRAY_H
 
 #include <cstddef>
 
@@ -64,4 +64,4 @@ class SubdividedArray {
   uint *start, subdivCount;
 };
 
-#endif /*SUBDIV_ARRAY*/
+#endif /*SUB_DIVIDED_ARRAY_H*/
diff --git a/src/TransformMatrix.h b/src/TransformMatrix.h
index e6b2a359b..d95dc93b8 100644
--- a/src/TransformMatrix.h
+++ b/src/TransformMatrix.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef TRANSFORMMATRIX_H
-#define TRANSFORMMATRIX_H
+#ifndef TRANSFORM_MATRIX_H
+#define TRANSFORM_MATRIX_H
 
 #include "BasicTypes.h"
 #define _USE_MATH_DEFINES
@@ -264,4 +264,4 @@ inline TransformMatrix TransformMatrix::UniformRandom(double u1, double u2,
   result.matrix[2][2] = 1.0 - u3;
   return result;
 }
-#endif
+#endif /*TRANSFORM_MATRIX_H*/
diff --git a/src/Velocity.h b/src/Velocity.h
index 085e33785..26d7747b1 100644
--- a/src/Velocity.h
+++ b/src/Velocity.h
@@ -147,4 +147,4 @@ class Velocity : public XYZArray {
   double &temperature;               // system temperature
 };
 
-#endif
\ No newline at end of file
+#endif /*VELOCITY_H*/
\ No newline at end of file
diff --git a/src/cbmc/DCRotateOnAtom.h b/src/cbmc/DCRotateOnAtom.h
index 4c65b6c05..b559bce6a 100644
--- a/src/cbmc/DCRotateOnAtom.h
+++ b/src/cbmc/DCRotateOnAtom.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef DCROTATEONATOM
-#define DCROTATEONATOM
+#ifndef DCROTATEONATOM_H
+#define DCROTATEONATOM_H
 
 #include <vector>
 

From 11d44cdbc9d87e7a11df002170a0db9d0082c923 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Thu, 21 Sep 2023 19:20:48 -0400
Subject: [PATCH 20/42] Print error message if CUDA does not support this
 compiler

---
 CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3349d2c4e..11d14ffac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -132,6 +132,8 @@ if(CMAKE_CUDA_COMPILER)
     set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
     enable_language(CUDA)
     include(${PROJECT_SOURCE_DIR}/CMake/GOMCCUDASetup.cmake)
+else
+    message(STATUS "No CUDA support for this compiler")
 endif()
 
 # Setup Serial version

From 850848f43840cb0d57edad92b055409c8af1cdc5 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Thu, 21 Sep 2023 19:21:24 -0400
Subject: [PATCH 21/42] Build test executables only for specified ensembles

---
 metamake.sh | 55 ++++++++++++++++++++++-------------------------------
 1 file changed, 23 insertions(+), 32 deletions(-)

diff --git a/metamake.sh b/metamake.sh
index eecc34875..60437f21b 100755
--- a/metamake.sh
+++ b/metamake.sh
@@ -134,6 +134,17 @@ while [ "$#" -ne 0 ]; do
     shift
 done
 
+# If user hasn't specified any ensemble, cmake automatically compiles all ensembles.
+# This will ensure we don't print empty for ensembles.
+if [ -z "$ENSEMBLES" ];
+then
+	ENSEMBLES="NVT NPT GCMC GEMC"
+	if (( use_cuda ))
+	then
+		ENSEMBLES+=" GPU_NVT GPU_NPT GPU_GCMC GPU_GEMC"
+	fi
+fi
+
 mkdir -p bin
 cd bin
 
@@ -176,46 +187,26 @@ if (( !use_gtest )); then
 else
     if (( use_mpi )); 
     then
-        ENSEMBLES+="GOMC_NVT_MPI_Test "
-		ENSEMBLES+="GOMC_NPT_MPI_Test "
-		ENSEMBLES+="GOMC_GCMC_MPI_Test "
-		ENSEMBLES+="GOMC_GEMC_MPI_Test "
-		if (( use_cuda ))
-		then
-        	ENSEMBLES+="GOMC_GPU_NVT_MPI_Test "
-        	ENSEMBLES+="GOMC_GPU_NPT_MPI_Test "
-        	ENSEMBLES+="GOMC_GPU_GCMC_MPI_Test "
-        	ENSEMBLES+="GOMC_GPU_GEMC_MPI_Test "
-		fi
+        TESTENS=""
+        for ENS in $ENSEMBLES
+        do
+            TESTENS+="GOMC_"$ENS"_MPI_Test "
+        done
+        ENSEMBLES+=$TESTENS
         CMAKEARGS+="-DGOMC_GTEST_MPI=on "
     else
-        ENSEMBLES+="GOMC_NVT_Test "
-        ENSEMBLES+="GOMC_NPT_Test "
-        ENSEMBLES+="GOMC_GCMC_Test "
-        ENSEMBLES+="GOMC_GEMC_Test "
-		if (( use_cuda ))
-		then
-        	ENSEMBLES+="GOMC_GPU_NVT_Test "
-        	ENSEMBLES+="GOMC_GPU_NPT_Test "
-        	ENSEMBLES+="GOMC_GPU_GCMC_Test "
-        	ENSEMBLES+="GOMC_GPU_GEMC_Test "
-		fi
+        TESTENS=""
+        for ENS in $ENSEMBLES
+        do
+            TESTENS+="GOMC_"$ENS"_Test "
+        done
+        ENSEMBLES+=$TESTENS
         CMAKEARGS+="-DGOMC_GTEST=on "
     fi
     export CC="$(which gcc 2> /dev/null)"
     export CXX="$(which g++ 2> /dev/null)"
 fi
 
-# If user hasn't specified any ensemble, cmake automatically compiles all ensembles.
-# This will ensure we don't print empty for ensembles.
-if [ -z "$ENSEMBLES" ];
-then
-	ENSEMBLES="NVT NPT GCMC GEMC"
-	if (( use_cuda ))
-	then
-		ENSEMBLES+=" GPU_NVT GPU_NPT GPU_GCMC GPU_GEMC"
-	fi
-fi
 echo "Ensembles To Compile: $ENSEMBLES"
 
 if (( use_profiler )); then

From 823c6e47246b906cdd4150e44e1165769696623d Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Fri, 22 Sep 2023 10:36:46 -0400
Subject: [PATCH 22/42] Warning instead of Status message for compiler
 incompatible with CUDA

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 11d14ffac..e6821d399 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -133,7 +133,7 @@ if(CMAKE_CUDA_COMPILER)
     enable_language(CUDA)
     include(${PROJECT_SOURCE_DIR}/CMake/GOMCCUDASetup.cmake)
 else
-    message(STATUS "No CUDA support for this compiler")
+    message(WARNING "No CUDA support for this compiler")
 endif()
 
 # Setup Serial version

From b62c64b86ecd0a56657aa8b87bb9b8ea24a2021d Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Fri, 22 Sep 2023 12:04:59 -0400
Subject: [PATCH 23/42] Standardize guards on header files

---
 src/cbmc/DCRotateOnAtom.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/cbmc/DCRotateOnAtom.h b/src/cbmc/DCRotateOnAtom.h
index b559bce6a..4a6e5f480 100644
--- a/src/cbmc/DCRotateOnAtom.h
+++ b/src/cbmc/DCRotateOnAtom.h
@@ -55,4 +55,5 @@ class DCRotateOnAtom : public DCComponent {
   std::vector<Dihedral> dih;
 };
 } // namespace cbmc
-#endif
+
+#endif /*DCROTATEONATOM_H*/

From 17ee84d1a3a4fcd377dd37d2a6b503a057b525b8 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Fri, 22 Sep 2023 13:40:58 -0400
Subject: [PATCH 24/42] Fix syntax error in CMakeLists.txt for warning message

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e6821d399..762b6cf34 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -132,7 +132,7 @@ if(CMAKE_CUDA_COMPILER)
     set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
     enable_language(CUDA)
     include(${PROJECT_SOURCE_DIR}/CMake/GOMCCUDASetup.cmake)
-else
+else()
     message(WARNING "No CUDA support for this compiler")
 endif()
 

From b690cb7339b227f51e1edd69192811c1fa48afdd Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 8 Jan 2024 15:33:06 -0500
Subject: [PATCH 25/42] Remove link time optimization from the build process

---
 CMake/GOMCCUDASetup.cmake | 27 +++------------------------
 CMakeLists.txt            | 24 +++++++++++-------------
 2 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/CMake/GOMCCUDASetup.cmake b/CMake/GOMCCUDASetup.cmake
index b704cec3b..f6baad9da 100644
--- a/CMake/GOMCCUDASetup.cmake
+++ b/CMake/GOMCCUDASetup.cmake
@@ -15,24 +15,10 @@ endif()
 # Set architecture flags based on the CMake version
 # Once CMake 3.23 has been available for a while, we should just use
 # set(CMAKE_CUDA_ARCHITECTURES all) and remove the if block
-# Can't get CUDA link time optimization enabled for all architectures directly, so need to do one-by-one.
-if(NOT GOMC_OPT OR (CMAKE_BUILD_TYPE STREQUAL "Debug"))
-   if (CMAKE_MAJOR_VERSION VERSION_GREATER 3 OR CMAKE_MINOR_VERSION VERSION_GREATER_EQUAL 23)
-       set(CMAKE_CUDA_ARCHITECTURES all)
-   else()
-       set(CMAKE_CUDA_ARCHITECTURES 60;70;75;80)
-   endif()
+if (CMAKE_MAJOR_VERSION VERSION_GREATER 3 OR CMAKE_MINOR_VERSION VERSION_GREATER_EQUAL 23)
+    set(CMAKE_CUDA_ARCHITECTURES all)
 else()
-    set(CMAKE_CUDA_ARCHITECTURES OFF)
-    set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_60,code=lto_60")
-    set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_70,code=lto_70")
-    set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_75,code=lto_75")
-    set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} "SHELL:-gencode arch=compute_80,code=lto_80")
-    # set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_LINK_FLAGS} "SHELL:-arch=sm_60")
-    set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_LINK_FLAGS} "SHELL:-arch=sm_70")
-    # set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_LINK_FLAGS} "SHELL:-arch=sm_75")
-    # set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_LINK_FLAGS} "SHELL:-arch=sm_80")
-    set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_LINK_FLAGS} -dlto)
+    set(CMAKE_CUDA_ARCHITECTURES 60;70;75;80)
 endif()
 
 include_directories(src/GPU)
@@ -51,13 +37,6 @@ set(CMAKE_CUDA_STANDARD_REQUIRED true)
 set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_CXX_STANDARD_REQUIRED true)
 
-# Turn off warning that CUDA files were not compiled with the -ipo flag
-if(GOMC_OPT)
-    if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "IntelLLVM")
-        set(CMAKE_LINK_FLAGS ${CMAKE_LINK_FLAGS} -diag-disable=11003)
-    endif()
-endif()
-
 # Disable the warning on deprecated GPU targets
 set(CMAKE_CUDA_COMP_FLAGS ${CMAKE_CUDA_COMP_FLAGS} -Wno-deprecated-gpu-targets)
 set(CMAKE_CUDA_LINK_FLAGS ${CMAKE_CUDA_LINK_FLAGS} -Wno-deprecated-gpu-targets)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 762b6cf34..eae6c8f65 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,23 +29,21 @@ endif(NOT CMAKE_BUILD_TYPE)
 #Set compile and link flags. Need to do it this way so that we can pass
 #the source compiler flags to NVCC properly.
 if(GOMC_OPT)
-   set(CMAKE_INTEL_COMP_FLAGS -Ofast -ipo -xHost)
-   set(CMAKE_INTEL_CUDA_COMP_FLAGS "SHELL:-Xcompiler -Ofast" "SHELL:-Xcompiler -ipo" "SHELL:-Xcompiler -xHost")
-   set(CMAKE_INTEL_LINK_FLAGS -Ofast -ipo -xHost)
-   set(CMAKE_GNU_COMP_FLAGS -flto -fno-fat-lto-objects -m64 -march=native)
+   set(CMAKE_INTEL_COMP_FLAGS -Ofast -xHost)
+   set(CMAKE_INTEL_CUDA_COMP_FLAGS "SHELL:-Xcompiler -Ofast" "SHELL:-Xcompiler -xHost")
+   set(CMAKE_INTEL_LINK_FLAGS -Ofast -xHost)
+   set(CMAKE_GNU_COMP_FLAGS -O3 -m64 -march=native)
    set(CMAKE_GNU_CUDA_COMP_FLAGS "SHELL:-Xcompiler -O3" "SHELL:-Xcompiler -m64" "SHELL:-Xcompiler -march=native")
-   # set(CMAKE_GNU_CUDA_COMP_FLAGS "SHELL:-Xcompiler -O3" "SHELL:-Xcompiler -flto" "SHELL:-Xcompiler -fno-fat-lto-objects" "SHELL:-Xcompiler -march=native")
-   set(CMAKE_GNU_LINK_FLAGS -flto -fno-fat-lto-objects -m64 -march=native)
-   set(CMAKE_CLANG_COMP_FLAGS -O3 -flto -march=native)
-   set(CMAKE_CLANG_CUDA_COMP_FLAGS "SHELL:-Xcompiler -O3" "SHELL:-Xcompiler -flto" "SHELL:-Xcompiler -march=native")
-   set(CMAKE_CLANG_LINK_FLAGS -O3 -flto -march=native)
+   set(CMAKE_GNU_LINK_FLAGS -m64 -march=native)
+   set(CMAKE_CLANG_COMP_FLAGS -O3 -march=native)
+   set(CMAKE_CLANG_CUDA_COMP_FLAGS "SHELL:-Xcompiler -O3" "SHELL:-Xcompiler -march=native")
+   set(CMAKE_CLANG_LINK_FLAGS -O3 -march=native)
 endif()
 
 if(GOMC_ASAN)
-   set(CMAKE_GNU_COMP_FLAGS ${CMAKE_GNU_COMP_FLAGS} --param=max-vartrack-size=100000000 -fsanitize=address -fno-omit-frame-pointer)
-   set(CMAKE_GNU_CUDA_COMP_FLAGS ${CMAKE_GNU_CUDA_COMP_FLAGS} "SHELL:-Xcompiler --param=max-vartrack-size=100000000"
-                                                              "SHELL:-Xcompiler -fsanitize=address" "SHELL:-Xcompiler -fno-omit-frame-pointer")
-   set(CMAKE_GNU_LINK_FLAGS ${CMAKE_GNU_LINK_FLAGS} --param=max-vartrack-size=100000000 -fsanitize=address -fno-omit-frame-pointer)
+   set(CMAKE_GNU_COMP_FLAGS ${CMAKE_GNU_COMP_FLAGS} -fsanitize=address -fno-omit-frame-pointer)
+   set(CMAKE_GNU_CUDA_COMP_FLAGS ${CMAKE_GNU_CUDA_COMP_FLAGS} "SHELL:-Xcompiler -fsanitize=address" "SHELL:-Xcompiler -fno-omit-frame-pointer")
+   set(CMAKE_GNU_LINK_FLAGS ${CMAKE_GNU_LINK_FLAGS} -fsanitize=address -fno-omit-frame-pointer)
    set(CMAKE_CLANG_COMP_FLAGS ${CMAKE_CLANG_COMP_FLAGS} -fsanitize=address -fno-omit-frame-pointer)
    set(CMAKE_CLANG_CUDA_COMP_FLAGS ${CMAKE_CLANG_CUDA_COMP_FLAGS} "SHELL:-Xcompiler -fsanitize=address" "SHELL:-Xcompiler -fno-omit-frame-pointer")
    set(CMAKE_CLANG_LINK_FLAGS ${CMAKE_CLANG_LINK_FLAGS} -fsanitize=address -fno-omit-frame-pointer)

From 118e91731f384f047856b3951bd30fbbde1155a0 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Thu, 28 Mar 2024 14:19:20 -0400
Subject: [PATCH 26/42] Remove Intel LLVM Compiler detection until CUDA
 supports it

---
 metamake.sh | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/metamake.sh b/metamake.sh
index 60437f21b..f0e490274 100755
--- a/metamake.sh
+++ b/metamake.sh
@@ -100,7 +100,7 @@ while getopts 'acdglmnpt' opt; do
             echo "-g, use the GNU compiler"
 			echo "-l, use the Clang compiler"
             echo "-m, enables MPI support (Required for Parallel Tempering)"
-            echo "-n, disables most optimizing compiler flags"
+            echo "-n, disables most compiler optimization flags"
             echo "-p enables GPU code profiling (NVTX tags)"
             echo "-t disables Intel compiler to allow GTests to compile"
             echo "For combined usage, concatenate flags, e.g.: -ptmg"
@@ -149,14 +149,16 @@ mkdir -p bin
 cd bin
 
 if (( !use_gtest )); then
-    if (( !use_gcc && !use_clang )); then
-        ICC_PATH="$(which icx 2> /dev/null)"
-        ICPC_PATH="$(which icpx 2> /dev/null)"
-        if [ -z "$ICC_PATH" ]
-        then
+    if (( !use_gcc && !use_clang ));
+    then
+# comment out this check until CUDA supports the newer Intel Compiler
+#        ICC_PATH="$(which icx 2> /dev/null)"
+#        ICPC_PATH="$(which icpx 2> /dev/null)"
+#        if [ -z "$ICC_PATH" ]
+#        then
             ICC_PATH="$(which icc 2> /dev/null)"
             ICPC_PATH="$(which icpc 2> /dev/null)"
-		fi
+#		fi
         if [ -z "$ICC_PATH" ]
         then
             export CC="$(which gcc 2> /dev/null)"

From 0bc3977e88d298094e7950d2f2b8bb33d0656ed0 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Thu, 28 Mar 2024 15:30:45 -0400
Subject: [PATCH 27/42] Clean up some #define statements and comments on the
 #endif for header files

---
 src/CoordinateSetup.h       | 6 +++---
 src/EnergyTypes.h           | 6 +++---
 src/MolSetup.h              | 6 +++---
 src/MoleculeLookup.h        | 6 +++---
 src/cbmc/DCComponent.h      | 2 +-
 src/cbmc/DCCrankShaftAng.h  | 2 +-
 src/cbmc/DCCrankShaftDih.h  | 2 +-
 src/cbmc/DCCyclic.h         | 2 +-
 src/cbmc/DCData.h           | 2 +-
 src/cbmc/DCFactory.h        | 2 +-
 src/cbmc/DCFreeCycle.h      | 2 +-
 src/cbmc/DCFreeCycleSeed.h  | 2 +-
 src/cbmc/DCFreeHedron.h     | 2 +-
 src/cbmc/DCFreeHedronSeed.h | 2 +-
 src/cbmc/DCGraph.h          | 2 +-
 src/cbmc/DCHedron.h         | 2 +-
 src/cbmc/DCHedronCycle.h    | 2 +-
 src/cbmc/DCLinear.h         | 2 +-
 src/cbmc/DCLinkedCycle.h    | 2 +-
 src/cbmc/DCLinkedHedron.h   | 2 +-
 src/cbmc/DCOnSphere.h       | 2 +-
 src/cbmc/DCRotateCOM.h      | 2 +-
 src/cbmc/DCSingle.h         | 2 +-
 src/cbmc/TrialMol.h         | 2 +-
 24 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/src/CoordinateSetup.h b/src/CoordinateSetup.h
index 019042abb..1491edccc 100644
--- a/src/CoordinateSetup.h
+++ b/src/CoordinateSetup.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef COORDINATESETUP_H
-#define COORDINATESETUP_H
+#ifndef COORDINATE_SETUP_H
+#define COORDINATE_SETUP_H
 
 #include <string>
 #include <vector>
@@ -21,4 +21,4 @@ struct CoordinateSetup {
   void SetCOM(const MolSetupData &molData);
 }
 
-#endif /*COORDINATESETUP_H*/
+#endif /*COORDINATE_SETUP_H*/
diff --git a/src/EnergyTypes.h b/src/EnergyTypes.h
index 3ec3b0682..d909fff22 100644
--- a/src/EnergyTypes.h
+++ b/src/EnergyTypes.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef ENERGYTYPES_H
-#define ENERGYTYPES_H
+#ifndef ENERGY_TYPES_H
+#define ENERGY_TYPES_H
 
 /*
  *    EnergyTypes.h
@@ -539,4 +539,4 @@ inline std::ostream &operator<<(std::ostream &out, Energy &en) {
 }
 #endif
 
-#endif /*ENERGYTYPES_H*/
+#endif /*ENERGY_TYPES_H*/
diff --git a/src/MolSetup.h b/src/MolSetup.h
index 6b0184809..63f209656 100644
--- a/src/MolSetup.h
+++ b/src/MolSetup.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef MOLSETUP_H
-#define MOLSETUP_H
+#ifndef MOL_SETUP_H
+#define MOL_SETUP_H
 
 #include <cereal/access.hpp>
 #include <cereal/types/map.hpp>
@@ -337,4 +337,4 @@ class MolSetup {
     ar &molVars;
   }
 };
-#endif /*MOLSETUP_H*/
+#endif /*MOL_SETUP_H*/
diff --git a/src/MoleculeLookup.h b/src/MoleculeLookup.h
index a0ac3e3f5..41f9580a5 100644
--- a/src/MoleculeLookup.h
+++ b/src/MoleculeLookup.h
@@ -5,8 +5,8 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#ifndef MOLECULELOOKUP_H
-#define MOLECULELOOKUP_H
+#ifndef MOLECULE_LOOKUP_H
+#define MOLECULE_LOOKUP_H
 
 #include <cereal/access.hpp>
 #include <cereal/cereal.hpp>
@@ -248,4 +248,4 @@ class MoleculeLookup::box_iterator {
   uint const *pIt;
 };
 
-#endif /*MOLECULELOOKUP_H*/
+#endif /*MOLECULE_LOOKUP_H*/
diff --git a/src/cbmc/DCComponent.h b/src/cbmc/DCComponent.h
index b33cc32f6..668041125 100644
--- a/src/cbmc/DCComponent.h
+++ b/src/cbmc/DCComponent.h
@@ -29,4 +29,4 @@ class DCComponent {
 };
 } // namespace cbmc
 
-#endif
+#endif /*DCCOMPONENT_H*/
diff --git a/src/cbmc/DCCrankShaftAng.h b/src/cbmc/DCCrankShaftAng.h
index db16d88ff..97e746714 100644
--- a/src/cbmc/DCCrankShaftAng.h
+++ b/src/cbmc/DCCrankShaftAng.h
@@ -55,4 +55,4 @@ class DCCrankShaftAng : public DCComponent {
   std::vector<Dihedral> dih;
 };
 } // namespace cbmc
-#endif
+#endif /*DCCRANKSHAFTANG_H*/
diff --git a/src/cbmc/DCCrankShaftDih.h b/src/cbmc/DCCrankShaftDih.h
index cf4722794..475d56d33 100644
--- a/src/cbmc/DCCrankShaftDih.h
+++ b/src/cbmc/DCCrankShaftDih.h
@@ -55,4 +55,4 @@ class DCCrankShaftDih : public DCComponent {
   std::vector<Dihedral> dih;
 };
 } // namespace cbmc
-#endif
+#endif /*DCCRANKSHAFTDIH_H*/
diff --git a/src/cbmc/DCCyclic.h b/src/cbmc/DCCyclic.h
index 1ccc3a9cc..d3beab896 100644
--- a/src/cbmc/DCCyclic.h
+++ b/src/cbmc/DCCyclic.h
@@ -93,4 +93,4 @@ class DCCyclic : public CBMC {
 };
 } // namespace cbmc
 
-#endif
\ No newline at end of file
+#endif /*DCCYCLIC_H*/
\ No newline at end of file
diff --git a/src/cbmc/DCData.h b/src/cbmc/DCData.h
index 69a8c00c7..4a294a476 100644
--- a/src/cbmc/DCData.h
+++ b/src/cbmc/DCData.h
@@ -131,4 +131,4 @@ inline DCData::~DCData() {
 
 } // namespace cbmc
 
-#endif
+#endif /*DCDATA_H*/
diff --git a/src/cbmc/DCFactory.h b/src/cbmc/DCFactory.h
index 1a6517448..6feaa7123 100644
--- a/src/cbmc/DCFactory.h
+++ b/src/cbmc/DCFactory.h
@@ -20,4 +20,4 @@ class DCFactory {
 };
 } // namespace cbmc
 
-#endif
+#endif /*DCFACTORY_H*/
diff --git a/src/cbmc/DCFreeCycle.h b/src/cbmc/DCFreeCycle.h
index a964e283d..50c1193fc 100644
--- a/src/cbmc/DCFreeCycle.h
+++ b/src/cbmc/DCFreeCycle.h
@@ -51,4 +51,4 @@ class DCFreeCycle : public DCComponent {
 };
 } // namespace cbmc
 
-#endif
+#endif /*DCFREECYCLE_H*/
diff --git a/src/cbmc/DCFreeCycleSeed.h b/src/cbmc/DCFreeCycleSeed.h
index 706ce97de..7991cfd8e 100644
--- a/src/cbmc/DCFreeCycleSeed.h
+++ b/src/cbmc/DCFreeCycleSeed.h
@@ -49,4 +49,4 @@ class DCFreeCycleSeed : public DCComponent {
 };
 } // namespace cbmc
 
-#endif
+#endif /*DCFREECYCLESEED_H*/
diff --git a/src/cbmc/DCFreeHedron.h b/src/cbmc/DCFreeHedron.h
index 854473bb2..b9b76f837 100644
--- a/src/cbmc/DCFreeHedron.h
+++ b/src/cbmc/DCFreeHedron.h
@@ -50,4 +50,4 @@ class DCFreeHedron : public DCComponent {
 };
 } // namespace cbmc
 
-#endif
+#endif /*DCFREEHEDRON_H*/
diff --git a/src/cbmc/DCFreeHedronSeed.h b/src/cbmc/DCFreeHedronSeed.h
index 691cb4287..58b891bdc 100644
--- a/src/cbmc/DCFreeHedronSeed.h
+++ b/src/cbmc/DCFreeHedronSeed.h
@@ -48,4 +48,4 @@ class DCFreeHedronSeed : public DCComponent {
 };
 } // namespace cbmc
 
-#endif
+#endif /*DCFREEHEDRONSEED_H*/
diff --git a/src/cbmc/DCGraph.h b/src/cbmc/DCGraph.h
index 7b618f759..a3f273c7d 100644
--- a/src/cbmc/DCGraph.h
+++ b/src/cbmc/DCGraph.h
@@ -83,4 +83,4 @@ class DCGraph : public CBMC {
 };
 } // namespace cbmc
 
-#endif
+#endif /*DCGRAPH_H*/
diff --git a/src/cbmc/DCHedron.h b/src/cbmc/DCHedron.h
index 557180134..425174062 100644
--- a/src/cbmc/DCHedron.h
+++ b/src/cbmc/DCHedron.h
@@ -69,4 +69,4 @@ class DCHedron {
 };
 } // namespace cbmc
 
-#endif
+#endif /*DCHEDRON_H*/
diff --git a/src/cbmc/DCHedronCycle.h b/src/cbmc/DCHedronCycle.h
index 38a281a95..a51d84e23 100644
--- a/src/cbmc/DCHedronCycle.h
+++ b/src/cbmc/DCHedronCycle.h
@@ -84,4 +84,4 @@ class DCHedronCycle {
 };
 } // namespace cbmc
 
-#endif
+#endif /*DCHEDRONCYCLE_H*/
diff --git a/src/cbmc/DCLinear.h b/src/cbmc/DCLinear.h
index 5ff25bfcc..8bcf8f3ce 100644
--- a/src/cbmc/DCLinear.h
+++ b/src/cbmc/DCLinear.h
@@ -47,4 +47,4 @@ class DCLinear : public CBMC {
 };
 } // namespace cbmc
 
-#endif
+#endif /*DCLINEAR_H*/
diff --git a/src/cbmc/DCLinkedCycle.h b/src/cbmc/DCLinkedCycle.h
index 73cdce981..5d91c18a5 100644
--- a/src/cbmc/DCLinkedCycle.h
+++ b/src/cbmc/DCLinkedCycle.h
@@ -65,4 +65,4 @@ class DCLinkedCycle : public DCComponent {
   std::vector<bool> bExist;
 };
 } // namespace cbmc
-#endif
+#endif /*DCLINKEDCYCLE_H*/
diff --git a/src/cbmc/DCLinkedHedron.h b/src/cbmc/DCLinkedHedron.h
index 757fdca45..9b436c81f 100644
--- a/src/cbmc/DCLinkedHedron.h
+++ b/src/cbmc/DCLinkedHedron.h
@@ -53,4 +53,4 @@ class DCLinkedHedron : public DCComponent {
   uint bondKinds[MAX_BONDS];
 };
 } // namespace cbmc
-#endif
+#endif /*DCLINKEDHEDRON_H*/
diff --git a/src/cbmc/DCOnSphere.h b/src/cbmc/DCOnSphere.h
index 3f4b7c8dc..1a7009429 100644
--- a/src/cbmc/DCOnSphere.h
+++ b/src/cbmc/DCOnSphere.h
@@ -41,4 +41,4 @@ class DCOnSphere : public DCComponent {
 };
 
 } // namespace cbmc
-#endif
+#endif /*DCONSPHERE_H*/
diff --git a/src/cbmc/DCRotateCOM.h b/src/cbmc/DCRotateCOM.h
index 49700e8bf..5d81b6194 100644
--- a/src/cbmc/DCRotateCOM.h
+++ b/src/cbmc/DCRotateCOM.h
@@ -44,4 +44,4 @@ class DCRotateCOM : public DCComponent {
 };
 } // namespace cbmc
 
-#endif
+#endif /*DCROTATECOM_H*/
diff --git a/src/cbmc/DCSingle.h b/src/cbmc/DCSingle.h
index 40e30baea..ac97d9af2 100644
--- a/src/cbmc/DCSingle.h
+++ b/src/cbmc/DCSingle.h
@@ -32,4 +32,4 @@ class DCSingle : public DCComponent {
   uint atom;
 };
 } // namespace cbmc
-#endif
+#endif /*DCSINGLE_H*/
diff --git a/src/cbmc/TrialMol.h b/src/cbmc/TrialMol.h
index 92d1fe29f..cf311f753 100644
--- a/src/cbmc/TrialMol.h
+++ b/src/cbmc/TrialMol.h
@@ -256,4 +256,4 @@ class TrialMol {
 };
 } // namespace cbmc
 
-#endif
+#endif /*TRIALMOL_H*/

From 8173bddda61066778063c6da4f8bfb7a42fdc914 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Thu, 28 Mar 2024 16:11:33 -0400
Subject: [PATCH 28/42] Resolve Codacy-detected error for builds with tests

---
 metamake.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/metamake.sh b/metamake.sh
index f0e490274..130b0d4ac 100755
--- a/metamake.sh
+++ b/metamake.sh
@@ -192,7 +192,7 @@ else
         TESTENS=""
         for ENS in $ENSEMBLES
         do
-            TESTENS+="GOMC_"$ENS"_MPI_Test "
+            TESTENS+="GOMC_$ENS_MPI_Test "
         done
         ENSEMBLES+=$TESTENS
         CMAKEARGS+="-DGOMC_GTEST_MPI=on "
@@ -200,7 +200,7 @@ else
         TESTENS=""
         for ENS in $ENSEMBLES
         do
-            TESTENS+="GOMC_"$ENS"_Test "
+            TESTENS+="GOMC_$ENS_Test "
         done
         ENSEMBLES+=$TESTENS
         CMAKEARGS+="-DGOMC_GTEST=on "

From fbf6c9fcfb09adde20dbd5a3b33e93749452963f Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Thu, 28 Mar 2024 17:11:09 -0400
Subject: [PATCH 29/42] Handle ensemble names when building the test configs

---
 metamake.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/metamake.sh b/metamake.sh
index 130b0d4ac..4c2b800a4 100755
--- a/metamake.sh
+++ b/metamake.sh
@@ -138,10 +138,10 @@ done
 # This will ensure we don't print empty for ensembles.
 if [ -z "$ENSEMBLES" ];
 then
-	ENSEMBLES="NVT NPT GCMC GEMC"
+	ENSEMBLES="NVT NPT GCMC GEMC "
 	if (( use_cuda ))
 	then
-		ENSEMBLES+=" GPU_NVT GPU_NPT GPU_GCMC GPU_GEMC"
+		ENSEMBLES+="GPU_NVT GPU_NPT GPU_GCMC GPU_GEMC "
 	fi
 fi
 
@@ -192,7 +192,7 @@ else
         TESTENS=""
         for ENS in $ENSEMBLES
         do
-            TESTENS+="GOMC_$ENS_MPI_Test "
+            TESTENS+="GOMC_${ENS}_MPI_Test "
         done
         ENSEMBLES+=$TESTENS
         CMAKEARGS+="-DGOMC_GTEST_MPI=on "
@@ -200,7 +200,7 @@ else
         TESTENS=""
         for ENS in $ENSEMBLES
         do
-            TESTENS+="GOMC_$ENS_Test "
+            TESTENS+="GOMC_${ENS}_Test "
         done
         ENSEMBLES+=$TESTENS
         CMAKEARGS+="-DGOMC_GTEST=on "

From 0a97b181891f5051e265e67b766143e78d7e81dc Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 8 Jul 2024 09:26:23 -0400
Subject: [PATCH 30/42] Build CUDA code for GPU executables

---
 CMake/GOMCCUDASetup.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CMake/GOMCCUDASetup.cmake b/CMake/GOMCCUDASetup.cmake
index f6baad9da..8c069d8e5 100644
--- a/CMake/GOMCCUDASetup.cmake
+++ b/CMake/GOMCCUDASetup.cmake
@@ -1,5 +1,6 @@
 # Find CUDA is enabled, set it up
 set(CMAKE_CUDA_COMP_FLAGS -DGOMC_CUDA -DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT)
+set(CMAKE_COMP_FLAGS ${CMAKE_COMP_FLAGS} -DGOMC_CUDA)
 
 if(CMAKE_BUILD_TYPE STREQUAL "Debug")
     message("-- Debug build type detected, passing '-g -G --keep' to nvcc")

From 55726dec72369ce8f36f44c54a1e491accab9f83 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 8 Jul 2024 09:46:56 -0400
Subject: [PATCH 31/42] Don't include GPU calls in CPU builds

---
 CMake/GOMCCUDASetup.cmake | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMake/GOMCCUDASetup.cmake b/CMake/GOMCCUDASetup.cmake
index 8c069d8e5..491fc5467 100644
--- a/CMake/GOMCCUDASetup.cmake
+++ b/CMake/GOMCCUDASetup.cmake
@@ -1,6 +1,6 @@
 # Find CUDA is enabled, set it up
 set(CMAKE_CUDA_COMP_FLAGS -DGOMC_CUDA -DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT)
-set(CMAKE_COMP_FLAGS ${CMAKE_COMP_FLAGS} -DGOMC_CUDA)
+set(CMAKE_HOST_COMP_FLAGS ${CMAKE_COMP_FLAGS} -DGOMC_CUDA)
 
 if(CMAKE_BUILD_TYPE STREQUAL "Debug")
     message("-- Debug build type detected, passing '-g -G --keep' to nvcc")
@@ -50,7 +50,7 @@ if(ENSEMBLE_GPU_NVT)
     ${headers} ${libHeaders} ${libSources})
     # Set compiler and linker flags for NVCC and the host compiler
     target_compile_options(GPU_NVT
-       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
+       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_HOST_COMP_FLAGS}>
               $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
     target_link_options(GPU_NVT
        PUBLIC $<HOST_LINK:${CMAKE_LINK_FLAGS}>
@@ -76,7 +76,7 @@ if(ENSEMBLE_GPU_GEMC)
     ${headers} ${libHeaders} ${libSources})
     # Set compiler and linker flags for NVCC and the host compiler
     target_compile_options(GPU_GEMC
-       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
+       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_HOST_COMP_FLAGS}>
               $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
     target_link_options(GPU_GEMC
        PUBLIC $<HOST_LINK:${CMAKE_LINK_FLAGS}>
@@ -102,7 +102,7 @@ if(ENSEMBLE_GPU_GCMC)
     ${headers} ${libHeaders} ${libSources})
     # Set compiler and linker flags for NVCC and the host compiler
     target_compile_options(GPU_GCMC
-       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
+       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_HOST_COMP_FLAGS}>
               $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
     target_link_options(GPU_GCMC
        PUBLIC $<HOST_LINK:${CMAKE_LINK_FLAGS}>
@@ -128,7 +128,7 @@ if(ENSEMBLE_GPU_NPT)
     ${headers} ${libHeaders} ${libSources})
     # Set compiler and linker flags for NVCC and the host compiler
     target_compile_options(GPU_NPT
-       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_COMP_FLAGS}>
+       PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${CMAKE_HOST_COMP_FLAGS}>
               $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CUDA_COMP_FLAGS} ${CMAKE_GPU_COMP_FLAGS}>)
     target_link_options(GPU_NPT
        PUBLIC $<HOST_LINK:${CMAKE_LINK_FLAGS}>

From afe876decbad4073e5411ccc87ba7174fc7c31de Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Tue, 9 Jul 2024 22:35:51 -0400
Subject: [PATCH 32/42] Don't call RotationMatrix::UniformRandom with prng()
 calls as function parameters so that gcc and intel give consistent results

---
 src/cbmc/DCFreeCycle.cpp      | 14 ++++++++++++--
 src/cbmc/DCFreeCycleSeed.cpp  | 14 ++++++++++++--
 src/cbmc/DCFreeHedron.cpp     | 14 ++++++++++++--
 src/cbmc/DCFreeHedronSeed.cpp | 14 ++++++++++++--
 src/cbmc/DCRotateCOM.cpp      | 14 ++++++++++++--
 5 files changed, 60 insertions(+), 10 deletions(-)

diff --git a/src/cbmc/DCFreeCycle.cpp b/src/cbmc/DCFreeCycle.cpp
index e12557a19..ad6290af2 100644
--- a/src/cbmc/DCFreeCycle.cpp
+++ b/src/cbmc/DCFreeCycle.cpp
@@ -139,9 +139,14 @@ void DCFreeCycle::BuildNew(TrialMol &newMol, uint molIndex) {
   positions[hed.NumBond()].Set(0, newMol.RawRectCoords(anchorBond, 0, 0));
 
   // counting backward to preserve prototype
+  double u1, u2, u3;
   for (uint lj = nLJTrials; lj-- > 0;) {
     // convert chosen torsion to 3D positions
-    RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
+    u1 = prng();
+    u2 = prng();
+    u3 = prng();
+    RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
+    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
@@ -217,9 +222,14 @@ void DCFreeCycle::BuildOld(TrialMol &oldMol, uint molIndex) {
   positions[hed.NumBond()].Add(0, -center);
 
   // counting backward to preserve prototype
+  double u1, u2, u3;
   for (uint lj = nLJTrials; lj-- > 1;) {
     // convert chosen torsion to 3D positions
-    RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
+    u1 = prng();
+    u2 = prng();
+    u3 = prng();
+    RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
+    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
diff --git a/src/cbmc/DCFreeCycleSeed.cpp b/src/cbmc/DCFreeCycleSeed.cpp
index 2ed2829fd..133106742 100644
--- a/src/cbmc/DCFreeCycleSeed.cpp
+++ b/src/cbmc/DCFreeCycleSeed.cpp
@@ -138,9 +138,14 @@ void DCFreeCycleSeed::BuildNew(TrialMol &newMol, uint molIndex) {
   positions[hed.NumBond()].Set(0, newMol.RawRectCoords(anchorBond, 0, 0));
 
   // counting backward to preserve prototype
+  double u1, u2, u3;
   for (uint lj = nLJTrials; lj-- > 0;) {
     // convert chosen torsion to 3D positions
-    RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
+    u1 = prng();
+    u2 = prng();
+    u3 = prng();
+    RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
+    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
@@ -216,9 +221,14 @@ void DCFreeCycleSeed::BuildOld(TrialMol &oldMol, uint molIndex) {
   positions[hed.NumBond()].Add(0, -center);
 
   // counting backward to preserve prototype
+  double u1, u2, u3;
   for (uint lj = nLJTrials; lj-- > 1;) {
     // convert chosen torsion to 3D positions
-    RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
+    u1 = prng();
+    u2 = prng();
+    u3 = prng();
+    RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
+    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
diff --git a/src/cbmc/DCFreeHedron.cpp b/src/cbmc/DCFreeHedron.cpp
index f13c03e0a..dd76b9a1d 100644
--- a/src/cbmc/DCFreeHedron.cpp
+++ b/src/cbmc/DCFreeHedron.cpp
@@ -118,9 +118,14 @@ void DCFreeHedron::BuildNew(TrialMol &newMol, uint molIndex) {
   positions[hed.NumBond()].Set(0, newMol.RawRectCoords(anchorBond, 0, 0));
 
   // counting backward to preserve prototype
+  double u1, u2, u3;
   for (uint lj = nLJTrials; lj-- > 0;) {
     // convert chosen torsion to 3D positions
-    RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
+    u1 = prng();
+    u2 = prng();
+    u3 = prng();
+    RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
+    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
@@ -196,9 +201,14 @@ void DCFreeHedron::BuildOld(TrialMol &oldMol, uint molIndex) {
   positions[hed.NumBond()].Add(0, -center);
 
   // counting backward to preserve prototype
+  double u1, u2, u3;
   for (uint lj = nLJTrials; lj-- > 1;) {
     // convert chosen torsion to 3D positions
-    RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
+    u1 = prng();
+    u2 = prng();
+    u3 = prng();
+    RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
+    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
diff --git a/src/cbmc/DCFreeHedronSeed.cpp b/src/cbmc/DCFreeHedronSeed.cpp
index 6907994c1..25ed3bc41 100644
--- a/src/cbmc/DCFreeHedronSeed.cpp
+++ b/src/cbmc/DCFreeHedronSeed.cpp
@@ -117,9 +117,14 @@ void DCFreeHedronSeed::BuildNew(TrialMol &newMol, uint molIndex) {
   positions[hed.NumBond()].Set(0, newMol.RawRectCoords(anchorBond, 0, 0));
 
   // counting backward to preserve prototype
+  double u1, u2, u3;
   for (uint lj = nLJTrials; lj-- > 0;) {
     // convert chosen torsion to 3D positions
-    RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
+    u1 = prng();
+    u2 = prng();
+    u3 = prng();
+    RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
+    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
@@ -195,9 +200,14 @@ void DCFreeHedronSeed::BuildOld(TrialMol &oldMol, uint molIndex) {
   positions[hed.NumBond()].Add(0, -center);
 
   // counting backward to preserve prototype
+  double u1, u2, u3;
   for (uint lj = nLJTrials; lj-- > 1;) {
     // convert chosen torsion to 3D positions
-    RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
+    u1 = prng();
+    u2 = prng();
+    u3 = prng();
+    RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
+    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
diff --git a/src/cbmc/DCRotateCOM.cpp b/src/cbmc/DCRotateCOM.cpp
index ff112b468..73cc74dad 100644
--- a/src/cbmc/DCRotateCOM.cpp
+++ b/src/cbmc/DCRotateCOM.cpp
@@ -182,7 +182,12 @@ void DCRotateCOM::BuildNew(TrialMol &newMol, uint molIndex) {
         RandRotateZ();
       } else {
         // convert chosen torsion to 3D positions
-        spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
+        double u1, u2, u3;
+        u1 = prng();
+        u2 = prng();
+        u3 = prng();
+        spin = RotationMatrix::UniformRandom(u1, u2, u3);
+        // spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
       }
 
       for (uint a = 0; a < atomNumber; ++a) {
@@ -287,7 +292,12 @@ void DCRotateCOM::BuildOld(TrialMol &oldMol, uint molIndex) {
         RandRotateZ();
       } else {
         // convert chosen torsion to 3D positions
-        spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
+        double u1, u2, u3;
+        u1 = prng();
+        u2 = prng();
+        u3 = prng();
+        spin = RotationMatrix::UniformRandom(u1, u2, u3);
+        // spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
       }
 
       for (uint a = 0; a < atomNumber; ++a) {

From c99f658329230cc9a582944653fb1f060cf56ef6 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Tue, 9 Jul 2024 22:46:59 -0400
Subject: [PATCH 33/42] Remove old UniformRandom function calls

---
 src/cbmc/DCFreeCycle.cpp      | 2 --
 src/cbmc/DCFreeCycleSeed.cpp  | 2 --
 src/cbmc/DCFreeHedron.cpp     | 2 --
 src/cbmc/DCFreeHedronSeed.cpp | 2 --
 src/cbmc/DCRotateCOM.cpp      | 2 --
 5 files changed, 10 deletions(-)

diff --git a/src/cbmc/DCFreeCycle.cpp b/src/cbmc/DCFreeCycle.cpp
index ad6290af2..5dc4eb307 100644
--- a/src/cbmc/DCFreeCycle.cpp
+++ b/src/cbmc/DCFreeCycle.cpp
@@ -146,7 +146,6 @@ void DCFreeCycle::BuildNew(TrialMol &newMol, uint molIndex) {
     u2 = prng();
     u3 = prng();
     RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
-    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
@@ -229,7 +228,6 @@ void DCFreeCycle::BuildOld(TrialMol &oldMol, uint molIndex) {
     u2 = prng();
     u3 = prng();
     RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
-    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
diff --git a/src/cbmc/DCFreeCycleSeed.cpp b/src/cbmc/DCFreeCycleSeed.cpp
index 133106742..f57a2aced 100644
--- a/src/cbmc/DCFreeCycleSeed.cpp
+++ b/src/cbmc/DCFreeCycleSeed.cpp
@@ -145,7 +145,6 @@ void DCFreeCycleSeed::BuildNew(TrialMol &newMol, uint molIndex) {
     u2 = prng();
     u3 = prng();
     RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
-    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
@@ -228,7 +227,6 @@ void DCFreeCycleSeed::BuildOld(TrialMol &oldMol, uint molIndex) {
     u2 = prng();
     u3 = prng();
     RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
-    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
diff --git a/src/cbmc/DCFreeHedron.cpp b/src/cbmc/DCFreeHedron.cpp
index dd76b9a1d..89aa59efc 100644
--- a/src/cbmc/DCFreeHedron.cpp
+++ b/src/cbmc/DCFreeHedron.cpp
@@ -125,7 +125,6 @@ void DCFreeHedron::BuildNew(TrialMol &newMol, uint molIndex) {
     u2 = prng();
     u3 = prng();
     RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
-    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
@@ -208,7 +207,6 @@ void DCFreeHedron::BuildOld(TrialMol &oldMol, uint molIndex) {
     u2 = prng();
     u3 = prng();
     RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
-    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
diff --git a/src/cbmc/DCFreeHedronSeed.cpp b/src/cbmc/DCFreeHedronSeed.cpp
index 25ed3bc41..b6d527069 100644
--- a/src/cbmc/DCFreeHedronSeed.cpp
+++ b/src/cbmc/DCFreeHedronSeed.cpp
@@ -124,7 +124,6 @@ void DCFreeHedronSeed::BuildNew(TrialMol &newMol, uint molIndex) {
     u2 = prng();
     u3 = prng();
     RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
-    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
@@ -207,7 +206,6 @@ void DCFreeHedronSeed::BuildOld(TrialMol &oldMol, uint molIndex) {
     u2 = prng();
     u3 = prng();
     RotationMatrix spin = RotationMatrix::UniformRandom(u1, u2, u3);
-    // RotationMatrix spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
     for (uint b = 0; b < hed.NumBond() + 1; ++b) {
       // find positions
       positions[b].Set(lj, spin.Apply(positions[b][0]));
diff --git a/src/cbmc/DCRotateCOM.cpp b/src/cbmc/DCRotateCOM.cpp
index 73cc74dad..19471438f 100644
--- a/src/cbmc/DCRotateCOM.cpp
+++ b/src/cbmc/DCRotateCOM.cpp
@@ -187,7 +187,6 @@ void DCRotateCOM::BuildNew(TrialMol &newMol, uint molIndex) {
         u2 = prng();
         u3 = prng();
         spin = RotationMatrix::UniformRandom(u1, u2, u3);
-        // spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
       }
 
       for (uint a = 0; a < atomNumber; ++a) {
@@ -297,7 +296,6 @@ void DCRotateCOM::BuildOld(TrialMol &oldMol, uint molIndex) {
         u2 = prng();
         u3 = prng();
         spin = RotationMatrix::UniformRandom(u1, u2, u3);
-        // spin = RotationMatrix::UniformRandom(prng(), prng(), prng());
       }
 
       for (uint a = 0; a < atomNumber; ++a) {

From d3dd0e70ff01319ea55ac9aeba864d9f5e2e3ac2 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Tue, 19 Nov 2024 10:21:30 -0500
Subject: [PATCH 34/42] Fix typo in comment

---
 src/moves/VolumeTransfer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/moves/VolumeTransfer.h b/src/moves/VolumeTransfer.h
index a291f570a..62d347f7c 100644
--- a/src/moves/VolumeTransfer.h
+++ b/src/moves/VolumeTransfer.h
@@ -82,7 +82,7 @@ inline uint VolumeTransfer::Prep(const double subDraw, const double movePerc) {
     if (fixBox0) {
       // For NPT-GEMC and when box0 is fixed, we cannot pick box 0
       while (box == 0) {
-        // To avoid infinite loop, we don't use sunDraw
+        // To avoid infinite loop, we don't use subDraw
         box = prng.randIntExc(BOX_TOTAL);
       }
     }

From aab3c02dfa17d3b85e0073076bc720626abef09d Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Fri, 22 Nov 2024 21:46:42 -0500
Subject: [PATCH 35/42] Add entries to print newer OpenMP versions

---
 src/Main.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Main.cpp b/src/Main.cpp
index 262b151d5..6b376e61c 100644
--- a/src/Main.cpp
+++ b/src/Main.cpp
@@ -114,9 +114,9 @@ int main(int argc, char *argv[]) {
     // Print OpenMP version if recognized or OpenMP date code if not recognized.
 #ifdef _OPENMP
     std::unordered_map<unsigned, std::string> omp_map{
-        {200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"},
-        {201307, "4.0"}, {201511, "4.5"}, {201611, "5.0 Preview 1"},
-        {201811, "5.0"}};
+        {200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"}, {201307, "4.0"},
+        {201511, "4.5"}, {201611, "5.0 Preview 1"}, {201811, "5.0"},
+        {202011, "5.1"}, {202111, "5.2"}, {202411, "6.0"}};
     std::unordered_map<unsigned, std::string>::const_iterator match =
         omp_map.find(_OPENMP);
     if (match == omp_map.end())

From 69b4ebcb03596d5c98f234b85df02a37663a0237 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Sat, 30 Nov 2024 12:35:21 -0500
Subject: [PATCH 36/42] Patch to handle older OpenMP versions

---
 src/EwaldCached.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/EwaldCached.cpp b/src/EwaldCached.cpp
index f613d298a..9d8d36413 100644
--- a/src/EwaldCached.cpp
+++ b/src/EwaldCached.cpp
@@ -425,7 +425,7 @@ void EwaldCached::ChangeRecip(Energy *energyDiff, Energy &dUdL_Coul,
   double *energyRecip = new double[lambdaSize];
   std::fill_n(energyRecip, lambdaSize, 0.0);
 
-#if defined _OPENMP
+#if defined _OPENMP && _OPENMP >= 201511 // needs OpenMP 4.5+ (array reduction)
 #pragma omp parallel for default(none) shared(lambda_Coul, lambdaSize) \
 reduction(+:energyRecip[:lambdaSize]) firstprivate(box, iState, molIndex)
 #endif

From 64eba6e8fd504ddfbb663e6be1e5a36469c574c4 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Sat, 30 Nov 2024 12:43:51 -0500
Subject: [PATCH 37/42] Updates to build on Windows with Visual Studio

---
 CMakeLists.txt                | 13 +++++++------
 README.md                     | 10 +++++-----
 lib/BasicTypes.h              |  1 +
 lib/GeomLib.h                 | 14 ++++++++++----
 lib/NumLib.h                  |  6 +++---
 lib/VectorLib.h               |  2 +-
 src/CheckpointSetup.cpp       |  2 +-
 src/ConfigSetup.cpp           | 18 +++++++++---------
 src/ConfigSetup.h             |  2 +-
 src/Coordinates.cpp           |  2 --
 src/DCDlib.cpp                |  2 +-
 src/Random123Wrapper.h        |  4 ++++
 src/TransformMatrix.h         |  2 --
 src/moves/IntraTargetedSwap.h |  2 +-
 src/moves/TargetedSwap.h      |  2 +-
 15 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index eae6c8f65..696338534 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -63,6 +63,7 @@ if(NOT GOMC_ASAN)
        set(CMAKE_CLANG_COMP_FLAGS ${CMAKE_CLANG_COMP_FLAGS} ${OpenMP_CXX_FLAGS})
        set(CMAKE_CLANG_CUDA_COMP_FLAGS ${CMAKE_CLANG_CUDA_COMP_FLAGS} "SHELL:-Xcompiler ${OpenMP_CXX_FLAGS}")
        set(CMAKE_CLANG_LINK_FLAGS ${CMAKE_CLANG_LINK_FLAGS} ${OpenMP_CXX_FLAGS})
+	   set(CMAKE_MSVC_OPENMP_FLAGS "/openmp:llvm")
     endif()
 endif()
 
@@ -93,12 +94,12 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
     set(CMAKE_GPU_COMP_FLAGS "${CMAKE_CLANG_CUDA_COMP_FLAGS}")
     set(CMAKE_LINK_FLAGS "${CMAKE_CLANG_LINK_FLAGS}")
 elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC")
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /D_DEBUG /MTd /Zi /Ob0 /Od /RTC1")
-    set(CMAKE_CXX_FLAGS_DEBUG_INIT "${CMAKE_CXX_FLAGS_DEBUG_INIT} /D_DEBUG /MTd /Zi /Ob0 /Od /RTC1")
-    set(CMAKE_CXX_FLAGS_MINSIZEREL_INIT "${CMAKE_CXX_FLAGS_MINSIZEREL_INIT} /MT /O1 /Ob1 /D NDEBUG")
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_MINSIZEREL_INIT} /MT /O2 /Ob2 /D NDEBUG")
-    set(CMAKE_CXX_FLAGS_RELEASE_INIT "${CMAKE_CXX_FLAGS_RELEASE_INIT} /MT /O2 /Ob2 /D NDEBUG")
-    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT "${CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT} /MT /Zi /O2 /Ob1 /D NDEBUG")
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${CMAKE_MSVC_OPENMP_FLAGS} /D DEBUG /MTd /Zi /Ob0 /Od /RTC1")
+    set(CMAKE_CXX_FLAGS_DEBUG_INIT "${CMAKE_CXX_FLAGS_DEBUG_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /D DEBUG /MTd /Zi /Ob0 /Od /RTC1")
+    set(CMAKE_CXX_FLAGS_MINSIZEREL_INIT "${CMAKE_CXX_FLAGS_MINSIZEREL_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /O1 /Ob1 /D NDEBUG")
+    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /O2 /Ob2 /D NDEBUG")
+    set(CMAKE_CXX_FLAGS_RELEASE_INIT "${CMAKE_CXX_FLAGS_RELEASE_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /O2 /Ob2 /D NDEBUG")
+    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT "${CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /Zi /O2 /Ob1 /D NDEBUG")
 endif()
 
 # Set Source and Header files
diff --git a/README.md b/README.md
index a3fa1e5bb..c6e2e66a9 100644
--- a/README.md
+++ b/README.md
@@ -41,11 +41,11 @@ To cite GOMC project, please cite the following papers:
 2.  Set the Source Folder to the GOMC root folder.
 3.  Set the Build Folder to your build folder.
 4.  Click Configure, select your compiler/environment.
-5.  Wait for CMake to finish the configuration.
-6.  Click Configure again and click Generate.
-7.  If your version of CUDA is older than CUDA 11, download the [CUB library](https://nvlabs.github.io/cub/download_cub.html).
-8.  If your version of CUDA is older than CUDA 11, extract the CUB library and copy the "cub" folder from the CUB library into the "lib" folder inside the GOMC directory.
-9.  Open the CMake-generated project/solution etc. in the desired IDE (e.g., Visual Studio).
+5.  Wait for CMake to finish creating the configuration.
+6.  Click Generate.
+7.  If building GPU executables and the CUDA version is older than CUDA 11, download the [CUB library](https://nvlabs.github.io/cub/download_cub.html).
+8.  If building GPU executables and the CUDA version is older than CUDA 11, extract the CUB library and copy the "cub" folder from the CUB library into the "lib" folder inside the GOMC directory.
+9.  Open the CMake-generated project/solution file, located in your Build Folder, in the desired IDE (e.g., Visual Studio).
 10. Using the solution in the IDE, build GOMC per the IDE's standard release compilation/executable generation methods.
 
 > NOTES: You can also use CMake from the Windows command line if its directory is added to the PATH environment variable.
diff --git a/lib/BasicTypes.h b/lib/BasicTypes.h
index 2b9713e00..18e7e87c7 100644
--- a/lib/BasicTypes.h
+++ b/lib/BasicTypes.h
@@ -8,6 +8,7 @@ along with this program, also can be found at
 #ifndef BASIC_TYPES_H
 #define BASIC_TYPES_H
 
+#define _USE_MATH_DEFINES
 #include <cmath>
 #include <cstddef>
 #include <fstream>
diff --git a/lib/GeomLib.h b/lib/GeomLib.h
index 5ec57a647..d5b53e87d 100644
--- a/lib/GeomLib.h
+++ b/lib/GeomLib.h
@@ -11,13 +11,12 @@ along with this program, also can be found at
 // Standard way to get pi constant on most platforms
 // Needs to be defined _before_ including cmath
 // so that the PI constants come from cmath
-#define _USE_MATH_DEFINES
-
-#include <cmath>  //For sqrt, fabs, M_PI
-#include <limits> //for double limits
+//#define _USE_MATH_DEFINES
+//#include <cmath>  //For sqrt, fabs, M_PI
 
 #include "BasicTypes.h" //For uint, XYZ
 #include "XYZArray.h"
+#include <limits> //for double limits
 
 /////////////////////////////////////////////////////////////
 //  DEFINES  //
@@ -30,6 +29,10 @@ along with this program, also can be found at
 #define M_PI                                                                   \
   3.14159265358979323846264338327950288419716939937510582097494459230781640629
 #endif
+#ifndef M_1_PI
+// Reciprocal of PI:
+#define M_1_PI (1.0 / M_PI) // parenthesized so it expands safely in any expression
+#endif
 #ifndef M_PI_2
 // From Mathematica:
 // N[Pi/2, 75]
@@ -40,6 +43,9 @@ along with this program, also can be found at
 #define M_PI_4                                                                 \
   0.785398163397448309615660845819875721049292349843776455243736148076954101572
 #endif
+#ifndef M_2_SQRTPI
+#define M_2_SQRTPI 1.12837916709551257390 // 2/sqrt(pi), literal like M_PI above
+#endif
 
 #define DEG_TO_RAD (M_PI / 180.0)
 #define RAD_TO_DEG (180.0 * M_1_PI) // Same as 180/PI
diff --git a/lib/NumLib.h b/lib/NumLib.h
index a54568dd2..5e10de65c 100644
--- a/lib/NumLib.h
+++ b/lib/NumLib.h
@@ -9,7 +9,6 @@ along with this program, also can be found at
 #define NUMERIC_LIB_H
 
 #include "BasicTypes.h" //For uint, XYZ
-#include <cmath>
 #include <iostream>
 #include <limits> //for double limits
 #include <vector> //for vector average
@@ -196,7 +195,8 @@ inline double POW(const double d2, const double d4, const double d6, uint e) {
 // Class to define the function used in Zbrent
 class Exp6Fun {
 public:
-  Exp6Fun(const double a, const double s) : sigma(s), alpha(a) {}
+  Exp6Fun(const float a, const float s, const float r = 0.0) : sigma(s),
+      alpha(a), rmin(r) {}
   virtual ~Exp6Fun(){};
   virtual float operator()(float x) = 0;
 
@@ -216,7 +216,7 @@ class RminFun : public Exp6Fun {
 
 class RmaxFun : public Exp6Fun {
 public:
-  RmaxFun(double a, double s, double r) : Exp6Fun(a, s) { rmin = r; }
+  RmaxFun(double a, double s, double r) : Exp6Fun(a, s, r) {}
   virtual ~RmaxFun(){};
   virtual float operator()(float x) {
     double rep = (-1.0 / rmin) * exp(alpha * (1.0 - x / rmin));
diff --git a/lib/VectorLib.h b/lib/VectorLib.h
index c13b22871..6c0820fbb 100644
--- a/lib/VectorLib.h
+++ b/lib/VectorLib.h
@@ -43,7 +43,7 @@ template <typename T> T *TransferInto(T *array, const std::vector<T> &vec) {
 // overloaded because vector<bool> is a lie and copy may not be supported
 inline bool *transfer(const std::vector<bool> &vec) {
   bool *array = new bool[vec.size()];
-  for (unsigned int i = 0; i < vec.size(); ++i) {
+  for (size_t i = 0; i < vec.size(); ++i) {
     array[i] = vec[i];
   }
   return array;
diff --git a/src/CheckpointSetup.cpp b/src/CheckpointSetup.cpp
index a8c17aff0..c0cf21b03 100644
--- a/src/CheckpointSetup.cpp
+++ b/src/CheckpointSetup.cpp
@@ -103,7 +103,7 @@ void CheckpointSetup::SetCheckpointData(bool &parallelTemperingIsEnabled,
 void CheckpointSetup::SetStepNumber() { startStepRef = chkObj.stepNumber; }
 
 void CheckpointSetup::SetTrueStepNumber() {
-  printf("%-40s %-lu \n", "Info: Loading true step from checkpoint",
-         chkObj.trueStepNumber);
+  printf("%-40s %-llu \n", "Info: Loading true step from checkpoint",
+         static_cast<unsigned long long>(chkObj.trueStepNumber));
   trueStepRef = chkObj.trueStepNumber;
 }
diff --git a/src/ConfigSetup.cpp b/src/ConfigSetup.cpp
index 8eb1b2536..d3cababdb 100644
--- a/src/ConfigSetup.cpp
+++ b/src/ConfigSetup.cpp
@@ -442,7 +442,7 @@ void ConfigSetup::Init(const char *fileName, MultiSim const *const &multisim) {
         sys.targetedSwapCollection.AddsubVolumeBox(idx, b);
         sys.intraTargetedSwapCollection.AddsubVolumeBox(idx, b);
       } else {
-        printf("%-40s %-lu !\n",
+        printf("%-40s %-zu !\n",
                "ERROR: Expected 2 values for SubVolumeBox, but received",
                line.size() - 1);
         exit(EXIT_FAILURE);
@@ -457,7 +457,7 @@ void ConfigSetup::Init(const char *fileName, MultiSim const *const &multisim) {
         sys.targetedSwapCollection.AddsubVolumeCenter(idx, temp);
         sys.intraTargetedSwapCollection.AddsubVolumeCenter(idx, temp);
       } else {
-        printf("%-40s %-lu !\n",
+        printf("%-40s %-zu !\n",
                "ERROR: Expected 4 values for SubVolumeCenter, but received",
                line.size() - 1);
         exit(EXIT_FAILURE);
@@ -468,7 +468,7 @@ void ConfigSetup::Init(const char *fileName, MultiSim const *const &multisim) {
         sys.targetedSwapCollection.AddsubVolumePBC(idx, line[2]);
         sys.intraTargetedSwapCollection.AddsubVolumePBC(idx, line[2]);
       } else {
-        printf("%-40s %-lu !\n",
+        printf("%-40s %-zu !\n",
                "ERROR: Expected 2 values for SubVolumePBC, but received",
                line.size() - 1);
         exit(EXIT_FAILURE);
@@ -483,7 +483,7 @@ void ConfigSetup::Init(const char *fileName, MultiSim const *const &multisim) {
         sys.targetedSwapCollection.AddsubVolumeAtomList(idx, temp);
         sys.intraTargetedSwapCollection.AddsubVolumeAtomList(idx, temp);
       } else {
-        printf("%-40s %-lu !\n",
+        printf("%-40s %-zu !\n",
                "ERROR: Expected atleast 3 values for SubVolumeCenterList, but "
                "received",
                line.size() - 1);
@@ -499,7 +499,7 @@ void ConfigSetup::Init(const char *fileName, MultiSim const *const &multisim) {
         sys.targetedSwapCollection.AddsubVolumeDimension(idx, temp);
         sys.intraTargetedSwapCollection.AddsubVolumeDimension(idx, temp);
       } else {
-        printf("%-40s %-lu !\n",
+        printf("%-40s %-zu !\n",
                "ERROR: Expected 4 values for SubVolumeDim, but received",
                line.size() - 1);
         exit(EXIT_FAILURE);
@@ -515,7 +515,7 @@ void ConfigSetup::Init(const char *fileName, MultiSim const *const &multisim) {
         sys.targetedSwapCollection.AddsubVolumeResKind(idx, temp);
         sys.intraTargetedSwapCollection.AddsubVolumeResKind(idx, temp);
       } else {
-        printf("%-40s %-lu !\n",
+        printf("%-40s %-zu !\n",
                "ERROR: Expected atleast 2 values for SubVolumeResidueKind, but "
                "received",
                line.size() - 1);
@@ -528,7 +528,7 @@ void ConfigSetup::Init(const char *fileName, MultiSim const *const &multisim) {
         sys.targetedSwapCollection.AddsubVolumeSwapType(idx, isRigid);
         sys.intraTargetedSwapCollection.AddsubVolumeSwapType(idx, isRigid);
       } else {
-        printf("%-40s %-lu !\n",
+        printf("%-40s %-zu !\n",
                "ERROR: Expected 2 values for SubVolumeRigidSwap, but received",
                line.size() - 1);
         exit(EXIT_FAILURE);
@@ -544,7 +544,7 @@ void ConfigSetup::Init(const char *fileName, MultiSim const *const &multisim) {
         sys.targetedSwapCollection.AddsubVolumeChemPot(idx, resName, value,
                                                        isFugacity);
       } else {
-        printf("%-40s %-lu !\n",
+        printf("%-40s %-zu !\n",
                "ERROR: Expected 3 values for SubVolumeChemPot, but received",
                line.size() - 1);
         exit(EXIT_FAILURE);
@@ -558,7 +558,7 @@ void ConfigSetup::Init(const char *fileName, MultiSim const *const &multisim) {
         sys.targetedSwapCollection.AddsubVolumeChemPot(idx, resName, value,
                                                        isFugacity);
       } else {
-        printf("%-40s %-lu !\n",
+        printf("%-40s %-zu !\n",
                "ERROR: Expected 3 values for SubVolumeFugacity, but received",
                line.size() - 1);
         exit(EXIT_FAILURE);
diff --git a/src/ConfigSetup.h b/src/ConfigSetup.h
index a39b33085..f2863670d 100644
--- a/src/ConfigSetup.h
+++ b/src/ConfigSetup.h
@@ -645,7 +645,7 @@ struct TargetSwapCollection {
         (std::find(newKind.begin(), newKind.end(), "All") != newKind.end());
     if (selectedAll) {
       if (newKind.size() > 1) {
-        printf("Warning: %lu additional residue kinds were defined for "
+        printf("Warning: %zu additional residue kinds were defined for "
                "subVolume index %d, while using all residues!\n",
                newKind.size() - 1, subVIdx);
         printf(
diff --git a/src/Coordinates.cpp b/src/Coordinates.cpp
index ee8d9acef..a93830f10 100644
--- a/src/Coordinates.cpp
+++ b/src/Coordinates.cpp
@@ -7,9 +7,7 @@ along with this program, also can be found at
 ********************************************************************************/
 #include "Coordinates.h"
 
-#include <algorithm> //For copy
 #include <cassert>
-#include <cmath>
 
 #include "TransformMatrix.h"
 
diff --git a/src/DCDlib.cpp b/src/DCDlib.cpp
index e0f955bcf..4c39bf5cf 100644
--- a/src/DCDlib.cpp
+++ b/src/DCDlib.cpp
@@ -200,7 +200,7 @@ OFF_T NAMD_seek(int file, OFF_T offset, int whence) {
   if (whence == SEEK_SET && retval != offset) {
     char buf[256];
     sprintf(buf,
-            "seek failed while writing DCD file: SEEK_SET %ld returned %ld\n",
-            offset, retval);
+            "seek failed while writing DCD file: SEEK_SET %lld returned %lld\n",
+            static_cast<long long>(offset), static_cast<long long>(retval));
     NAMD_die(buf);
   }
diff --git a/src/Random123Wrapper.h b/src/Random123Wrapper.h
index e80fba456..990e031f7 100644
--- a/src/Random123Wrapper.h
+++ b/src/Random123Wrapper.h
@@ -8,6 +8,10 @@ along with this program, also can be found at
 #ifndef RANDOM123_WRAPPER_H
 #define RANDOM123_WRAPPER_H
 
+#ifdef _MSC_VER
+#define R123_NO_SINCOS 1
+#endif
+
 #include "BasicTypes.h"
 #include "Random123/philox.h"
 typedef r123::Philox4x64 RNG;
diff --git a/src/TransformMatrix.h b/src/TransformMatrix.h
index d95dc93b8..d1b4b4f1f 100644
--- a/src/TransformMatrix.h
+++ b/src/TransformMatrix.h
@@ -9,8 +9,6 @@ along with this program, also can be found at
 #define TRANSFORM_MATRIX_H
 
 #include "BasicTypes.h"
-#define _USE_MATH_DEFINES
-#include <cmath> //cos and sin
 
 class TransformMatrix;
 typedef TransformMatrix RotationMatrix;
diff --git a/src/moves/IntraTargetedSwap.h b/src/moves/IntraTargetedSwap.h
index d79fef836..cc88b1f44 100644
--- a/src/moves/IntraTargetedSwap.h
+++ b/src/moves/IntraTargetedSwap.h
@@ -663,7 +663,7 @@ void IntraTargetedSwap::PrintIntraTargetedSwapInfo() {
              tsp.subVolumeIdx);
       printf("%-40s %d \n", "      SubVolume Box:", b);
       if (tsp.calcSubVolCenter) {
-        printf("%-40s Using %lu defined atom indexes \n",
+        printf("%-40s Using %zu defined atom indexes \n",
                "      Calculating subVolume center:", tsp.atomList.size());
         int max = *std::max_element(tsp.atomList.begin(), tsp.atomList.end());
         if (max >= (int)coordCurrRef.Count()) {
diff --git a/src/moves/TargetedSwap.h b/src/moves/TargetedSwap.h
index 1565e2e97..a4d2df58b 100644
--- a/src/moves/TargetedSwap.h
+++ b/src/moves/TargetedSwap.h
@@ -809,7 +809,7 @@ void TargetedSwap::PrintTargetedSwapInfo() {
              tsp.subVolumeIdx);
       printf("%-40s %d \n", "      SubVolume Box:", b);
       if (tsp.calcSubVolCenter) {
-        printf("%-40s Using %lu defined atom indexes \n",
+        printf("%-40s Using %zu defined atom indexes \n",
                "      Calculating subVolume center:", tsp.atomList.size());
         int max = *std::max_element(tsp.atomList.begin(), tsp.atomList.end());
         if (max >= (int)coordCurrRef.Count()) {

From 64f8f80ca0085a2fc0a0367d5a1d0b213779cb7c Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Sun, 1 Dec 2024 13:25:24 -0500
Subject: [PATCH 38/42] Clarify Windows build

---
 README.md | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index c6e2e66a9..6ee8763b3 100644
--- a/README.md
+++ b/README.md
@@ -37,15 +37,14 @@ To cite GOMC project, please cite the following papers:
 > NOTES: Building GOMC requires [CMake](https://cmake.org/) version 3.18 or newer. CMake is available in most Linux package repositories (as cmake). If you wish to utilize NVIDIA graphics cards you will need to install the NVIDIA toolkit before compiling. The metamake file will automatically detect the location of your CUDA installation. More detailed info can be found in the [user manual](https://gomc-wsu.github.io/Manual/) "User Manual".
 
 ## Building GOMC on Windows:
-1.  Open the Windows-compatible CMake GUI.
-2.  Set the Source Folder to the GOMC root folder.
-3.  Set the Build Folder to your build folder.
-4.  Click Configure, select your compiler/environment.
-5.  Wait for CMake to finish creating the configuration.
-6.  Click Generate.
-7.  If building GPU executables and the CUDA version is older than CUDA 11, download the [CUB library](https://nvlabs.github.io/cub/download_cub.html).
-8.  If building GPU executables and the CUDA version is older than CUDA 11, extract the CUB library and copy the "cub" folder from the CUB library into the "lib" folder inside the GOMC directory.
-9.  Open the CMake-generated project/solution file, located in your Build Folder, in the desired IDE (e.g., Visual Studio).
+1.  If building GPU executables and the CUDA version is older than CUDA 11, download the [CUB library](https://nvlabs.github.io/cub/download_cub.html).
+2.  If building GPU executables and the CUDA version is older than CUDA 11, extract the CUB library and copy the "cub" folder from the CUB library into the "lib" folder inside the GOMC directory.
+3.  Open the Windows-compatible CMake GUI.
+4.  Set the Source Folder to the GOMC root folder.
+5.  Set the Build Folder to your build folder.
+6.  Click `Configure`, select your compiler/environment.
+8.  Click `Generate` after CMake finishes configuring the project.
+9.  Click `Open Project` after CMake finishes generating the project.
 10. Using the solution in the IDE, build GOMC per the IDE's standard release compilation/executable generation methods.
 
 > NOTES: You can also use CMake from the Windows command line if its directory is added to the PATH environment variable.

From 713622a633285c25d8fa52a907e71d3b87157874 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 2 Dec 2024 20:15:20 -0500
Subject: [PATCH 39/42] Updates to resolve MSVC warnings and improve MSVC build
 flags

---
 CMakeLists.txt                          | 13 +++++++------
 lib/BasicTypes.h                        |  6 ++++++
 lib/GeomLib.h                           |  9 ---------
 src/FFAngles.h                          |  2 --
 src/FFDihedrals.h                       |  2 --
 src/Forcefield.cpp                      |  2 --
 src/MersenneTwister.h                   |  4 ++++
 src/MoveSettings.cpp                    |  3 ---
 src/PRNG.h                              |  2 --
 src/XYZArray.h                          |  1 -
 src/cbmc/DCFreeCycle.cpp                |  4 ----
 src/cbmc/DCFreeCycleSeed.cpp            |  4 ----
 src/cbmc/DCFreeHedron.cpp               |  4 ----
 src/cbmc/DCFreeHedronSeed.cpp           |  4 ----
 src/cbmc/DCHedron.cpp                   |  4 +---
 src/cbmc/DCHedron.h                     | 12 ++++++------
 src/cbmc/DCHedronCycle.cpp              |  2 --
 src/cbmc/DCLinkedCycle.cpp              |  2 --
 src/cbmc/DCLinkedHedron.cpp             |  2 --
 src/cbmc/DCRotateCOM.cpp                |  4 ----
 src/cbmc/TrialMol.cpp                   |  5 -----
 src/moves/IntraMoleculeExchange1.h      |  2 --
 src/moves/IntraMoleculeExchange2.h      |  2 --
 src/moves/IntraMoleculeExchange3.h      |  2 --
 src/moves/IntraTargetedSwap.h           |  5 ++---
 src/moves/MoleculeExchange1.h           |  3 ---
 src/moves/MoleculeExchange2.h           |  3 ---
 src/moves/MoleculeExchange3.h           |  3 ---
 src/moves/MultiParticle.h               |  4 +---
 src/moves/MultiParticleBrownianMotion.h |  2 --
 src/moves/TargetedSwap.h                |  1 -
 31 files changed, 27 insertions(+), 91 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 696338534..53c13006f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -47,6 +47,7 @@ if(GOMC_ASAN)
    set(CMAKE_CLANG_COMP_FLAGS ${CMAKE_CLANG_COMP_FLAGS} -fsanitize=address -fno-omit-frame-pointer)
    set(CMAKE_CLANG_CUDA_COMP_FLAGS ${CMAKE_CLANG_CUDA_COMP_FLAGS} "SHELL:-Xcompiler -fsanitize=address" "SHELL:-Xcompiler -fno-omit-frame-pointer")
    set(CMAKE_CLANG_LINK_FLAGS ${CMAKE_CLANG_LINK_FLAGS} -fsanitize=address -fno-omit-frame-pointer)
+   set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /fsanitize=address")
 endif()
 
 # find OpenMP and set it up
@@ -94,12 +95,12 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
     set(CMAKE_GPU_COMP_FLAGS "${CMAKE_CLANG_CUDA_COMP_FLAGS}")
     set(CMAKE_LINK_FLAGS "${CMAKE_CLANG_LINK_FLAGS}")
 elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC")
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${CMAKE_MSVC_OPENMP_FLAGS} /D DEBUG /MTd /Zi /Ob0 /Od /RTC1")
-    set(CMAKE_CXX_FLAGS_DEBUG_INIT "${CMAKE_CXX_FLAGS_DEBUG_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /D DEBUG /MTd /Zi /Ob0 /Od /RTC1")
-    set(CMAKE_CXX_FLAGS_MINSIZEREL_INIT "${CMAKE_CXX_FLAGS_MINSIZEREL_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /O1 /Ob1 /D NDEBUG")
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_MINSIZEREL_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /O2 /Ob2 /D NDEBUG")
-    set(CMAKE_CXX_FLAGS_RELEASE_INIT "${CMAKE_CXX_FLAGS_RELEASE_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /O2 /Ob2 /D NDEBUG")
-    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT "${CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /Zi /O2 /Ob1 /D NDEBUG")
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${CMAKE_MSVC_OPENMP_FLAGS} /D DEBUG /MTd /Zi /Ob0 /Od /RTC1 /D_USE_MATH_DEFINES")
+    set(CMAKE_CXX_FLAGS_DEBUG_INIT "${CMAKE_CXX_FLAGS_DEBUG_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /D DEBUG /MTd /Zi /Ob0 /Od /RTC1 /D_USE_MATH_DEFINES")
+    set(CMAKE_CXX_FLAGS_MINSIZEREL_INIT "${CMAKE_CXX_FLAGS_MINSIZEREL_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /O1 /Ob1 /D NDEBUG /D_USE_MATH_DEFINES")
+    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_MINSIZEREL_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /O2 /D NDEBUG /D_USE_MATH_DEFINES")
+    set(CMAKE_CXX_FLAGS_RELEASE_INIT "${CMAKE_CXX_FLAGS_RELEASE_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /O2 /D NDEBUG /D_USE_MATH_DEFINES")
+    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT "${CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /Zi /O2 /Ob1 /D NDEBUG /D_USE_MATH_DEFINES")
 endif()
 
 # Set Source and Header files
diff --git a/lib/BasicTypes.h b/lib/BasicTypes.h
index 18e7e87c7..b34ce54f8 100644
--- a/lib/BasicTypes.h
+++ b/lib/BasicTypes.h
@@ -8,7 +8,13 @@ along with this program, also can be found at
 #ifndef BASIC_TYPES_H
 #define BASIC_TYPES_H
 
+// Standard way to get pi constant on most platforms
+// Needs to be defined _before_ including cmath
+// so that the PI constants come from cmath
+#ifndef _USE_MATH_DEFINES
 #define _USE_MATH_DEFINES
+#endif
+
 #include <cmath>
 #include <cstddef>
 #include <fstream>
diff --git a/lib/GeomLib.h b/lib/GeomLib.h
index d5b53e87d..f8642f5b4 100644
--- a/lib/GeomLib.h
+++ b/lib/GeomLib.h
@@ -8,12 +8,6 @@ along with this program, also can be found at
 #ifndef GEOM_LIB_H
 #define GEOM_LIB_H
 
-// Standard way to get pi constant on most platforms
-// Needs to be defined _before_ including cmath
-// so that the PI constants come from cmath
-//#define _USE_MATH_DEFINES
-//#include <cmath>  //For sqrt, fabs, M_PI
-
 #include "BasicTypes.h" //For uint, XYZ
 #include "XYZArray.h"
 #include <limits> //for double limits
@@ -43,9 +37,6 @@ along with this program, also can be found at
 #define M_PI_4                                                                 \
   0.785398163397448309615660845819875721049292349843776455243736148076954101572
 #endif
-#ifndef M_2_SQRTPI
-#define M_2_SQRTPI 2.0 / std::sqrt(M_PI)
-#endif
 
 #define DEG_TO_RAD (M_PI / 180.0)
 #define RAD_TO_DEG (180.0 * M_1_PI) // Same as 180/PI
diff --git a/src/FFAngles.h b/src/FFAngles.h
index 4498202ec..81e7d4468 100644
--- a/src/FFAngles.h
+++ b/src/FFAngles.h
@@ -8,8 +8,6 @@ along with this program, also can be found at
 #ifndef FF_ANGLES_H
 #define FF_ANGLES_H
 
-#include <cmath>
-
 #include "BasicTypes.h" //For "uint"
 #include "FFSetup.h"    //For initialization data
 #include "NumLib.h"     //For "Sq" function
diff --git a/src/FFDihedrals.h b/src/FFDihedrals.h
index c04eea04b..7a01ce676 100644
--- a/src/FFDihedrals.h
+++ b/src/FFDihedrals.h
@@ -8,8 +8,6 @@ along with this program, also can be found at
 #ifndef FF_DIHEDRALS_H
 #define FF_DIHEDRALS_H
 
-#include <cmath> //cos, pow
-
 #include "BasicTypes.h"      //For "uint"
 #include "FFConst.h"         //GetRot
 #include "NumLib.h"          //Sq
diff --git a/src/Forcefield.cpp b/src/Forcefield.cpp
index c07919130..38c98f41e 100644
--- a/src/Forcefield.cpp
+++ b/src/Forcefield.cpp
@@ -12,8 +12,6 @@ along with this program, also can be found at
 #include "FFSwitch.h"
 #include "FFSwitchMartini.h"
 #include "Setup.h"
-#define _USE_MATH_DEFINES
-#include <cmath>
 
 Forcefield::Forcefield() {
   particles = NULL;
diff --git a/src/MersenneTwister.h b/src/MersenneTwister.h
index f6b53e9c7..66a31425d 100644
--- a/src/MersenneTwister.h
+++ b/src/MersenneTwister.h
@@ -58,6 +58,10 @@
 #ifndef MERSENNETWISTER_H
 #define MERSENNETWISTER_H
 
+#ifndef _USE_MATH_DEFINES
+#define _USE_MATH_DEFINES
+#endif
+
 // Not thread safe (unless auto-initialization is avoided and each thread has
 // its own MTRand object)
 
diff --git a/src/MoveSettings.cpp b/src/MoveSettings.cpp
index 79dfd4736..ab4e8c9f3 100644
--- a/src/MoveSettings.cpp
+++ b/src/MoveSettings.cpp
@@ -6,9 +6,6 @@ along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
 #include "MoveSettings.h" //header spec
-
-#include <cmath>
-
 #include "BoxDimensions.h" //For axis sizes
 #include "BoxDimensionsNonOrth.h"
 #include "GeomLib.h"    //For M_PI
diff --git a/src/PRNG.h b/src/PRNG.h
index 1b5a549b7..c3172af28 100644
--- a/src/PRNG.h
+++ b/src/PRNG.h
@@ -11,8 +11,6 @@ along with this program, also can be found at
 #include <cstdlib>
 #include <fstream>
 #include <iostream>
-#define _USE_MATH_DEFINES
-#include <cmath>
 #include <cstdlib>
 #include <fstream>
 #include <iostream>
diff --git a/src/XYZArray.h b/src/XYZArray.h
index 6479495db..055573d2f 100644
--- a/src/XYZArray.h
+++ b/src/XYZArray.h
@@ -12,7 +12,6 @@ along with this program, also can be found at
 #include <string.h> //for memset, memcpy, etc.
 
 #include <algorithm> //for swap pre-c++11 compilers
-#include <cmath>
 #include <utility> //for swap (most modern compilers)
 
 #include "BasicTypes.h"
diff --git a/src/cbmc/DCFreeCycle.cpp b/src/cbmc/DCFreeCycle.cpp
index 5dc4eb307..92aa258b9 100644
--- a/src/cbmc/DCFreeCycle.cpp
+++ b/src/cbmc/DCFreeCycle.cpp
@@ -5,11 +5,7 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#define _USE_MATH_DEFINES
 #include "DCFreeCycle.h"
-
-#include <cmath>
-
 #include "DCData.h"
 #include "Forcefield.h"
 #include "MolSetup.h"
diff --git a/src/cbmc/DCFreeCycleSeed.cpp b/src/cbmc/DCFreeCycleSeed.cpp
index f57a2aced..5eebde94e 100644
--- a/src/cbmc/DCFreeCycleSeed.cpp
+++ b/src/cbmc/DCFreeCycleSeed.cpp
@@ -5,11 +5,7 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#define _USE_MATH_DEFINES
 #include "DCFreeCycleSeed.h"
-
-#include <cmath>
-
 #include "DCData.h"
 #include "Forcefield.h"
 #include "MolSetup.h"
diff --git a/src/cbmc/DCFreeHedron.cpp b/src/cbmc/DCFreeHedron.cpp
index 89aa59efc..c0a6432da 100644
--- a/src/cbmc/DCFreeHedron.cpp
+++ b/src/cbmc/DCFreeHedron.cpp
@@ -5,11 +5,7 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#define _USE_MATH_DEFINES
 #include "DCFreeHedron.h"
-
-#include <cmath>
-
 #include "DCData.h"
 #include "Forcefield.h"
 #include "MolSetup.h"
diff --git a/src/cbmc/DCFreeHedronSeed.cpp b/src/cbmc/DCFreeHedronSeed.cpp
index b6d527069..ad2733ecb 100644
--- a/src/cbmc/DCFreeHedronSeed.cpp
+++ b/src/cbmc/DCFreeHedronSeed.cpp
@@ -5,11 +5,7 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#define _USE_MATH_DEFINES
 #include "DCFreeHedronSeed.h"
-
-#include <cmath>
-
 #include "DCData.h"
 #include "Forcefield.h"
 #include "MolSetup.h"
diff --git a/src/cbmc/DCHedron.cpp b/src/cbmc/DCHedron.cpp
index da341ff55..9004f7841 100644
--- a/src/cbmc/DCHedron.cpp
+++ b/src/cbmc/DCHedron.cpp
@@ -5,11 +5,9 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#define _USE_MATH_DEFINES
 #include "DCHedron.h"
 
 #include <cassert>
-#include <cmath>
 #include <numeric>
 
 #include "DCData.h"
@@ -91,7 +89,7 @@ void DCHedron::SetBondOld(double const *bondLen, double const &anchBond) {
   anchorBondOld = anchBond;
 }
 
-double DCHedron::GetWeight() {
+double DCHedron::GetWeight() const {
   double result = 1;
   for (uint i = 0; i < nBonds; ++i) {
     result *= thetaWeight[i];
diff --git a/src/cbmc/DCHedron.h b/src/cbmc/DCHedron.h
index 425174062..b454d5fd9 100644
--- a/src/cbmc/DCHedron.h
+++ b/src/cbmc/DCHedron.h
@@ -29,12 +29,12 @@ class DCHedron {
   uint Bonded(uint i) const { return bonded[i]; }
   double Theta(uint i) const { return theta[i]; }
   double Phi(uint i) const { return phi[i]; }
-  double GetWeight();
-  double GetEnergy() { return bendEnergy; }
-  double GetNonBondedEn() { return oneThree; }
-  uint NumBond() { return nBonds; }
-  uint Focus() { return focus; }
-  uint Prev() { return prev; }
+  double GetWeight() const;
+  double GetEnergy() const { return bendEnergy; }
+  double GetNonBondedEn() const { return oneThree; }
+  uint NumBond() const { return nBonds; }
+  uint Focus() const { return focus; }
+  uint Prev() const { return prev; }
 
   // need to go to private
   uint bonded[MAX_BONDS];
diff --git a/src/cbmc/DCHedronCycle.cpp b/src/cbmc/DCHedronCycle.cpp
index 92771a9e9..8925fdb55 100644
--- a/src/cbmc/DCHedronCycle.cpp
+++ b/src/cbmc/DCHedronCycle.cpp
@@ -5,11 +5,9 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#define _USE_MATH_DEFINES
 #include "DCHedronCycle.h"
 
 #include <cassert>
-#include <cmath>
 #include <numeric>
 
 #include "DCData.h"
diff --git a/src/cbmc/DCLinkedCycle.cpp b/src/cbmc/DCLinkedCycle.cpp
index 11e89218a..2e9015f54 100644
--- a/src/cbmc/DCLinkedCycle.cpp
+++ b/src/cbmc/DCLinkedCycle.cpp
@@ -5,11 +5,9 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#define _USE_MATH_DEFINES
 #include "DCLinkedCycle.h"
 
 #include <cassert>
-#include <cmath>
 #include <numeric>
 
 #include "DCData.h"
diff --git a/src/cbmc/DCLinkedHedron.cpp b/src/cbmc/DCLinkedHedron.cpp
index 038141293..9d60b8ad5 100644
--- a/src/cbmc/DCLinkedHedron.cpp
+++ b/src/cbmc/DCLinkedHedron.cpp
@@ -5,11 +5,9 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#define _USE_MATH_DEFINES
 #include "DCLinkedHedron.h"
 
 #include <cassert>
-#include <cmath>
 #include <numeric>
 
 #include "DCData.h"
diff --git a/src/cbmc/DCRotateCOM.cpp b/src/cbmc/DCRotateCOM.cpp
index 19471438f..fc5cc0d4c 100644
--- a/src/cbmc/DCRotateCOM.cpp
+++ b/src/cbmc/DCRotateCOM.cpp
@@ -5,11 +5,7 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#define _USE_MATH_DEFINES
 #include "DCRotateCOM.h"
-
-#include <cmath>
-
 #include "DCData.h"
 #include "Forcefield.h"
 #include "MolSetup.h"
diff --git a/src/cbmc/TrialMol.cpp b/src/cbmc/TrialMol.cpp
index 04393a87e..df256eb1e 100644
--- a/src/cbmc/TrialMol.cpp
+++ b/src/cbmc/TrialMol.cpp
@@ -8,15 +8,10 @@ along with this program, also can be found at
 #include "TrialMol.h"
 
 #include <algorithm>
-#include <cmath>   //for sin, cos, atan2
 #include <utility> //swap
 
-#include "BasicTypes.h"
 #include "BoxDimensions.h"
-#include "EnergyTypes.h"
 #include "GeomLib.h" //for Theta
-#include "MoleculeKind.h"
-#include "XYZArray.h"
 #ifndef NDEBUG
 #include <iostream>
 #endif
diff --git a/src/moves/IntraMoleculeExchange1.h b/src/moves/IntraMoleculeExchange1.h
index f9a9cdf1a..154f910d8 100644
--- a/src/moves/IntraMoleculeExchange1.h
+++ b/src/moves/IntraMoleculeExchange1.h
@@ -8,8 +8,6 @@ along with this program, also can be found at
 #ifndef INTRAMOLECULEEXCHANGE1_H
 #define INTRAMOLECULEEXCHANGE1_H
 
-#include <cmath>
-
 #include "GeomLib.h"
 #include "MoveBase.h"
 #include "TrialMol.h"
diff --git a/src/moves/IntraMoleculeExchange2.h b/src/moves/IntraMoleculeExchange2.h
index 542c393f5..74bd8a4a3 100644
--- a/src/moves/IntraMoleculeExchange2.h
+++ b/src/moves/IntraMoleculeExchange2.h
@@ -8,8 +8,6 @@ along with this program, also can be found at
 #ifndef INTRAMOLECULEEXCHANGE2_H
 #define INTRAMOLECULEEXCHANGE2_H
 
-#include <cmath>
-
 #include "GeomLib.h"
 #include "IntraMoleculeExchange1.h"
 #include "TrialMol.h"
diff --git a/src/moves/IntraMoleculeExchange3.h b/src/moves/IntraMoleculeExchange3.h
index 49d0355fc..1bc0e8ab7 100644
--- a/src/moves/IntraMoleculeExchange3.h
+++ b/src/moves/IntraMoleculeExchange3.h
@@ -8,8 +8,6 @@ along with this program, also can be found at
 #ifndef INTRAMOLECULEEXCHANGE3_H
 #define INTRAMOLECULEEXCHANGE3_H
 
-#include <cmath>
-
 #include "GeomLib.h"
 #include "IntraMoleculeExchange1.h"
 #include "TrialMol.h"
diff --git a/src/moves/IntraTargetedSwap.h b/src/moves/IntraTargetedSwap.h
index cc88b1f44..0715d36e3 100644
--- a/src/moves/IntraTargetedSwap.h
+++ b/src/moves/IntraTargetedSwap.h
@@ -8,9 +8,6 @@ along with this program, also can be found at
 #ifndef INTRATARGETEDSWAP_H
 #define INTRATARGETEDSWAP_H
 
-#include <cmath>
-#include <queue>
-
 #include "ConfigSetup.h"
 #include "FloydWarshallCycle.h"
 #include "GeomLib.h"
@@ -19,6 +16,8 @@ along with this program, also can be found at
 #include "TargetedSwap.h" // for enum PBC Struct TSwapParam defined in there
 #include "TrialMol.h"
 
+#include <queue>
+
 struct BondList;
 
 class IntraTargetedSwap : public MoveBase {
diff --git a/src/moves/MoleculeExchange1.h b/src/moves/MoleculeExchange1.h
index 2eca32323..24018c593 100644
--- a/src/moves/MoleculeExchange1.h
+++ b/src/moves/MoleculeExchange1.h
@@ -9,9 +9,6 @@ along with this program, also can be found at
 #define MOLECULEEXCHANGE1_H
 
 #if ENSEMBLE == GCMC || ENSEMBLE == GEMC
-
-#include <cmath>
-
 #include "GeomLib.h"
 #include "MoveBase.h"
 #include "TrialMol.h"
diff --git a/src/moves/MoleculeExchange2.h b/src/moves/MoleculeExchange2.h
index 959165052..35dcb7761 100644
--- a/src/moves/MoleculeExchange2.h
+++ b/src/moves/MoleculeExchange2.h
@@ -9,9 +9,6 @@ along with this program, also can be found at
 #define MOLECULEEXCHANGE2_H
 
 #if ENSEMBLE == GCMC || ENSEMBLE == GEMC
-
-#include <cmath>
-
 #include "GeomLib.h"
 #include "MoleculeExchange1.h"
 #include "TrialMol.h"
diff --git a/src/moves/MoleculeExchange3.h b/src/moves/MoleculeExchange3.h
index c40c64b1c..6cbcaa2c5 100644
--- a/src/moves/MoleculeExchange3.h
+++ b/src/moves/MoleculeExchange3.h
@@ -9,9 +9,6 @@ along with this program, also can be found at
 #define MOLECULEEXCHANGE3_H
 
 #if ENSEMBLE == GCMC || ENSEMBLE == GEMC
-
-#include <cmath>
-
 #include "GeomLib.h"
 #include "MoleculeExchange1.h"
 #include "TrialMol.h"
diff --git a/src/moves/MultiParticle.h b/src/moves/MultiParticle.h
index f92400c5c..d74deae75 100644
--- a/src/moves/MultiParticle.h
+++ b/src/moves/MultiParticle.h
@@ -8,9 +8,6 @@ along with this program, also can be found at
 #ifndef MULTIPARTICLE_H
 #define MULTIPARTICLE_H
 
-#include <cmath>
-#include <fstream>
-
 #include "MoveBase.h"
 #include "Random123Wrapper.h"
 #include "StaticVals.h"
@@ -19,6 +16,7 @@ along with this program, also can be found at
 #include "TransformParticlesCUDAKernel.cuh"
 #include "VariablesCUDA.cuh"
 #endif
+#include <fstream>
 
 #define MIN_FORCE 1E-12
 #define MAX_FORCE 30
diff --git a/src/moves/MultiParticleBrownianMotion.h b/src/moves/MultiParticleBrownianMotion.h
index 68c095cbb..d55552a6a 100644
--- a/src/moves/MultiParticleBrownianMotion.h
+++ b/src/moves/MultiParticleBrownianMotion.h
@@ -8,8 +8,6 @@ along with this program, also can be found at
 #ifndef MULTIPARTICLEBROWNIANMOTION_H
 #define MULTIPARTICLEBROWNIANMOTION_H
 
-#include <cmath>
-
 #include "MoveBase.h"
 #include "Random123Wrapper.h"
 #include "StaticVals.h"
diff --git a/src/moves/TargetedSwap.h b/src/moves/TargetedSwap.h
index a4d2df58b..827641ced 100644
--- a/src/moves/TargetedSwap.h
+++ b/src/moves/TargetedSwap.h
@@ -40,7 +40,6 @@ struct TSwapParam {
 
 #if ENSEMBLE == GCMC || ENSEMBLE == GEMC
 
-#include <cmath>
 #include <queue>
 
 #include "ConfigSetup.h"

From f451add70fcb8f5dc14ba09b35aa0a8ac1b87437 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Thu, 12 Dec 2024 21:39:21 -0500
Subject: [PATCH 40/42] Pass more forcefield parameters to the GPU for
 consistent values

---
 src/Ewald.cpp                             |  3 +-
 src/FFParticle.cpp                        |  7 ++-
 src/Forcefield.h                          |  4 +-
 src/GPU/CalculateEwaldCUDAKernel.cu       | 24 ++++----
 src/GPU/CalculateEwaldCUDAKernel.cuh      |  6 +-
 src/GPU/CalculateForceCUDAKernel.cu       | 71 ++++++++++++-----------
 src/GPU/CalculateForceCUDAKernel.cuh      | 34 ++++++-----
 src/GPU/ConstantDefinitionsCUDAKernel.cu  | 17 ++++--
 src/GPU/ConstantDefinitionsCUDAKernel.cuh |  6 +-
 src/GPU/VariablesCUDA.cuh                 |  9 ++-
 10 files changed, 97 insertions(+), 84 deletions(-)

diff --git a/src/Ewald.cpp b/src/Ewald.cpp
index de23f1bc7..d97d192cc 100644
--- a/src/Ewald.cpp
+++ b/src/Ewald.cpp
@@ -1530,8 +1530,7 @@ void Ewald::BoxForceReciprocal(XYZArray const &molCoords,
     CallBoxForceReciprocalGPU(
         ff.particles->getCUDAVars(), atomForceRec, molForceRec, particleCharge,
         particleMol, particleHasNoCharge, particleUsed, startMol, lengthMol,
-        ff.alpha[box], ff.alphaSq[box], constValue, imageSizeRef[box],
-        molCoords, currentAxes, box);
+        constValue, imageSizeRef[box], molCoords, currentAxes, box);
     delete[] particleUsed;
 #else
     // molecule iterator
diff --git a/src/FFParticle.cpp b/src/FFParticle.cpp
index db35b589d..11e169b8b 100644
--- a/src/FFParticle.cpp
+++ b/src/FFParticle.cpp
@@ -88,9 +88,10 @@ void FFParticle::Init(ff_setup::Particle const &mie,
   double diElectric_1 = 1.0 / forcefield.dielectric;
   InitGPUForceField(*varCUDA, sigmaSq, epsilon_cn, n, forcefield.vdwKind,
                     forcefield.isMartini, count, forcefield.rCut,
-                    forcefield.rCutCoulomb, forcefield.rCutLow,
-                    forcefield.rswitch, forcefield.alpha, forcefield.ewald,
-                    diElectric_1);
+                    forcefield.rCutSq, forcefield.rCutCoulomb,
+                    forcefield.rCutCoulombSq, forcefield.rCutLow,
+                    forcefield.rswitch, forcefield.alpha, forcefield.alphaSq,
+                    forcefield.ewald, diElectric_1);
 #endif
 }
 
diff --git a/src/Forcefield.h b/src/Forcefield.h
index 85ddcd5d5..64ac8f6ba 100644
--- a/src/Forcefield.h
+++ b/src/Forcefield.h
@@ -54,12 +54,12 @@ class Forcefield {
   double tolerance;                // Ewald sum terms
   double rswitch;                  // Switch distance
   double dielectric;               // dielectric for martini
-  double scaling_14; //!< Scaling factor for 1-4 pairs' ewald interactions
+  double scaling_14; //!< Scaling factor for 1-4 pairs' Ewald interactions
   double sc_alpha;   // Free energy parameter
   double sc_sigma, sc_sigma_6; // Free energy parameter
 
   bool OneThree, OneFour, OneN; // To include 1-3, 1-4 and more interaction
-  bool electrostatic, ewald;    // To consider columb interaction
+  bool electrostatic, ewald;    // To consider coulomb interaction
   bool vdwGeometricSigma;       // For sigma combining rule
   bool isMartini;
   bool exp6;
diff --git a/src/GPU/CalculateEwaldCUDAKernel.cu b/src/GPU/CalculateEwaldCUDAKernel.cu
index 5812426b6..164091427 100644
--- a/src/GPU/CalculateEwaldCUDAKernel.cu
+++ b/src/GPU/CalculateEwaldCUDAKernel.cu
@@ -450,8 +450,8 @@ void CallBoxForceReciprocalGPU(
     const std::vector<int> &particleMol,
     const std::vector<bool> &particleHasNoCharge, const bool *particleUsed,
     const std::vector<int> &startMol, const std::vector<int> &lengthMol,
-    double alpha, double alphaSq, double constValue, uint imageSize,
-    XYZArray const &molCoords, BoxDimensions const &boxAxes, int box) {
+    double constValue, uint imageSize, XYZArray const &molCoords,
+    BoxDimensions const &boxAxes, int box) {
   int atomCount = atomForceRec.Count();
   int molCount = molForceRec.Count();
   double *gpu_particleCharge;
@@ -518,13 +518,13 @@ void CallBoxForceReciprocalGPU(
       vars->gpu_aForceRecx, vars->gpu_aForceRecy, vars->gpu_aForceRecz,
       vars->gpu_mForceRecx, vars->gpu_mForceRecy, vars->gpu_mForceRecz,
       gpu_particleCharge, gpu_particleMol, gpu_particleHasNoCharge,
-      gpu_particleUsed, gpu_startMol, gpu_lengthMol, alpha, alphaSq, constValue,
-      imageSize, vars->gpu_kxRef[box], vars->gpu_kyRef[box],
-      vars->gpu_kzRef[box], vars->gpu_x, vars->gpu_y, vars->gpu_z,
-      vars->gpu_prefactRef[box], vars->gpu_sumRnew[box], vars->gpu_sumInew[box],
-      vars->gpu_isFraction, vars->gpu_molIndex, vars->gpu_lambdaCoulomb,
-      vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box],
-      vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
+      gpu_particleUsed, gpu_startMol, gpu_lengthMol, vars->gpu_alpha,
+      vars->gpu_alphaSq, constValue, imageSize, vars->gpu_kxRef[box],
+      vars->gpu_kyRef[box], vars->gpu_kzRef[box], vars->gpu_x, vars->gpu_y,
+      vars->gpu_z, vars->gpu_prefactRef[box], vars->gpu_sumRnew[box],
+      vars->gpu_sumInew[box], vars->gpu_isFraction, vars->gpu_molIndex,
+      vars->gpu_lambdaCoulomb, vars->gpu_cell_x[box], vars->gpu_cell_y[box],
+      vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
       vars->gpu_Invcell_z[box], vars->gpu_nonOrth, boxAxes.GetAxis(box).x,
       boxAxes.GetAxis(box).y, boxAxes.GetAxis(box).z, box, atomCount);
   cudaDeviceSynchronize();
@@ -558,7 +558,7 @@ __global__ void BoxForceReciprocalGPU(
     double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz,
     double *gpu_particleCharge, int *gpu_particleMol,
     bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol,
-    int *gpu_lengthMol, double alpha, double alphaSq, double constValue,
+    int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, double constValue,
     int imageSize, double *gpu_kx, double *gpu_ky, double *gpu_kz,
     double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_prefact,
     double *gpu_sumRnew, double *gpu_sumInew, bool *gpu_isFraction,
@@ -627,11 +627,11 @@ __global__ void BoxForceReciprocalGPU(
                      gpu_Invcell_z);
         dist = sqrt(distSq);
 
-        double expConstValue = exp(-1.0 * alphaSq * distSq);
+        double expConstValue = exp(-1.0 * gpu_alphaSq[box] * distSq);
         double qiqj = gpu_particleCharge[particleID] *
                       gpu_particleCharge[otherParticle] * qqFactGPU;
         intraForce = qiqj * lambdaCoef * lambdaCoef / distSq;
-        intraForce *= ((erf(alpha * dist) / dist) - constValue * expConstValue);
+        intraForce *= ((erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue);
         forceX -= intraForce * distVect.x;
         forceY -= intraForce * distVect.y;
         forceZ -= intraForce * distVect.z;
diff --git a/src/GPU/CalculateEwaldCUDAKernel.cuh b/src/GPU/CalculateEwaldCUDAKernel.cuh
index e9ad84326..80062586b 100644
--- a/src/GPU/CalculateEwaldCUDAKernel.cuh
+++ b/src/GPU/CalculateEwaldCUDAKernel.cuh
@@ -23,8 +23,6 @@ void CallBoxForceReciprocalGPU(VariablesCUDA *vars,
                                const bool *particleUsed,
                                const std::vector<int> &startMol,
                                const std::vector<int> &lengthMol,
-                               double alpha,
-                               double alphaSq,
                                double constValue,
                                uint imageSize,
                                XYZArray const &molCoords,
@@ -103,8 +101,8 @@ __global__ void BoxForceReciprocalGPU(double *gpu_aForceRecx,
                                        bool *gpu_particleUsed,
                                      int *gpu_startMol,
                                       int *gpu_lengthMol,
-                                      double alpha,
-                                      double alphaSq,
+                                      double *gpu_alpha,
+                                      double *gpu_alphaSq,
                                       double constValue,
                                       int imageSize,
                                       double *gpu_kx,
diff --git a/src/GPU/CalculateForceCUDAKernel.cu b/src/GPU/CalculateForceCUDAKernel.cu
index c827a803d..219a6cafb 100644
--- a/src/GPU/CalculateForceCUDAKernel.cu
+++ b/src/GPU/CalculateForceCUDAKernel.cu
@@ -125,7 +125,7 @@ void CallBoxInterForceGPU(
       vars->gpu_vT13, vars->gpu_vT22, vars->gpu_vT23, vars->gpu_vT33,
       vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind,
       vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut,
-      vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha,
+      vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq,
       vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_cell_x[box],
       vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box],
       vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth,
@@ -302,7 +302,7 @@ void CallBoxForceGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
       gpu_particleKind, gpu_particleMol, gpu_REn, gpu_LJEn, vars->gpu_sigmaSq,
       vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind,
       vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut,
-      vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha,
+      vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq,
       vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth,
       vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box],
       vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
@@ -469,7 +469,7 @@ __global__ void BoxInterForceGPU(
     double *gpu_vT33, double *gpu_sigmaSq, double *gpu_epsilon_Cn,
     double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count,
     double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow,
-    double *gpu_rOn, double *gpu_alpha, int *gpu_ewald,
+    double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald,
     double *gpu_diElectric_1, double *gpu_cell_x, double *gpu_cell_y,
     double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
     double *gpu_Invcell_z, int *gpu_nonOrth, bool sc_coul, double sc_sigma_6,
@@ -577,7 +577,7 @@ __global__ void BoxInterForceGPU(
                 mA, mB, box, gpu_isFraction, gpu_molIndex, gpu_lambdaCoulomb);
             double pRF = CalcCoulombForceGPU(
                 distSq, qi_qj, gpu_VDW_Kind[0], gpu_ewald[0], gpu_isMartini[0],
-                gpu_alpha[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0],
+                gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0],
                 gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power,
                 lambdaCoulomb, gpu_count[0], kA, kB);
 
@@ -608,7 +608,7 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
             double *gpu_LJEn, double *gpu_sigmaSq, double *gpu_epsilon_Cn,
             double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini,
             int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb,
-            double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha,
+            double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq,
             int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth,
             double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
             double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
@@ -708,7 +708,7 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
 
             forces += CalcCoulombForceGPU(
                 distSq, qi_qj_fact, gpu_VDW_Kind[0], gpu_ewald[0],
-                gpu_isMartini[0], gpu_alpha[box], gpu_rCutCoulomb[box],
+                gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box],
                 gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha,
                 sc_power, lambdaCoulomb, gpu_count[0], kA, kB);
           }
@@ -868,12 +868,12 @@ CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq,
 //**************************************************************//
 __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
                                             int gpu_ewald, double gpu_alpha,
-                                            int index, double gpu_sigmaSq,
+                                            double gpu_alphaSq, int index, double gpu_sigmaSq,
                                             bool sc_coul, double sc_sigma_6,
                                             double sc_alpha, uint sc_power,
                                             double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
-    return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha);
+    return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
   }
 
   if (sc_coul) {
@@ -886,20 +886,20 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
     double softRsq = cbrt(softDist6);
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
-           CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha);
+           CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
   } else {
     return gpu_lambdaCoulomb *
-           CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha);
+           CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
   }
 }
 
 __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
-                                            int gpu_ewald, double gpu_alpha) {
+                                            int gpu_ewald, double gpu_alpha, double gpu_alphaSq) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
     double constValue = gpu_alpha * M_2_SQRTPI;
-    double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq);
+    double expConstValue = exp(-1.0 * gpu_alphaSq * distSq);
     double temp = 1.0 - erf(gpu_alpha * dist);
     return qi_qj * (temp / dist + constValue * expConstValue) / distSq;
   } else {
@@ -909,13 +909,13 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
 }
 
 __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
-                                         int gpu_ewald, double gpu_alpha,
+                                         int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
                                          int index, double gpu_sigmaSq,
                                          bool sc_coul, double sc_sigma_6,
                                          double sc_alpha, uint sc_power,
                                          double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
-    return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha);
+    return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
   }
 
   if (sc_coul) {
@@ -928,20 +928,20 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
     double softRsq = cbrt(softDist6);
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
-           CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha);
+           CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
   } else {
     return gpu_lambdaCoulomb *
-           CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha);
+           CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
   }
 }
 
 __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
-                                         int gpu_ewald, double gpu_alpha) {
+                                         int gpu_ewald, double gpu_alpha, double gpu_alphaSq) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
     double constValue = gpu_alpha * M_2_SQRTPI;
-    double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq);
+    double expConstValue = exp(-1.0 * gpu_alphaSq * distSq);
     double temp = 1.0 - erf(gpu_alpha * dist);
     return qi_qj * (temp / dist + constValue * expConstValue) / distSq;
   } else {
@@ -950,13 +950,13 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
 }
 
 __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
-                                        int gpu_ewald, double gpu_alpha,
+                                        int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
                                         int index, double gpu_sigmaSq,
                                         bool sc_coul, double sc_sigma_6,
                                         double sc_alpha, uint sc_power,
                                         double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
-    return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha);
+    return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
   }
   if (sc_coul) {
     double sigma6 = gpu_sigmaSq * gpu_sigmaSq * gpu_sigmaSq;
@@ -968,20 +968,20 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
     double softRsq = cbrt(softDist6);
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
-           CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha);
+           CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
   } else {
     return gpu_lambdaCoulomb *
-           CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha);
+           CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
   }
 }
 
 __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
-                                        int gpu_ewald, double gpu_alpha) {
+                                        int gpu_ewald, double gpu_alpha, double gpu_alphaSq) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
     double constValue = gpu_alpha * M_2_SQRTPI;
-    double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq);
+    double expConstValue = exp(-1.0 * gpu_alphaSq * distSq);
     double temp = erfc(gpu_alpha * dist);
     return qi_qj * (temp / dist + constValue * expConstValue) / distSq;
   } else {
@@ -990,12 +990,12 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
 }
 
 __device__ double CalcCoulombVirSwitchMartiniGPU(
-    double distSq, double qi_qj, int gpu_ewald, double gpu_alpha,
+    double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
     double gpu_rCut, double gpu_diElectric_1, int index, double gpu_sigmaSq,
     bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power,
     double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
-    return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+    return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq,
                                           gpu_rCut, gpu_diElectric_1);
   }
 
@@ -1009,11 +1009,11 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(
     double softRsq = cbrt(softDist6);
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
-           CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha,
+           CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq,
                                           gpu_rCut, gpu_diElectric_1);
   } else {
     return gpu_lambdaCoulomb *
-           CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+           CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq,
                                           gpu_rCut, gpu_diElectric_1);
   }
 }
@@ -1021,13 +1021,14 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(
 __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj,
                                                  int gpu_ewald,
                                                  double gpu_alpha,
+												 double gpu_alphaSq,
                                                  double gpu_rCut,
                                                  double gpu_diElectric_1) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
     double constValue = gpu_alpha * M_2_SQRTPI;
-    double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq);
+    double expConstValue = exp(-1.0 * gpu_alphaSq * distSq);
     double temp = 1.0 - erf(gpu_alpha * dist);
     return qi_qj * (temp / dist + constValue * expConstValue) / distSq;
   } else {
@@ -1053,7 +1054,7 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj,
 }
 
 __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
-                                          int gpu_ewald, double gpu_alpha,
+                                          int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
                                           double gpu_rCut, int index,
                                           double gpu_sigmaSq, bool sc_coul,
                                           double sc_sigma_6, double sc_alpha,
@@ -1061,7 +1062,7 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
                                           double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
     return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
-                                   gpu_rCut);
+                                   gpu_alphaSq, gpu_rCut);
   }
 
   if (sc_coul) {
@@ -1075,21 +1076,21 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
            CalcCoulombVirSwitchGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha,
-                                   gpu_rCut);
+                                   gpu_alphaSq, gpu_rCut);
   } else {
     return gpu_lambdaCoulomb * CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald,
-                                                       gpu_alpha, gpu_rCut);
+                                                       gpu_alpha, gpu_alphaSq, gpu_rCut);
   }
 }
 
 __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
                                           int gpu_ewald, double gpu_alpha,
-                                          double gpu_rCut) {
+                                          double gpu_alphaSq, double gpu_rCut) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
     double constValue = gpu_alpha * M_2_SQRTPI;
-    double expConstValue = exp(-1.0 * gpu_alpha * gpu_alpha * distSq);
+    double expConstValue = exp(-1.0 * gpu_alphaSq * distSq);
     double temp = 1.0 - erf(gpu_alpha * dist);
     return qi_qj * (temp / dist + constValue * expConstValue) / distSq;
   } else {
diff --git a/src/GPU/CalculateForceCUDAKernel.cuh b/src/GPU/CalculateForceCUDAKernel.cuh
index 4c8773307..072939c24 100644
--- a/src/GPU/CalculateForceCUDAKernel.cuh
+++ b/src/GPU/CalculateForceCUDAKernel.cuh
@@ -114,6 +114,7 @@ __global__ void BoxForceGPU(int *gpu_cellStartIndex,
                             double *gpu_rCutLow,
                             double *gpu_rOn,
                             double *gpu_alpha,
+                            double *gpu_alphaSq,
                             int *gpu_ewald,
                             double *gpu_diElectric_1,
                             int *gpu_nonOrth,
@@ -183,6 +184,7 @@ __global__ void BoxInterForceGPU(int *gpu_cellStartIndex,
                                  double *gpu_rCutLow,
                                  double *gpu_rOn,
                                  double *gpu_alpha,
+                                 double *gpu_alphaSq,
                                  int *gpu_ewald,
                                  double *gpu_diElectric_1,
                                  double *gpu_cell_x,
@@ -249,32 +251,32 @@ __device__ double CalcEnForceGPU(double distSq, int kind1, int kind2,
 //ElectroStatic Calculation
 //**************************************************************//
 __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
-    int gpu_ewald, double gpu_alpha,
+    int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
     int index, double gpu_sigmaSq,
     bool sc_coul, double sc_sigma_6,
     double sc_alpha, uint sc_power,
     double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
-    int gpu_ewald, double gpu_alpha);
+    int gpu_ewald, double gpu_alpha, double gpu_alphaSq);
 __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
-    int gpu_ewald, double gpu_alpha,
+    int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
     int index, double gpu_sigmaSq,
     bool sc_coul, double sc_sigma_6,
     double sc_alpha, uint sc_power,
     double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
-    int gpu_ewald, double gpu_alpha);
+    int gpu_ewald, double gpu_alpha, double gpu_alphaSq);
 __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
-                                        int gpu_ewald, double gpu_alpha,
+                                        int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
                                         int index, double gpu_sigmaSq,
                                         bool sc_coul, double sc_sigma_6,
                                         double sc_alpha, uint sc_power,
                                         double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
-                                        int gpu_ewald, double gpu_alpha);
+                                        int gpu_ewald, double gpu_alpha, double gpu_alphaSq);
 __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj,
     int gpu_ewald,
-    double gpu_alpha,
+    double gpu_alpha, double gpu_alphaSq,
     double gpu_rCut,
     double gpu_diElectric_1,
     int index,
@@ -286,18 +288,18 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj,
     double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj,
     int gpu_ewald,
-    double gpu_alpha,
+    double gpu_alpha, double gpu_alphaSq,
     double gpu_rCut,
     double gpu_diElectric_1);
 __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
-    int gpu_ewald, double gpu_alpha,
+    int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
     double gpu_rCut, int index,
     double gpu_sigmaSq, bool sc_coul,
     double sc_sigma_6, double sc_alpha,
     uint sc_power,
     double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
-    int gpu_ewald, double gpu_alpha,
+    int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
     double gpu_rCut);
 
 //VDW Calculation
@@ -359,7 +361,7 @@ __device__ double CalcVirSwitchGPU(double distSq, int index,
 __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj,
     int gpu_VDW_Kind, int gpu_ewald,
     int gpu_isMartini,
-    double gpu_alpha,
+    double gpu_alpha, double gpu_alphaSq,
     double gpu_rCutCoulomb,
     double gpu_diElectric_1,
     double *gpu_sigmaSq,
@@ -377,25 +379,25 @@ __device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj,
 
   int index = FlatIndexGPU(kind1, kind2, gpu_count);
   if(gpu_VDW_Kind == GPU_VDW_STD_KIND) {
-    return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index,
+    return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index,
                                      gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha,
                                      sc_power, gpu_lambdaCoulomb);
   } else if(gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) {
-    return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index,
+    return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index,
                                   gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha,
                                   sc_power, gpu_lambdaCoulomb);
   } else if(gpu_VDW_Kind == GPU_VDW_EXP6_KIND) {
-    return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, index,
+    return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index,
                                  gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha,
                                  sc_power, gpu_lambdaCoulomb);
   } else if(gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) {
-    return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+    return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq,
                                           gpu_rCutCoulomb, gpu_diElectric_1,
                                           index, gpu_sigmaSq[index], sc_coul,
                                           sc_sigma_6, sc_alpha, sc_power,
                                           gpu_lambdaCoulomb);
   } else
-    return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+    return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq,
                                    gpu_rCutCoulomb, index, gpu_sigmaSq[index], sc_coul,
                                    sc_sigma_6, sc_alpha, sc_power,
                                    gpu_lambdaCoulomb);
diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cu b/src/GPU/ConstantDefinitionsCUDAKernel.cu
index eda129319..e32aa3098 100644
--- a/src/GPU/ConstantDefinitionsCUDAKernel.cu
+++ b/src/GPU/ConstantDefinitionsCUDAKernel.cu
@@ -31,9 +31,10 @@ void UpdateGPULambda(VariablesCUDA *vars, int *molIndex, double *lambdaVDW,
 
 void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
                        double const *epsilon_Cn, double const *n, int VDW_Kind,
-                       int isMartini, int count, double Rcut,
-                       double const *rCutCoulomb, double RcutLow, double Ron,
-                       double const *alpha, int ewald, double diElectric_1) {
+                       int isMartini, int count, double Rcut, double RcutSq,
+                       double const *rCutCoulomb, double const *rCutCoulombSq,
+                       double RcutLow, double Ron, double const *alpha,
+                       double const *alphaSq, int ewald, double diElectric_1) {
   int countSq = count * count;
   CUMALLOC((void **)&vars.gpu_sigmaSq, countSq * sizeof(double));
   CUMALLOC((void **)&vars.gpu_epsilon_Cn, countSq * sizeof(double));
@@ -42,14 +43,17 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
   CUMALLOC((void **)&vars.gpu_isMartini, sizeof(int));
   CUMALLOC((void **)&vars.gpu_count, sizeof(int));
   CUMALLOC((void **)&vars.gpu_rCut, sizeof(double));
+  CUMALLOC((void **)&vars.gpu_rCutSq, sizeof(double));
   CUMALLOC((void **)&vars.gpu_rCutCoulomb, BOX_TOTAL * sizeof(double));
+  CUMALLOC((void **)&vars.gpu_rCutCoulombSq, BOX_TOTAL * sizeof(double));
   CUMALLOC((void **)&vars.gpu_rCutLow, sizeof(double));
   CUMALLOC((void **)&vars.gpu_rOn, sizeof(double));
   CUMALLOC((void **)&vars.gpu_alpha, BOX_TOTAL * sizeof(double));
+  CUMALLOC((void **)&vars.gpu_alphaSq, BOX_TOTAL * sizeof(double));
   CUMALLOC((void **)&vars.gpu_ewald, sizeof(int));
   CUMALLOC((void **)&vars.gpu_diElectric_1, sizeof(double));
 
-  // allocate gpu memory for lambda variables
+  // allocate GPU memory for lambda variables
   CUMALLOC((void **)&vars.gpu_molIndex, (int)BOX_TOTAL * sizeof(int));
   CUMALLOC((void **)&vars.gpu_lambdaVDW, (int)BOX_TOTAL * sizeof(double));
   CUMALLOC((void **)&vars.gpu_lambdaCoulomb, (int)BOX_TOTAL * sizeof(double));
@@ -65,13 +69,18 @@ void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
              cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_count, &count, sizeof(int), cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_rCut, &Rcut, sizeof(double), cudaMemcpyHostToDevice);
+  cudaMemcpy(vars.gpu_rCutSq, &RcutSq, sizeof(double), cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_rCutCoulomb, rCutCoulomb, BOX_TOTAL * sizeof(double),
              cudaMemcpyHostToDevice);
+  cudaMemcpy(vars.gpu_rCutCoulombSq, rCutCoulombSq, BOX_TOTAL * sizeof(double),
+             cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_rCutLow, &RcutLow, sizeof(double),
              cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_rOn, &Ron, sizeof(double), cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_alpha, alpha, BOX_TOTAL * sizeof(double),
              cudaMemcpyHostToDevice);
+  cudaMemcpy(vars.gpu_alphaSq, alphaSq, BOX_TOTAL * sizeof(double),
+             cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_ewald, &ewald, sizeof(int), cudaMemcpyHostToDevice);
   cudaMemcpy(vars.gpu_diElectric_1, &diElectric_1, sizeof(double),
              cudaMemcpyHostToDevice);
diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cuh b/src/GPU/ConstantDefinitionsCUDAKernel.cuh
index 52a2a4693..3584e46c2 100644
--- a/src/GPU/ConstantDefinitionsCUDAKernel.cuh
+++ b/src/GPU/ConstantDefinitionsCUDAKernel.cuh
@@ -25,9 +25,9 @@ void UpdateGPULambda(VariablesCUDA *vars, int *molIndex, double *lambdaVDW,
 void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
                        double const *epsilon_Cn, double const *n,
                        int VDW_Kind, int isMartini, int count,
-                       double Rcut, double const *rCutCoulomb,
-                       double RcutLow, double Ron, double const *alpha,
-                       int ewald, double diElectric_1);
+                       double Rcut, double RcutSq, double const *rCutCoulomb,
+                       double const *rCutCoulombSq, double RcutLow, double Ron, double const *alpha,
+                       double const *alphaSq, int ewald, double diElectric_1);
 void InitCoordinatesCUDA(VariablesCUDA *vars, uint atomNumber,
                          uint maxAtomsInMol, uint maxMolNumber);
 void InitEwaldVariablesCUDA(VariablesCUDA *vars, uint imageTotal);
diff --git a/src/GPU/VariablesCUDA.cuh b/src/GPU/VariablesCUDA.cuh
index 8e95ca554..30376c273 100644
--- a/src/GPU/VariablesCUDA.cuh
+++ b/src/GPU/VariablesCUDA.cuh
@@ -66,10 +66,13 @@ public:
     gpu_isMartini = NULL;
     gpu_count = NULL;
     gpu_rCut = NULL;
+    gpu_rCutSq = NULL;
     gpu_rCutLow = NULL;
     gpu_rOn = NULL;
     gpu_alpha = NULL;
+    gpu_alphaSq = NULL;
     gpu_rCutCoulomb = NULL;
+    gpu_rCutCoulombSq = NULL;
     gpu_ewald = NULL;
     gpu_diElectric_1 = NULL;
     gpu_aForcex = NULL;
@@ -93,11 +96,11 @@ public:
   int *gpu_isMartini;
   int *gpu_count;
   int *gpu_startAtomIdx; //start atom index of the molecule
-  double *gpu_rCut;
-  double *gpu_rCutCoulomb;
+  double *gpu_rCut, *gpu_rCutSq;
+  double *gpu_rCutCoulomb, *gpu_rCutCoulombSq;
   double *gpu_rCutLow;
   double *gpu_rOn;
-  double *gpu_alpha;
+  double *gpu_alpha, *gpu_alphaSq;
   int *gpu_ewald;
   double *gpu_diElectric_1;
   double *gpu_x, *gpu_y, *gpu_z;

From 57181f0deacdc888117bec2685a783d0031aec7a Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Thu, 12 Dec 2024 21:40:50 -0500
Subject: [PATCH 41/42] Reformat source files using clang format

---
 src/BlockOutput.h                         |   8 +-
 src/ConfigSetup.cpp                       |  59 +--
 src/GPU/CUDAMemoryManager.cuh             |  19 +-
 src/GPU/CalculateEnergyCUDAKernel.cuh     | 238 ++++------
 src/GPU/CalculateEwaldCUDAKernel.cu       |  20 +-
 src/GPU/CalculateEwaldCUDAKernel.cuh      | 215 +++------
 src/GPU/CalculateForceCUDAKernel.cu       | 185 ++++----
 src/GPU/CalculateForceCUDAKernel.cuh      | 521 ++++++++--------------
 src/GPU/ConstantDefinitionsCUDAKernel.cuh |  19 +-
 src/GPU/VariablesCUDA.cuh                 |  62 +--
 src/Main.cpp                              |   7 +-
 src/MoveSettings.cpp                      |   2 +-
 src/PRNG.h                                |   3 -
 src/XYZArray.h                            |   2 +-
 14 files changed, 550 insertions(+), 810 deletions(-)

diff --git a/src/BlockOutput.h b/src/BlockOutput.h
index 302a427a3..047bcb226 100644
--- a/src/BlockOutput.h
+++ b/src/BlockOutput.h
@@ -111,13 +111,13 @@ struct BlockAverages : OutputableBase {
     stepsPerOut = event.frequency;
     invSteps = 1.0 / stepsPerOut;
     firstInvSteps = invSteps;
-    //Handle the case where we are restarting from a checkpoint and the first
-    //interval is smaller than expected because we create a checkpoint more
-    //often than the Block output frequency.
+    // Handle the case where we are restarting from a checkpoint and the first
+    // interval is smaller than expected because we create a checkpoint more
+    // often than the Block output frequency.
     if (startStep != 0 && (startStep % stepsPerOut) != 0) {
       ulong diff;
       diff = stepsPerOut - (startStep % stepsPerOut);
-      firstInvSteps = 1.0/diff;
+      firstInvSteps = 1.0 / diff;
     }
     enableOut = event.enable;
   }
diff --git a/src/ConfigSetup.cpp b/src/ConfigSetup.cpp
index d3cababdb..08bbccf60 100644
--- a/src/ConfigSetup.cpp
+++ b/src/ConfigSetup.cpp
@@ -1897,7 +1897,6 @@ void ConfigSetup::verifyInputs(void) {
     std::cout << "ERROR: Impulse Pressure Correction cannot be "
               << "used with LJ long-range corrections." << std::endl;
     exit(EXIT_FAILURE);
-
   }
   if (((sys.ff.VDW_KIND == sys.ff.VDW_SHIFT_KIND) ||
        (sys.ff.VDW_KIND == sys.ff.VDW_SWITCH_KIND)) &&
@@ -1905,7 +1904,6 @@ void ConfigSetup::verifyInputs(void) {
     std::cout << "ERROR: Impulse Pressure Correction is not supported "
               << "for SWITCH or SHIFT potentials." << std::endl;
     exit(EXIT_FAILURE);
-
   }
   if (sys.ff.doImpulsePressureCorr && sys.ff.doTailCorr) {
     std::cout << "ERROR: Both LRC (Long Range Correction) and "
@@ -2104,9 +2102,10 @@ void ConfigSetup::verifyInputs(void) {
     if (in.restart.restartFromBinaryCoorFile) {
       for (i = 0; i < BOX_TOTAL; i++) {
         if (!in.files.binaryCoorInput.defined[i]) {
-          std::cout << "ERROR: Binary coordinate file was not specified for box "
-                       "number "
-                    << i << "!" << std::endl;
+          std::cout
+              << "ERROR: Binary coordinate file was not specified for box "
+                 "number "
+              << i << "!" << std::endl;
           exit(EXIT_FAILURE);
         }
       }
@@ -2174,25 +2173,30 @@ void ConfigSetup::verifyInputs(void) {
     if ((sys.memcVal.MEMC1 && sys.memcVal.MEMC2) ||
         (sys.memcVal.MEMC1 && sys.memcVal.MEMC3) ||
         (sys.memcVal.MEMC2 && sys.memcVal.MEMC3)) {
-      std::cout << "ERROR: Multiple MEMC methods were specified, but only one is allowed!\n";
+      std::cout << "ERROR: Multiple MEMC methods were specified, but only one "
+                   "is allowed!\n";
       exit(EXIT_FAILURE);
     }
     if ((sys.intraMemcVal.MEMC1 && sys.intraMemcVal.MEMC2) ||
         (sys.intraMemcVal.MEMC1 && sys.intraMemcVal.MEMC3) ||
         (sys.intraMemcVal.MEMC2 && sys.intraMemcVal.MEMC3)) {
-      std::cout << "ERROR: Multiple Intra-MEMC methods are specified, but only one is allowed!\n";
+      std::cout << "ERROR: Multiple Intra-MEMC methods are specified, but only "
+                   "one is allowed!\n";
       exit(EXIT_FAILURE);
     }
     if (!sys.memcVal.readVol || !sys.intraMemcVal.readVol) {
-      std::cout << "ERROR: In the MEMC method, the Sub-Volume was not specified!\n";
+      std::cout
+          << "ERROR: In the MEMC method, the Sub-Volume was not specified!\n";
       exit(EXIT_FAILURE);
     }
     if (!sys.memcVal.readRatio || !sys.intraMemcVal.readRatio) {
-      std::cout << "ERROR: In the MEMC method, Exchange Ratio was not specified!\n";
+      std::cout
+          << "ERROR: In the MEMC method, Exchange Ratio was not specified!\n";
       exit(EXIT_FAILURE);
     }
     if (sys.memcVal.largeKind.size() != sys.memcVal.exchangeRatio.size()) {
-      std::cout << "ERROR: In the MEMC method, the specified number of Large Kinds was "
+      std::cout << "ERROR: In the MEMC method, the specified number of Large "
+                   "Kinds was "
                 << sys.memcVal.largeKind.size() << ", but "
                 << sys.memcVal.exchangeRatio.size()
                 << " exchange ratio was specified!\n";
@@ -2209,49 +2213,52 @@ void ConfigSetup::verifyInputs(void) {
     if ((sys.memcVal.largeKind.size() != sys.memcVal.smallKind.size()) ||
         (sys.intraMemcVal.largeKind.size() !=
          sys.intraMemcVal.smallKind.size())) {
-      std::cout
-          << "ERROR: In the MEMC method, the specified number of Large Kinds is not "
-          << " equal as specified number of Small Kinds!\n";
+      std::cout << "ERROR: In the MEMC method, the specified number of Large "
+                   "Kinds is not "
+                << " equal as specified number of Small Kinds!\n";
       exit(EXIT_FAILURE);
     }
     if (!sys.memcVal.readLargeBB || !sys.intraMemcVal.readLargeBB) {
-      std::cout
-          << "ERROR: In the MEMC method, Large Kind BackBone was not specified!\n";
+      std::cout << "ERROR: In the MEMC method, Large Kind BackBone was not "
+                   "specified!\n";
       exit(EXIT_FAILURE);
     }
     if (sys.memcVal.largeKind.size() != sys.memcVal.largeBBAtom1.size()) {
-      std::cout << "ERROR: In the MEMC method, the specified number of Large Kinds was "
+      std::cout << "ERROR: In the MEMC method, the specified number of Large "
+                   "Kinds was "
                 << sys.memcVal.largeKind.size() << ", but "
                 << sys.memcVal.largeBBAtom1.size()
                 << " sets of Large Molecule BackBone was specified!\n";
       exit(EXIT_FAILURE);
     }
     if (sys.memcVal.MEMC2 && !sys.memcVal.readSmallBB) {
-      std::cout
-          << "ERROR: In the MEMC-2 method, Small Kind BackBone was not specified!\n";
+      std::cout << "ERROR: In the MEMC-2 method, Small Kind BackBone was not "
+                   "specified!\n";
       exit(EXIT_FAILURE);
     }
 
     if (sys.memcVal.MEMC2 &&
         (sys.memcVal.smallKind.size() != sys.memcVal.smallBBAtom1.size())) {
-      std::cout
-          << "ERROR: In the MEMC-2 method, the specified number of Small Kinds was "
-          << sys.memcVal.smallKind.size() << ", but "
-          << sys.memcVal.smallBBAtom1.size()
-          << " sets of Small Molecule BackBone was specified!\n";
+      std::cout << "ERROR: In the MEMC-2 method, the specified number of Small "
+                   "Kinds was "
+                << sys.memcVal.smallKind.size() << ", but "
+                << sys.memcVal.smallBBAtom1.size()
+                << " sets of Small Molecule BackBone was specified!\n";
       exit(EXIT_FAILURE);
     }
 
     if (sys.intraMemcVal.MEMC2 && !sys.intraMemcVal.readSmallBB) {
-      std::cout << "ERROR: In the Intra-MEMC-2 method, Small Kind BackBone was not "
-                   "specified!\n";
+      std::cout
+          << "ERROR: In the Intra-MEMC-2 method, Small Kind BackBone was not "
+             "specified!\n";
       exit(EXIT_FAILURE);
     }
     if (sys.memcVal.enable && sys.intraMemcVal.enable) {
       if ((sys.memcVal.MEMC1 && !sys.intraMemcVal.MEMC1) ||
           (sys.memcVal.MEMC2 && !sys.intraMemcVal.MEMC2) ||
           (sys.memcVal.MEMC3 && !sys.intraMemcVal.MEMC3)) {
-        std::cout << "ERROR: The selected intra-MEMC method was not same as the inter-MEMC method!\n";
+        std::cout << "ERROR: The selected intra-MEMC method was not same as "
+                     "the inter-MEMC method!\n";
         exit(EXIT_FAILURE);
       }
     }
diff --git a/src/GPU/CUDAMemoryManager.cuh b/src/GPU/CUDAMemoryManager.cuh
index 6f85af098..d4e76b910 100644
--- a/src/GPU/CUDAMemoryManager.cuh
+++ b/src/GPU/CUDAMemoryManager.cuh
@@ -2,7 +2,8 @@
 GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
 Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
-along with this program, also can be found at <https://opensource.org/licenses/MIT>.
+along with this program, also can be found at
+<https://opensource.org/licenses/MIT>.
 ********************************************************************************/
 #ifndef CUDA_MEMORY_MANAGER_H
 #define CUDA_MEMORY_MANAGER_H
@@ -10,22 +11,24 @@ along with this program, also can be found at <https://opensource.org/licenses/M
 #ifdef GOMC_CUDA
 #include <cuda.h>
 #include <cuda_runtime.h>
-#include <unordered_map>
 #include <iostream>
+#include <unordered_map>
 
-#define CUMALLOC(address,size) CUDAMemoryManager::mallocMemory(address,size,#address)
-#define CUFREE(address) CUDAMemoryManager::freeMemory(address,#address)
+#define CUMALLOC(address, size)                                                \
+  CUDAMemoryManager::mallocMemory(address, size, #address)
+#define CUFREE(address) CUDAMemoryManager::freeMemory(address, #address)
 
-class CUDAMemoryManager
-{
+class CUDAMemoryManager {
 public:
-  static cudaError_t mallocMemory(void **address, unsigned int size, std::string var_name);
+  static cudaError_t mallocMemory(void **address, unsigned int size,
+                                  std::string var_name);
   static cudaError_t freeMemory(void *address, std::string var_name);
   static bool isFreed();
 
 private:
   static long long totalAllocatedBytes;
-  static std::unordered_map<void *, std::pair<unsigned int, std::string> > allocatedPointers;
+  static std::unordered_map<void *, std::pair<unsigned int, std::string>>
+      allocatedPointers;
 };
 
 #endif
diff --git a/src/GPU/CalculateEnergyCUDAKernel.cuh b/src/GPU/CalculateEnergyCUDAKernel.cuh
index 4f38e8141..d65a3764b 100644
--- a/src/GPU/CalculateEnergyCUDAKernel.cuh
+++ b/src/GPU/CalculateEnergyCUDAKernel.cuh
@@ -2,95 +2,56 @@
 GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
 Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
-along with this program, also can be found at <https://opensource.org/licenses/MIT>.
+along with this program, also can be found at
+<https://opensource.org/licenses/MIT>.
 ********************************************************************************/
 #ifndef CALCULATE_ENERGY_CUDA_KERNEL_H
 #define CALCULATE_ENERGY_CUDA_KERNEL_H
 
 #ifdef GOMC_CUDA
+#include "BoxDimensions.h"
+#include "VariablesCUDA.cuh"
+#include "XYZArray.h"
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <vector>
-#include "XYZArray.h"
-#include "BoxDimensions.h"
-#include "VariablesCUDA.cuh"
 
-void CallBoxInterGPU(VariablesCUDA *vars,
-                     const std::vector<int> &cellVector,
+void CallBoxInterGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
                      const std::vector<int> &cellStartIndex,
-                     const std::vector<std::vector<int> > &neighborList,
-                     XYZArray const &coords,
-                     BoxDimensions const &boxAxes,
+                     const std::vector<std::vector<int>> &neighborList,
+                     XYZArray const &coords, BoxDimensions const &boxAxes,
                      bool electrostatic,
                      const std::vector<double> &particleCharge,
                      const std::vector<int> &particleKind,
-                     const std::vector<int> &particleMol,
-                     double &REn,
-                     double &LJEn,
-                     bool sc_coul,
-                     double sc_sigma_6,
-                     double sc_alpha,
-                     uint sc_power,
-                     uint const box);
-
-__global__ void BoxInterGPU(int *gpu_cellStartIndex,
-                            int *gpu_cellVector,
-                            int *gpu_neighborList,
-                            int numberOfCells,
-                            double *gpu_x,
-                            double *gpu_y,
-                            double *gpu_z,
-                            double3 axis,
-                            double3 halfAx,
-                            bool electrostatic,
-                            double *gpu_particleCharge,
-                            int *gpu_particleKind,
-                            int *gpu_particleMol,
-                            double *gpu_REn,
-                            double *gpu_LJEn,
-                            double *gpu_sigmaSq,
-                            double *gpu_epsilon_Cn,
-                            double *gpu_n,
-                            int *gpu_VDW_Kind,
-                            int *gpu_isMartini,
-                            int *gpu_count,
-                            double *gpu_rCut,
-                            double *gpu_rCutCoulomb,
-                            double *gpu_rCutLow,
-                            double *gpu_rOn,
-                            double *gpu_alpha,
-                            int *gpu_ewald,
-                            double *gpu_diElectric_1,
-                            int *gpu_nonOrth,
-                            double *gpu_cell_x,
-                            double *gpu_cell_y,
-                            double *gpu_cell_z,
-                            double *gpu_Invcell_x,
-                            double *gpu_Invcell_y,
-                            double *gpu_Invcell_z,
-                            bool sc_coul,
-                            double sc_sigma_6,
-                            double sc_alpha,
-                            uint sc_power,
-                            double *gpu_rMin,
-                            double *gpu_rMaxSq,
-                            double *gpu_expConst,
-                            int *gpu_molIndex,
-                            double *gpu_lambdaVDW,
-                            double *gpu_lambdaCoulomb,
-                            bool *gpu_isFraction,
-                            int box);
+                     const std::vector<int> &particleMol, double &REn,
+                     double &LJEn, bool sc_coul, double sc_sigma_6,
+                     double sc_alpha, uint sc_power, uint const box);
 
+__global__ void
+BoxInterGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
+            int numberOfCells, double *gpu_x, double *gpu_y, double *gpu_z,
+            double3 axis, double3 halfAx, bool electrostatic,
+            double *gpu_particleCharge, int *gpu_particleKind,
+            int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn,
+            double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n,
+            int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count,
+            double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow,
+            double *gpu_rOn, double *gpu_alpha, int *gpu_ewald,
+            double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x,
+            double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x,
+            double *gpu_Invcell_y, double *gpu_Invcell_z, bool sc_coul,
+            double sc_sigma_6, double sc_alpha, uint sc_power, double *gpu_rMin,
+            double *gpu_rMaxSq, double *gpu_expConst, int *gpu_molIndex,
+            double *gpu_lambdaVDW, double *gpu_lambdaCoulomb,
+            bool *gpu_isFraction, int box);
 
-__device__ double CalcCoulombGPU(double distSq, int kind1, int kind2,
-                                 double qi_qj_fact, double gpu_rCutLow,
-                                 int gpu_ewald, int gpu_VDW_Kind,
-                                 double gpu_alpha, double gpu_rCutCoulomb,
-                                 int gpu_isMartini, double gpu_diElectric_1,
-                                 double gpu_lambdaCoulomb, bool sc_coul,
-                                 double sc_sigma_6, double sc_alpha,
-                                 uint sc_power, double *gpu_sigmaSq,
-                                 int gpu_count);
+__device__ double
+CalcCoulombGPU(double distSq, int kind1, int kind2, double qi_qj_fact,
+               double gpu_rCutLow, int gpu_ewald, int gpu_VDW_Kind,
+               double gpu_alpha, double gpu_rCutCoulomb, int gpu_isMartini,
+               double gpu_diElectric_1, double gpu_lambdaCoulomb, bool sc_coul,
+               double sc_sigma_6, double sc_alpha, uint sc_power,
+               double *gpu_sigmaSq, int gpu_count);
 __device__ double CalcCoulombVirGPU(double distSq, double qi_qj,
                                     double gpu_rCutCoulomb, double gpu_alpha,
                                     int gpu_VDW_Kind, int gpu_ewald,
@@ -104,78 +65,74 @@ __device__ double CalcEnGPU(double distSq, int kind1, int kind2,
                             double *gpu_rMin, double *gpu_rMaxSq,
                             double *gpu_expConst);
 
-//ElectroStatic Calculation
+// ElectroStatic Calculation
 //**************************************************************//
-__device__ double CalcCoulombParticleGPU(double distSq, int index, double qi_qj_fact,
-    int gpu_ewald, double gpu_alpha,
-    double gpu_lambdaCoulomb, bool sc_coul,
-    double sc_sigma_6, double sc_alpha,
-    uint sc_power, double *gpu_sigmaSq);
+__device__ double CalcCoulombParticleGPU(double distSq, int index,
+                                         double qi_qj_fact, int gpu_ewald,
+                                         double gpu_alpha,
+                                         double gpu_lambdaCoulomb, bool sc_coul,
+                                         double sc_sigma_6, double sc_alpha,
+                                         uint sc_power, double *gpu_sigmaSq);
 __device__ double CalcCoulombParticleGPUNoLambda(double distSq,
-    double qi_qj_fact,
-    int gpu_ewald,
-    double gpu_alpha);
-__device__ double CalcCoulombShiftGPU(double distSq, int index, double qi_qj_fact,
-                                      int gpu_ewald, double gpu_alpha,
-                                      double gpu_rCut, double gpu_lambdaCoulomb,
-                                      bool sc_coul, double sc_sigma_6,
-                                      double sc_alpha, uint sc_power,
-                                      double *gpu_sigmaSq);
+                                                 double qi_qj_fact,
+                                                 int gpu_ewald,
+                                                 double gpu_alpha);
+__device__ double CalcCoulombShiftGPU(double distSq, int index,
+                                      double qi_qj_fact, int gpu_ewald,
+                                      double gpu_alpha, double gpu_rCut,
+                                      double gpu_lambdaCoulomb, bool sc_coul,
+                                      double sc_sigma_6, double sc_alpha,
+                                      uint sc_power, double *gpu_sigmaSq);
 __device__ double CalcCoulombShiftGPUNoLambda(double distSq, double qi_qj_fact,
-    int gpu_ewald, double gpu_alpha,
-    double gpu_rCut);
-__device__ double CalcCoulombExp6GPU(double distSq, int index, double qi_qj_fact,
-                                     int gpu_ewald, double gpu_alpha,
-                                     double gpu_lambdaCoulomb, bool sc_coul,
-                                     double sc_sigma_6, double sc_alpha,
-                                     uint sc_power, double *gpu_sigmaSq);
+                                              int gpu_ewald, double gpu_alpha,
+                                              double gpu_rCut);
+__device__ double CalcCoulombExp6GPU(double distSq, int index,
+                                     double qi_qj_fact, int gpu_ewald,
+                                     double gpu_alpha, double gpu_lambdaCoulomb,
+                                     bool sc_coul, double sc_sigma_6,
+                                     double sc_alpha, uint sc_power,
+                                     double *gpu_sigmaSq);
 __device__ double CalcCoulombExp6GPUNoLambda(double distSq, double qi_qj_fact,
-    int gpu_ewald, double gpu_alpha);
-__device__ double CalcCoulombSwitchMartiniGPU(double distSq, int index, double qi_qj_fact,
-    int gpu_ewald, double gpu_alpha,
-    double gpu_rCut,
-    double gpu_diElectric_1,
-    double gpu_lambdaCoulomb,
-    bool sc_coul, double sc_sigma_6,
-    double sc_alpha, uint sc_power,
-    double *gpu_sigmaSq);
-__device__ double CalcCoulombSwitchMartiniGPUNoLambda(double distSq,
-    double qi_qj_fact,
-    int gpu_ewald,
-    double gpu_alpha,
-    double gpu_rCut,
-    double gpu_diElectric_1);
-__device__ double CalcCoulombSwitchGPU(double distSq, int index, double qi_qj_fact,
-                                       double gpu_alpha, int gpu_ewald,
-                                       double gpu_rCut,
+                                             int gpu_ewald, double gpu_alpha);
+__device__ double
+CalcCoulombSwitchMartiniGPU(double distSq, int index, double qi_qj_fact,
+                            int gpu_ewald, double gpu_alpha, double gpu_rCut,
+                            double gpu_diElectric_1, double gpu_lambdaCoulomb,
+                            bool sc_coul, double sc_sigma_6, double sc_alpha,
+                            uint sc_power, double *gpu_sigmaSq);
+__device__ double
+CalcCoulombSwitchMartiniGPUNoLambda(double distSq, double qi_qj_fact,
+                                    int gpu_ewald, double gpu_alpha,
+                                    double gpu_rCut, double gpu_diElectric_1);
+__device__ double CalcCoulombSwitchGPU(double distSq, int index,
+                                       double qi_qj_fact, double gpu_alpha,
+                                       int gpu_ewald, double gpu_rCut,
                                        double gpu_lambdaCoulomb, bool sc_coul,
                                        double sc_sigma_6, double sc_alpha,
                                        uint sc_power, double *gpu_sigmaSq);
 __device__ double CalcCoulombSwitchGPUNoLambda(double distSq, double qi_qj_fact,
-    int gpu_ewald, double gpu_alpha, double gpu_rCut);
-
+                                               int gpu_ewald, double gpu_alpha,
+                                               double gpu_rCut);
 
-//VDW Calculation
+// VDW Calculation
 //*****************************************************************//
 __device__ double CalcEnParticleGPU(double distSq, int index,
                                     double *gpu_sigmaSq, double *gpu_n,
                                     double *gpu_epsilon_Cn,
-                                    double gpu_lambdaVDW,
-                                    double sc_sigma_6,
-                                    double sc_alpha,
-                                    uint sc_power);
+                                    double gpu_lambdaVDW, double sc_sigma_6,
+                                    double sc_alpha, uint sc_power);
 __device__ double CalcEnParticleGPUNoLambda(double distSq, int index,
-    double *gpu_sigmaSq, double *gpu_n,
-    double *gpu_epsilon_Cn);
+                                            double *gpu_sigmaSq, double *gpu_n,
+                                            double *gpu_epsilon_Cn);
 __device__ double CalcEnShiftGPU(double distSq, int index, double *gpu_sigmaSq,
                                  double *gpu_n, double *gpu_epsilon_Cn,
                                  double gpu_rCut, double gpu_lambdaVDW,
                                  double sc_sigma_6, double sc_alpha,
                                  uint sc_power);
 __device__ double CalcEnShiftGPUNoLambda(double distSq, int index,
-    double *gpu_sigmaSq,
-    double *gpu_n, double *gpu_epsilon_Cn,
-    double gpu_rCut);
+                                         double *gpu_sigmaSq, double *gpu_n,
+                                         double *gpu_epsilon_Cn,
+                                         double gpu_rCut);
 __device__ double CalcEnExp6GPU(double distSq, int index, double *gpu_sigmaSq,
                                 double *gpu_n, double gpu_lambdaVDW,
                                 double sc_sigma_6, double sc_alpha,
@@ -183,29 +140,24 @@ __device__ double CalcEnExp6GPU(double distSq, int index, double *gpu_sigmaSq,
                                 double *gpu_rMaxSq, double *gpu_expConst);
 __device__ double CalcEnExp6GPUNoLambda(double distSq, int index, double *gpu_n,
                                         double *gpu_rMin, double *gpu_expConst);
-__device__ double CalcEnSwitchMartiniGPU(double distSq, int index,
-    double *gpu_sigmaSq, double *gpu_n,
-    double *gpu_epsilon_Cn,
-    double gpu_rCut, double gpu_rOn,
-    double gpu_lambdaVDW,
-    double sc_sigma_6,
-    double sc_alpha,
-    uint sc_power);
-__device__ double CalcEnSwitchMartiniGPUNoLambda(double distSq, int index,
-    double *gpu_sigmaSq,
-    double *gpu_n,
-    double *gpu_epsilon_Cn,
-    double gpu_rCut,
-    double gpu_rOn);
+__device__ double
+CalcEnSwitchMartiniGPU(double distSq, int index, double *gpu_sigmaSq,
+                       double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut,
+                       double gpu_rOn, double gpu_lambdaVDW, double sc_sigma_6,
+                       double sc_alpha, uint sc_power);
+__device__ double
+CalcEnSwitchMartiniGPUNoLambda(double distSq, int index, double *gpu_sigmaSq,
+                               double *gpu_n, double *gpu_epsilon_Cn,
+                               double gpu_rCut, double gpu_rOn);
 __device__ double CalcEnSwitchGPU(double distSq, int index, double *gpu_sigmaSq,
                                   double *gpu_n, double *gpu_epsilon_Cn,
                                   double gpu_rCut, double gpu_rOn,
                                   double gpu_lambdaVDW, double sc_sigma_6,
                                   double sc_alpha, uint sc_power);
 __device__ double CalcEnSwitchGPUNoLambda(double distSq, int index,
-    double *gpu_sigmaSq, double *gpu_n,
-    double *gpu_epsilon_Cn,
-    double gpu_rCut, double gpu_rOn);
+                                          double *gpu_sigmaSq, double *gpu_n,
+                                          double *gpu_epsilon_Cn,
+                                          double gpu_rCut, double gpu_rOn);
 
 #endif /*GOMC_CUDA*/
 #endif /*CALCULATE_ENERGY_CUDA_KERNEL_H*/
diff --git a/src/GPU/CalculateEwaldCUDAKernel.cu b/src/GPU/CalculateEwaldCUDAKernel.cu
index 164091427..8c92d9f26 100644
--- a/src/GPU/CalculateEwaldCUDAKernel.cu
+++ b/src/GPU/CalculateEwaldCUDAKernel.cu
@@ -558,14 +558,15 @@ __global__ void BoxForceReciprocalGPU(
     double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz,
     double *gpu_particleCharge, int *gpu_particleMol,
     bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol,
-    int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq, double constValue,
-    int imageSize, double *gpu_kx, double *gpu_ky, double *gpu_kz,
-    double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_prefact,
-    double *gpu_sumRnew, double *gpu_sumInew, bool *gpu_isFraction,
-    int *gpu_molIndex, double *gpu_lambdaCoulomb, double *gpu_cell_x,
-    double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x,
-    double *gpu_Invcell_y, double *gpu_Invcell_z, int *gpu_nonOrth, double axx,
-    double axy, double axz, int box, int atomCount) {
+    int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq,
+    double constValue, int imageSize, double *gpu_kx, double *gpu_ky,
+    double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z,
+    double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew,
+    bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb,
+    double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
+    double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
+    int *gpu_nonOrth, double axx, double axy, double axz, int box,
+    int atomCount) {
   __shared__ double shared_kvector[IMAGES_PER_BLOCK * 3];
   int particleID = blockDim.x * blockIdx.x + threadIdx.x;
   int offset_vector_index = blockIdx.y * IMAGES_PER_BLOCK;
@@ -631,7 +632,8 @@ __global__ void BoxForceReciprocalGPU(
         double qiqj = gpu_particleCharge[particleID] *
                       gpu_particleCharge[otherParticle] * qqFactGPU;
         intraForce = qiqj * lambdaCoef * lambdaCoef / distSq;
-        intraForce *= ((erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue);
+        intraForce *=
+            ((erf(gpu_alpha[box] * dist) / dist) - constValue * expConstValue);
         forceX -= intraForce * distVect.x;
         forceY -= intraForce * distVect.y;
         forceZ -= intraForce * distVect.z;
diff --git a/src/GPU/CalculateEwaldCUDAKernel.cuh b/src/GPU/CalculateEwaldCUDAKernel.cuh
index 80062586b..638d5662c 100644
--- a/src/GPU/CalculateEwaldCUDAKernel.cuh
+++ b/src/GPU/CalculateEwaldCUDAKernel.cuh
@@ -2,189 +2,112 @@
 GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
 Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
-along with this program, also can be found at <https://opensource.org/licenses/MIT>.
+along with this program, also can be found at
+<https://opensource.org/licenses/MIT>.
 ********************************************************************************/
 #ifndef CALCULATE_EWALD_CUDA_KERNEL_H
 #define CALCULATE_EWALD_CUDA_KERNEL_H
 
 #ifdef GOMC_CUDA
+#include "VariablesCUDA.cuh"
+#include "XYZArray.h"
 #include <cuda.h>
 #include <cuda_runtime.h>
-#include "VariablesCUDA.cuh"
 #include <vector>
-#include "XYZArray.h"
 
-void CallBoxForceReciprocalGPU(VariablesCUDA *vars,
-                               XYZArray &atomForceRec,
-                               XYZArray &molForceRec,
-                               const std::vector<double> &particleCharge,
-                               const std::vector<int> &particleMol,
-                               const std::vector<bool> &particleHasNoCharge,
-                               const bool *particleUsed,
-                               const std::vector<int> &startMol,
-                               const std::vector<int> &lengthMol,
-                               double constValue,
-                               uint imageSize,
-                               XYZArray const &molCoords,
-                               BoxDimensions const &boxAxes,
-                               int box);
+void CallBoxForceReciprocalGPU(
+    VariablesCUDA *vars, XYZArray &atomForceRec, XYZArray &molForceRec,
+    const std::vector<double> &particleCharge,
+    const std::vector<int> &particleMol,
+    const std::vector<bool> &particleHasNoCharge, const bool *particleUsed,
+    const std::vector<int> &startMol, const std::vector<int> &lengthMol,
+    double constValue, uint imageSize, XYZArray const &molCoords,
+    BoxDimensions const &boxAxes, int box);
 
-void CallBoxReciprocalSetupGPU(VariablesCUDA *vars,
-                               XYZArray const &coords,
-                               double const *kx,
-                               double const *ky,
+void CallBoxReciprocalSetupGPU(VariablesCUDA *vars, XYZArray const &coords,
+                               double const *kx, double const *ky,
                                double const *kz,
                                const std::vector<double> &particleCharge,
-                               uint imageSize,
-                               double *sumRnew,
-                               double *sumInew,
-                               double *prefact,
-                               double *hsqr,
-                               double &energyRecip,
-                               uint box);
+                               uint imageSize, double *sumRnew, double *sumInew,
+                               double *prefact, double *hsqr,
+                               double &energyRecip, uint box);
 
-void CallBoxReciprocalSumsGPU(VariablesCUDA *vars,
-                              XYZArray const &coords,
+void CallBoxReciprocalSumsGPU(VariablesCUDA *vars, XYZArray const &coords,
                               const std::vector<double> &particleCharge,
-                              uint imageSize,
-                              double *sumRnew,
-                              double *sumInew,
-                              double &energyRecip,
-                              uint box);
+                              uint imageSize, double *sumRnew, double *sumInew,
+                              double &energyRecip, uint box);
 
-void CallMolReciprocalGPU(VariablesCUDA *vars,
-                          XYZArray const &currentCoords,
+void CallMolReciprocalGPU(VariablesCUDA *vars, XYZArray const &currentCoords,
                           XYZArray const &newCoords,
                           const std::vector<double> &particleCharge,
-                          uint imageSize,
-                          double *sumRnew,
-                          double *sumInew,
-                          double &energyRecipNew,
-                          uint box);
+                          uint imageSize, double *sumRnew, double *sumInew,
+                          double &energyRecipNew, uint box);
 
-//Calculate reciprocal term for lambdaNew and Old with same coordinates
+// Calculate reciprocal term for lambdaNew and Old with same coordinates
 void CallChangeLambdaMolReciprocalGPU(VariablesCUDA *vars,
                                       XYZArray const &coords,
                                       const std::vector<double> &particleCharge,
-                                      uint imageSize,
-                                      double *sumRnew,
-                                      double *sumInew,
-                                      double &energyRecipNew,
-                                      const double lambdaCoef,
-                                      uint box);
+                                      uint imageSize, double *sumRnew,
+                                      double *sumInew, double &energyRecipNew,
+                                      const double lambdaCoef, uint box);
 
-void CallSwapReciprocalGPU(VariablesCUDA *vars,
-                           XYZArray const &coords,
+void CallSwapReciprocalGPU(VariablesCUDA *vars, XYZArray const &coords,
                            const std::vector<double> &particleCharge,
-                           uint imageSize,
-                           double *sumRnew,
-                           double *sumInew,
-                           const bool insert,
-                           double &energyRecipNew,
-                           uint box);
+                           uint imageSize, double *sumRnew, double *sumInew,
+                           const bool insert, double &energyRecipNew, uint box);
 
-void CallMolExchangeReciprocalGPU(VariablesCUDA *vars,
-                                  uint imageSize,
-                                  double *sumRnew,
-                                  double *sumInew,
-                                  uint box);
+void CallMolExchangeReciprocalGPU(VariablesCUDA *vars, uint imageSize,
+                                  double *sumRnew, double *sumInew, uint box);
 
-__global__ void BoxForceReciprocalGPU(double *gpu_aForceRecx,
-                                      double *gpu_aForceRecy,
-                                      double *gpu_aForceRecz,
-                                      double *gpu_mForceRecx,
-                                      double *gpu_mForceRecy,
-                                      double *gpu_mForceRecz,
-                                      double *gpu_particleCharge,
-                                      int *gpu_particleMol,
-                                      bool *gpu_particleHasNoCharge,
-                                       bool *gpu_particleUsed,
-                                     int *gpu_startMol,
-                                      int *gpu_lengthMol,
-                                      double *gpu_alpha,
-                                      double *gpu_alphaSq,
-                                      double constValue,
-                                      int imageSize,
-                                      double *gpu_kx,
-                                      double *gpu_ky,
-                                      double *gpu_kz,
-                                      double *gpu_x,
-                                      double *gpu_y,
-                                      double *gpu_z,
-                                      double *gpu_prefact,
-                                      double *gpu_sumRnew,
-                                      double *gpu_sumInew,
-                                      bool *gpu_isFraction,
-                                      int *gpu_molIndex,
-                                      double *gpu_lambdaCoulomb,
-                                      double *gpu_cell_x,
-                                      double *gpu_cell_y,
-                                      double *gpu_cell_z,
-                                      double *gpu_Invcell_x,
-                                      double *gpu_Invcell_y,
-                                      double *gpu_Invcell_z,
-                                      int *gpu_nonOrth,
-                                      double axx,
-                                      double axy,
-                                      double axz,
-                                      int box,
-                                      int atomCount);
+__global__ void BoxForceReciprocalGPU(
+    double *gpu_aForceRecx, double *gpu_aForceRecy, double *gpu_aForceRecz,
+    double *gpu_mForceRecx, double *gpu_mForceRecy, double *gpu_mForceRecz,
+    double *gpu_particleCharge, int *gpu_particleMol,
+    bool *gpu_particleHasNoCharge, bool *gpu_particleUsed, int *gpu_startMol,
+    int *gpu_lengthMol, double *gpu_alpha, double *gpu_alphaSq,
+    double constValue, int imageSize, double *gpu_kx, double *gpu_ky,
+    double *gpu_kz, double *gpu_x, double *gpu_y, double *gpu_z,
+    double *gpu_prefact, double *gpu_sumRnew, double *gpu_sumInew,
+    bool *gpu_isFraction, int *gpu_molIndex, double *gpu_lambdaCoulomb,
+    double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
+    double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
+    int *gpu_nonOrth, double axx, double axy, double axz, int box,
+    int atomCount);
 
-__global__ void BoxReciprocalSumsGPU(double * gpu_x,
-                                     double * gpu_y,
-                                     double * gpu_z,
-                                     double * gpu_kx,
-                                     double * gpu_ky,
-                                     double * gpu_kz,
-                                     int atomNumber,
-                                     double * gpu_particleCharge,
-                                     double * gpu_sumRnew,
-                                     double * gpu_sumInew,
+__global__ void BoxReciprocalSumsGPU(double *gpu_x, double *gpu_y,
+                                     double *gpu_z, double *gpu_kx,
+                                     double *gpu_ky, double *gpu_kz,
+                                     int atomNumber, double *gpu_particleCharge,
+                                     double *gpu_sumRnew, double *gpu_sumInew,
                                      int imageSize);
 
 __global__ void MolReciprocalGPU(double *gpu_cx, double *gpu_cy, double *gpu_cz,
                                  double *gpu_nx, double *gpu_ny, double *gpu_nz,
                                  double *gpu_kx, double *gpu_ky, double *gpu_kz,
-                                 int atomNumber,
-                                 double *gpu_particleCharge,
-                                 double *gpu_sumRnew,
-                                 double *gpu_sumInew,
-                                 double *gpu_sumRref,
-                                 double *gpu_sumIref,
+                                 int atomNumber, double *gpu_particleCharge,
+                                 double *gpu_sumRnew, double *gpu_sumInew,
+                                 double *gpu_sumRref, double *gpu_sumIref,
                                  double *gpu_prefactRef,
-                                 double *gpu_energyRecipNew,
-                                 int imageSize);
+                                 double *gpu_energyRecipNew, int imageSize);
 
-__global__ void ChangeLambdaMolReciprocalGPU(double *gpu_x, double *gpu_y, double *gpu_z,
-                                            double *gpu_kx, double *gpu_ky, double *gpu_kz,
-                                            int atomNumber,
-                                            double *gpu_particleCharge,
-                                            double *gpu_sumRnew,
-                                            double *gpu_sumInew,
-                                            double *gpu_sumRref,
-                                            double *gpu_sumIref,
-                                            double *gpu_prefactRef,
-                                            double *gpu_energyRecipNew,
-                                            double lambdaCoef,
-                                            int imageSize);
+__global__ void ChangeLambdaMolReciprocalGPU(
+    double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_kx, double *gpu_ky,
+    double *gpu_kz, int atomNumber, double *gpu_particleCharge,
+    double *gpu_sumRnew, double *gpu_sumInew, double *gpu_sumRref,
+    double *gpu_sumIref, double *gpu_prefactRef, double *gpu_energyRecipNew,
+    double lambdaCoef, int imageSize);
 
 __global__ void SwapReciprocalGPU(double *gpu_x, double *gpu_y, double *gpu_z,
-                                  double *gpu_kx, double *gpu_ky, double *gpu_kz,
-                                  int atomNumber,
+                                  double *gpu_kx, double *gpu_ky,
+                                  double *gpu_kz, int atomNumber,
                                   double *gpu_particleCharge,
-                                  double *gpu_sumRnew,
-                                  double *gpu_sumInew,
-                                  double *gpu_sumRref,
-                                  double *gpu_sumIref,
-                                  double *gpu_prefactRef,
-                                  const bool insert,
-                                  double *gpu_energyRecipNew,
-                                  int imageSize);
+                                  double *gpu_sumRnew, double *gpu_sumInew,
+                                  double *gpu_sumRref, double *gpu_sumIref,
+                                  double *gpu_prefactRef, const bool insert,
+                                  double *gpu_energyRecipNew, int imageSize);
 
-__global__ void BoxReciprocalGPU(double *gpu_prefact,
-                                 double *gpu_sumRnew,
-                                 double *gpu_sumInew,
-                                 double *gpu_energyRecip,
+__global__ void BoxReciprocalGPU(double *gpu_prefact, double *gpu_sumRnew,
+                                 double *gpu_sumInew, double *gpu_energyRecip,
                                  int imageSize);
 
 #endif /*GOMC_CUDA*/
diff --git a/src/GPU/CalculateForceCUDAKernel.cu b/src/GPU/CalculateForceCUDAKernel.cu
index 219a6cafb..79360471a 100644
--- a/src/GPU/CalculateForceCUDAKernel.cu
+++ b/src/GPU/CalculateForceCUDAKernel.cu
@@ -125,13 +125,14 @@ void CallBoxInterForceGPU(
       vars->gpu_vT13, vars->gpu_vT22, vars->gpu_vT23, vars->gpu_vT33,
       vars->gpu_sigmaSq, vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind,
       vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut,
-      vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq,
-      vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_cell_x[box],
-      vars->gpu_cell_y[box], vars->gpu_cell_z[box], vars->gpu_Invcell_x[box],
-      vars->gpu_Invcell_y[box], vars->gpu_Invcell_z[box], vars->gpu_nonOrth,
-      sc_coul, sc_sigma_6, sc_alpha, sc_power, vars->gpu_rMin, vars->gpu_rMaxSq,
-      vars->gpu_expConst, vars->gpu_molIndex, vars->gpu_lambdaVDW,
-      vars->gpu_lambdaCoulomb, vars->gpu_isFraction, box);
+      vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha,
+      vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1,
+      vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box],
+      vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
+      vars->gpu_Invcell_z[box], vars->gpu_nonOrth, sc_coul, sc_sigma_6,
+      sc_alpha, sc_power, vars->gpu_rMin, vars->gpu_rMaxSq, vars->gpu_expConst,
+      vars->gpu_molIndex, vars->gpu_lambdaVDW, vars->gpu_lambdaCoulomb,
+      vars->gpu_isFraction, box);
   checkLastErrorCUDA(__FILE__, __LINE__);
   cudaDeviceSynchronize();
   // ReduceSum // Virial of LJ
@@ -302,10 +303,10 @@ void CallBoxForceGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
       gpu_particleKind, gpu_particleMol, gpu_REn, gpu_LJEn, vars->gpu_sigmaSq,
       vars->gpu_epsilon_Cn, vars->gpu_n, vars->gpu_VDW_Kind,
       vars->gpu_isMartini, vars->gpu_count, vars->gpu_rCut,
-      vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha, vars->gpu_alphaSq,
-      vars->gpu_ewald, vars->gpu_diElectric_1, vars->gpu_nonOrth,
-      vars->gpu_cell_x[box], vars->gpu_cell_y[box], vars->gpu_cell_z[box],
-      vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
+      vars->gpu_rCutCoulomb, vars->gpu_rCutLow, vars->gpu_rOn, vars->gpu_alpha,
+      vars->gpu_alphaSq, vars->gpu_ewald, vars->gpu_diElectric_1,
+      vars->gpu_nonOrth, vars->gpu_cell_x[box], vars->gpu_cell_y[box],
+      vars->gpu_cell_z[box], vars->gpu_Invcell_x[box], vars->gpu_Invcell_y[box],
       vars->gpu_Invcell_z[box], vars->gpu_aForcex, vars->gpu_aForcey,
       vars->gpu_aForcez, vars->gpu_mForcex, vars->gpu_mForcey,
       vars->gpu_mForcez, sc_coul, sc_sigma_6, sc_alpha, sc_power,
@@ -577,9 +578,9 @@ __global__ void BoxInterForceGPU(
                 mA, mB, box, gpu_isFraction, gpu_molIndex, gpu_lambdaCoulomb);
             double pRF = CalcCoulombForceGPU(
                 distSq, qi_qj, gpu_VDW_Kind[0], gpu_ewald[0], gpu_isMartini[0],
-                gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box], gpu_diElectric_1[0],
-                gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha, sc_power,
-                lambdaCoulomb, gpu_count[0], kA, kB);
+                gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box],
+                gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha,
+                sc_power, lambdaCoulomb, gpu_count[0], kA, kB);
 
             gpu_rT11[threadID] += pRF * (virComponents.x * diff_com.x);
             gpu_rT22[threadID] += pRF * (virComponents.y * diff_com.y);
@@ -599,25 +600,24 @@ __global__ void BoxInterForceGPU(
   }
 }
 
-__global__ void
-BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
-            int numberOfCells, int atomNumber, int *gpu_mapParticleToCell,
-            double *gpu_x, double *gpu_y, double *gpu_z, double3 axis,
-            double3 halfAx, bool electrostatic, double *gpu_particleCharge,
-            int *gpu_particleKind, int *gpu_particleMol, double *gpu_REn,
-            double *gpu_LJEn, double *gpu_sigmaSq, double *gpu_epsilon_Cn,
-            double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini,
-            int *gpu_count, double *gpu_rCut, double *gpu_rCutCoulomb,
-            double *gpu_rCutLow, double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq,
-            int *gpu_ewald, double *gpu_diElectric_1, int *gpu_nonOrth,
-            double *gpu_cell_x, double *gpu_cell_y, double *gpu_cell_z,
-            double *gpu_Invcell_x, double *gpu_Invcell_y, double *gpu_Invcell_z,
-            double *gpu_aForcex, double *gpu_aForcey, double *gpu_aForcez,
-            double *gpu_mForcex, double *gpu_mForcey, double *gpu_mForcez,
-            bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power,
-            double *gpu_rMin, double *gpu_rMaxSq, double *gpu_expConst,
-            int *gpu_molIndex, double *gpu_lambdaVDW, double *gpu_lambdaCoulomb,
-            bool *gpu_isFraction, int box) {
+__global__ void BoxForceGPU(
+    int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
+    int numberOfCells, int atomNumber, int *gpu_mapParticleToCell,
+    double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, double3 halfAx,
+    bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind,
+    int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn,
+    double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n,
+    int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut,
+    double *gpu_rCutCoulomb, double *gpu_rCutLow, double *gpu_rOn,
+    double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald,
+    double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x,
+    double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x,
+    double *gpu_Invcell_y, double *gpu_Invcell_z, double *gpu_aForcex,
+    double *gpu_aForcey, double *gpu_aForcez, double *gpu_mForcex,
+    double *gpu_mForcey, double *gpu_mForcez, bool sc_coul, double sc_sigma_6,
+    double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq,
+    double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW,
+    double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box) {
   __shared__ double shr_cutoff;
   __shared__ int shr_particlesInsideCurrentCell, shr_numberOfPairs;
   __shared__ int shr_currentCellStartIndex, shr_neighborCellStartIndex;
@@ -708,9 +708,10 @@ BoxForceGPU(int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
 
             forces += CalcCoulombForceGPU(
                 distSq, qi_qj_fact, gpu_VDW_Kind[0], gpu_ewald[0],
-                gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box], gpu_rCutCoulomb[box],
-                gpu_diElectric_1[0], gpu_sigmaSq, sc_coul, sc_sigma_6, sc_alpha,
-                sc_power, lambdaCoulomb, gpu_count[0], kA, kB);
+                gpu_isMartini[0], gpu_alpha[box], gpu_alphaSq[box],
+                gpu_rCutCoulomb[box], gpu_diElectric_1[0], gpu_sigmaSq, sc_coul,
+                sc_sigma_6, sc_alpha, sc_power, lambdaCoulomb, gpu_count[0], kA,
+                kB);
           }
         }
 
@@ -868,12 +869,14 @@ CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq,
 //**************************************************************//
 __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
                                             int gpu_ewald, double gpu_alpha,
-                                            double gpu_alphaSq, int index, double gpu_sigmaSq,
-                                            bool sc_coul, double sc_sigma_6,
-                                            double sc_alpha, uint sc_power,
+                                            double gpu_alphaSq, int index,
+                                            double gpu_sigmaSq, bool sc_coul,
+                                            double sc_sigma_6, double sc_alpha,
+                                            uint sc_power,
                                             double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
-    return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
+    return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                     gpu_alphaSq);
   }
 
   if (sc_coul) {
@@ -886,15 +889,18 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
     double softRsq = cbrt(softDist6);
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
-           CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
+           CalcCoulombVirParticleGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha,
+                                     gpu_alphaSq);
   } else {
-    return gpu_lambdaCoulomb *
-           CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
+    return gpu_lambdaCoulomb * CalcCoulombVirParticleGPU(distSq, qi_qj,
+                                                         gpu_ewald, gpu_alpha,
+                                                         gpu_alphaSq);
   }
 }
 
 __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
-                                            int gpu_ewald, double gpu_alpha, double gpu_alphaSq) {
+                                            int gpu_ewald, double gpu_alpha,
+                                            double gpu_alphaSq) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
@@ -909,13 +915,15 @@ __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
 }
 
 __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
-                                         int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
-                                         int index, double gpu_sigmaSq,
-                                         bool sc_coul, double sc_sigma_6,
-                                         double sc_alpha, uint sc_power,
+                                         int gpu_ewald, double gpu_alpha,
+                                         double gpu_alphaSq, int index,
+                                         double gpu_sigmaSq, bool sc_coul,
+                                         double sc_sigma_6, double sc_alpha,
+                                         uint sc_power,
                                          double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
-    return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
+    return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                  gpu_alphaSq);
   }
 
   if (sc_coul) {
@@ -928,15 +936,17 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
     double softRsq = cbrt(softDist6);
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
-           CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
+           CalcCoulombVirShiftGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha,
+                                  gpu_alphaSq);
   } else {
-    return gpu_lambdaCoulomb *
-           CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
+    return gpu_lambdaCoulomb * CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald,
+                                                      gpu_alpha, gpu_alphaSq);
   }
 }
 
 __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
-                                         int gpu_ewald, double gpu_alpha, double gpu_alphaSq) {
+                                         int gpu_ewald, double gpu_alpha,
+                                         double gpu_alphaSq) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
@@ -950,13 +960,15 @@ __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
 }
 
 __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
-                                        int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
-                                        int index, double gpu_sigmaSq,
-                                        bool sc_coul, double sc_sigma_6,
-                                        double sc_alpha, uint sc_power,
+                                        int gpu_ewald, double gpu_alpha,
+                                        double gpu_alphaSq, int index,
+                                        double gpu_sigmaSq, bool sc_coul,
+                                        double sc_sigma_6, double sc_alpha,
+                                        uint sc_power,
                                         double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
-    return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
+    return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                 gpu_alphaSq);
   }
   if (sc_coul) {
     double sigma6 = gpu_sigmaSq * gpu_sigmaSq * gpu_sigmaSq;
@@ -968,15 +980,17 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
     double softRsq = cbrt(softDist6);
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
-           CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
+           CalcCoulombVirExp6GPU(softRsq, qi_qj, gpu_ewald, gpu_alpha,
+                                 gpu_alphaSq);
   } else {
-    return gpu_lambdaCoulomb *
-           CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq);
+    return gpu_lambdaCoulomb * CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald,
+                                                     gpu_alpha, gpu_alphaSq);
   }
 }
 
 __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
-                                        int gpu_ewald, double gpu_alpha, double gpu_alphaSq) {
+                                        int gpu_ewald, double gpu_alpha,
+                                        double gpu_alphaSq) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
@@ -990,13 +1004,14 @@ __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
 }
 
 __device__ double CalcCoulombVirSwitchMartiniGPU(
-    double distSq, double qi_qj, int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
-    double gpu_rCut, double gpu_diElectric_1, int index, double gpu_sigmaSq,
-    bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power,
-    double gpu_lambdaCoulomb) {
+    double distSq, double qi_qj, int gpu_ewald, double gpu_alpha,
+    double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index,
+    double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha,
+    uint sc_power, double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
-    return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq,
-                                          gpu_rCut, gpu_diElectric_1);
+    return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                          gpu_alphaSq, gpu_rCut,
+                                          gpu_diElectric_1);
   }
 
   if (sc_coul) {
@@ -1009,21 +1024,20 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(
     double softRsq = cbrt(softDist6);
     double correction = distSq / softRsq;
     return gpu_lambdaCoulomb * correction * correction *
-           CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq,
-                                          gpu_rCut, gpu_diElectric_1);
+           CalcCoulombVirSwitchMartiniGPU(softRsq, qi_qj, gpu_ewald, gpu_alpha,
+                                          gpu_alphaSq, gpu_rCut,
+                                          gpu_diElectric_1);
   } else {
-    return gpu_lambdaCoulomb *
-           CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq,
-                                          gpu_rCut, gpu_diElectric_1);
+    return gpu_lambdaCoulomb * CalcCoulombVirSwitchMartiniGPU(
+                                   distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                   gpu_alphaSq, gpu_rCut, gpu_diElectric_1);
   }
 }
 
-__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj,
-                                                 int gpu_ewald,
-                                                 double gpu_alpha,
-												 double gpu_alphaSq,
-                                                 double gpu_rCut,
-                                                 double gpu_diElectric_1) {
+__device__ double
+CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald,
+                               double gpu_alpha, double gpu_alphaSq,
+                               double gpu_rCut, double gpu_diElectric_1) {
   double dist = sqrt(distSq);
   if (gpu_ewald) {
     // M_2_SQRTPI is 2/sqrt(PI)
@@ -1054,11 +1068,11 @@ __device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj,
 }
 
 __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
-                                          int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
-                                          double gpu_rCut, int index,
-                                          double gpu_sigmaSq, bool sc_coul,
-                                          double sc_sigma_6, double sc_alpha,
-                                          uint sc_power,
+                                          int gpu_ewald, double gpu_alpha,
+                                          double gpu_alphaSq, double gpu_rCut,
+                                          int index, double gpu_sigmaSq,
+                                          bool sc_coul, double sc_sigma_6,
+                                          double sc_alpha, uint sc_power,
                                           double gpu_lambdaCoulomb) {
   if (gpu_lambdaCoulomb >= 0.999999) {
     return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
@@ -1079,7 +1093,8 @@ __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
                                    gpu_alphaSq, gpu_rCut);
   } else {
     return gpu_lambdaCoulomb * CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald,
-                                                       gpu_alpha, gpu_alphaSq, gpu_rCut);
+                                                       gpu_alpha, gpu_alphaSq,
+                                                       gpu_rCut);
   }
 }
 
diff --git a/src/GPU/CalculateForceCUDAKernel.cuh b/src/GPU/CalculateForceCUDAKernel.cuh
index 072939c24..87aaca0c8 100644
--- a/src/GPU/CalculateForceCUDAKernel.cuh
+++ b/src/GPU/CalculateForceCUDAKernel.cuh
@@ -2,407 +2,244 @@
 GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
 Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
-along with this program, also can be found at <https://opensource.org/licenses/MIT>.
+along with this program, also can be found at
+<https://opensource.org/licenses/MIT>.
 ********************************************************************************/
 #ifndef CALCULATE_FORCE_CUDA_KERNEL_H
 #define CALCULATE_FORCE_CUDA_KERNEL_H
 
 #ifdef GOMC_CUDA
-#include <vector>
-#include "XYZArray.h"
 #include "BoxDimensions.h"
-#include "VariablesCUDA.cuh"
-#include "ConstantDefinitionsCUDAKernel.cuh"
 #include "CalculateMinImageCUDAKernel.cuh"
+#include "ConstantDefinitionsCUDAKernel.cuh"
+#include "VariablesCUDA.cuh"
+#include "XYZArray.h"
+#include <vector>
 
-void CallBoxForceGPU(VariablesCUDA *vars,
-                     const std::vector<int> &cellVector,
+void CallBoxForceGPU(VariablesCUDA *vars, const std::vector<int> &cellVector,
                      const std::vector<int> &cellStartIndex,
-                     const std::vector <std::vector<int> > &neighborList,
+                     const std::vector<std::vector<int>> &neighborList,
                      const std::vector<int> &mapParticleToCell,
-                     XYZArray const &coords,
-                     BoxDimensions const &boxAxes,
+                     XYZArray const &coords, BoxDimensions const &boxAxes,
                      bool electrostatic,
                      const std::vector<double> &particleCharge,
                      const std::vector<int> &particleKind,
-                     const std::vector<int> &particleMol,
-                     double &REn,
-                     double &LJEn,
-                     double *aForcex,
-                     double *aForcey,
-                     double *aForcez,
-                     double *mForcex,
-                     double *mForcey,
-                     double *mForcez,
-                     int atomCount,
-                     int molCount,
-                     bool sc_coul,
-                     double sc_sigma_6,
-                     double sc_alpha,
-                     uint sc_power,
+                     const std::vector<int> &particleMol, double &REn,
+                     double &LJEn, double *aForcex, double *aForcey,
+                     double *aForcez, double *mForcex, double *mForcey,
+                     double *mForcez, int atomCount, int molCount, bool sc_coul,
+                     double sc_sigma_6, double sc_alpha, uint sc_power,
                      uint const box);
 
-void CallBoxInterForceGPU(VariablesCUDA *vars,
-                          const std::vector<int> &cellVector,
-                          const std::vector<int> &cellStartIndex,
-                          const std::vector<std::vector<int> > &neighborList,
-                          const std::vector<int> &mapParticleToCell,
-                          XYZArray const &currentCoords,
-                          XYZArray const &currentCOM,
-                          BoxDimensions const& boxAxes,
-                          bool electrostatic,
-                          const std::vector<double> &particleCharge,
-                          const std::vector<int> &particleKind,
-                          const std::vector<int> &particleMol,
-                          double &rT11,
-                          double &rT12,
-                          double &rT13,
-                          double &rT22,
-                          double &rT23,
-                          double &rT33,
-                          double &vT11,
-                          double &vT12,
-                          double &vT13,
-                          double &vT22,
-                          double &vT23,
-                          double &vT33,
-                          bool sc_coul,
-                          double sc_sigma_6,
-                          double sc_alpha,
-                          uint sc_power,
-                          uint const box);
+void CallBoxInterForceGPU(
+    VariablesCUDA *vars, const std::vector<int> &cellVector,
+    const std::vector<int> &cellStartIndex,
+    const std::vector<std::vector<int>> &neighborList,
+    const std::vector<int> &mapParticleToCell, XYZArray const &currentCoords,
+    XYZArray const &currentCOM, BoxDimensions const &boxAxes,
+    bool electrostatic, const std::vector<double> &particleCharge,
+    const std::vector<int> &particleKind, const std::vector<int> &particleMol,
+    double &rT11, double &rT12, double &rT13, double &rT22, double &rT23,
+    double &rT33, double &vT11, double &vT12, double &vT13, double &vT22,
+    double &vT23, double &vT33, bool sc_coul, double sc_sigma_6,
+    double sc_alpha, uint sc_power, uint const box);
 
-void CallVirialReciprocalGPU(VariablesCUDA *vars,
-                             XYZArray const &currentCoords,
+void CallVirialReciprocalGPU(VariablesCUDA *vars, XYZArray const &currentCoords,
                              XYZArray const &currentCOMDiff,
                              const std::vector<double> &particleCharge,
-                             double &rT11,
-                             double &rT12,
-                             double &rT13,
-                             double &rT22,
-                             double &rT23,
-                             double &rT33,
-                             uint imageSize,
-                             double constVal,
-                             uint box);
+                             double &rT11, double &rT12, double &rT13,
+                             double &rT22, double &rT23, double &rT33,
+                             uint imageSize, double constVal, uint box);
 
-__global__ void BoxForceGPU(int *gpu_cellStartIndex,
-                            int *gpu_cellVector,
-                            int *gpu_neighborList,
-                            int numberOfCells,
-                            int atomNumber,
-                            int *gpu_mapParticleToCell,
-                            double *gpu_x,
-                            double *gpu_y,
-                            double *gpu_z,
-                            double3 axis,
-                            double3 halfAx,
-                            bool electrostatic,
-                            double *gpu_particleCharge,
-                            int *gpu_particleKind,
-                            int *gpu_particleMol,
-                            double *gpu_REn,
-                            double *gpu_LJEn,
-                            double *gpu_sigmaSq,
-                            double *gpu_epsilon_Cn,
-                            double *gpu_n,
-                            int *gpu_VDW_Kind,
-                            int *gpu_isMartini,
-                            int *gpu_count,
-                            double *gpu_rCut,
-                            double *gpu_rCutCoulomb,
-                            double *gpu_rCutLow,
-                            double *gpu_rOn,
-                            double *gpu_alpha,
-                            double *gpu_alphaSq,
-                            int *gpu_ewald,
-                            double *gpu_diElectric_1,
-                            int *gpu_nonOrth,
-                            double *gpu_cell_x,
-                            double *gpu_cell_y,
-                            double *gpu_cell_z,
-                            double *gpu_Invcell_x,
-                            double *gpu_Invcell_y,
-                            double *gpu_Invcell_z,
-                            double *gpu_aForcex,
-                            double *gpu_aForcey,
-                            double *gpu_aForcez,
-                            double *gpu_mForcex,
-                            double *gpu_mForcey,
-                            double *gpu_mForcez,
-                            bool sc_coul,
-                            double sc_sigma_6,
-                            double sc_alpha,
-                            uint sc_power,
-                            double *gpu_rMin,
-                            double *gpu_rMaxSq,
-                            double *gpu_expConst,
-                            int *gpu_molIndex,
-                            double *gpu_lambdaVDW,
-                            double *gpu_lambdaCoulomb,
-                            bool *gpu_isFraction,
-                            int box);
+__global__ void BoxForceGPU(
+    int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
+    int numberOfCells, int atomNumber, int *gpu_mapParticleToCell,
+    double *gpu_x, double *gpu_y, double *gpu_z, double3 axis, double3 halfAx,
+    bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind,
+    int *gpu_particleMol, double *gpu_REn, double *gpu_LJEn,
+    double *gpu_sigmaSq, double *gpu_epsilon_Cn, double *gpu_n,
+    int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count, double *gpu_rCut,
+    double *gpu_rCutCoulomb, double *gpu_rCutLow, double *gpu_rOn,
+    double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald,
+    double *gpu_diElectric_1, int *gpu_nonOrth, double *gpu_cell_x,
+    double *gpu_cell_y, double *gpu_cell_z, double *gpu_Invcell_x,
+    double *gpu_Invcell_y, double *gpu_Invcell_z, double *gpu_aForcex,
+    double *gpu_aForcey, double *gpu_aForcez, double *gpu_mForcex,
+    double *gpu_mForcey, double *gpu_mForcez, bool sc_coul, double sc_sigma_6,
+    double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq,
+    double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW,
+    double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box);
 
-__global__ void BoxInterForceGPU(int *gpu_cellStartIndex,
-                                 int *gpu_cellVector,
-                                 int *gpu_neighborList,
-                                 int numberOfCells,
-                                 int atomNumber,
-                                 int *gpu_mapParticleToCell,
-                                 double *gpu_x,
-                                 double *gpu_y,
-                                 double *gpu_z,
-                                 double *gpu_comx,
-                                 double *gpu_comy,
-                                 double *gpu_comz,
-                                 double3 axis,
-                                 double3 halfAx,
-                                 bool electrostatic,
-                                 double *gpu_particleCharge,
-                                 int *gpu_particleKind,
-                                 int *gpu_particleMol,
-                                 double *gpu_rT11,
-                                 double *gpu_rT12,
-                                 double *gpu_rT13,
-                                 double *gpu_rT22,
-                                 double *gpu_rT23,
-                                 double *gpu_rT33,
-                                 double *gpu_vT11,
-                                 double *gpu_vT12,
-                                 double *gpu_vT13,
-                                 double *gpu_vT22,
-                                 double *gpu_vT23,
-                                 double *gpu_vT33,
-                                 double *gpu_sigmaSq,
-                                 double *gpu_epsilon_Cn,
-                                 double *gpu_n,
-                                 int *gpu_VDW_Kind,
-                                 int *gpu_isMartini,
-                                 int *gpu_count,
-                                 double *gpu_rCut,
-                                 double *gpu_rCutCoulomb,
-                                 double *gpu_rCutLow,
-                                 double *gpu_rOn,
-                                 double *gpu_alpha,
-                                 double *gpu_alphaSq,
-                                 int *gpu_ewald,
-                                 double *gpu_diElectric_1,
-                                 double *gpu_cell_x,
-                                 double *gpu_cell_y,
-                                 double *gpu_cell_z,
-                                 double *gpu_Invcell_x,
-                                 double *gpu_Invcell_y,
-                                 double *gpu_Invcell_z,
-                                 int *gpu_nonOrth,
-                                 bool sc_coul,
-                                 double sc_sigma_6,
-                                 double sc_alpha,
-                                 uint sc_power,
-                                 double *gpu_rMin,
-                                 double *gpu_rMaxSq,
-                                 double *gpu_expConst,
-                                 int *gpu_molIndex,
-                                 double *gpu_lambdaVDW,
-                                 double *gpu_lambdaCoulomb,
-                                 bool *gpu_isFraction,
-                                 int box);
+__global__ void BoxInterForceGPU(
+    int *gpu_cellStartIndex, int *gpu_cellVector, int *gpu_neighborList,
+    int numberOfCells, int atomNumber, int *gpu_mapParticleToCell,
+    double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_comx,
+    double *gpu_comy, double *gpu_comz, double3 axis, double3 halfAx,
+    bool electrostatic, double *gpu_particleCharge, int *gpu_particleKind,
+    int *gpu_particleMol, double *gpu_rT11, double *gpu_rT12, double *gpu_rT13,
+    double *gpu_rT22, double *gpu_rT23, double *gpu_rT33, double *gpu_vT11,
+    double *gpu_vT12, double *gpu_vT13, double *gpu_vT22, double *gpu_vT23,
+    double *gpu_vT33, double *gpu_sigmaSq, double *gpu_epsilon_Cn,
+    double *gpu_n, int *gpu_VDW_Kind, int *gpu_isMartini, int *gpu_count,
+    double *gpu_rCut, double *gpu_rCutCoulomb, double *gpu_rCutLow,
+    double *gpu_rOn, double *gpu_alpha, double *gpu_alphaSq, int *gpu_ewald,
+    double *gpu_diElectric_1, double *gpu_cell_x, double *gpu_cell_y,
+    double *gpu_cell_z, double *gpu_Invcell_x, double *gpu_Invcell_y,
+    double *gpu_Invcell_z, int *gpu_nonOrth, bool sc_coul, double sc_sigma_6,
+    double sc_alpha, uint sc_power, double *gpu_rMin, double *gpu_rMaxSq,
+    double *gpu_expConst, int *gpu_molIndex, double *gpu_lambdaVDW,
+    double *gpu_lambdaCoulomb, bool *gpu_isFraction, int box);
 
-__global__ void VirialReciprocalGPU(double *gpu_x,
-                                    double *gpu_y,
-                                    double *gpu_z,
-                                    double *gpu_comDx,
-                                    double *gpu_comDy,
-                                    double *gpu_comDz,
-                                    double *gpu_kxRef,
-                                    double *gpu_kyRef,
-                                    double *gpu_kzRef,
-                                    double *gpu_prefactRef,
-                                    double *gpu_hsqrRef,
-                                    double *gpu_sumRref,
-                                    double *gpu_sumIref,
-                                    double *gpu_particleCharge,
-                                    double *gpu_rT11,
-                                    double *gpu_rT12,
-                                    double *gpu_rT13,
-                                    double *gpu_rT22,
-                                    double *gpu_rT23,
-                                    double *gpu_rT33,
-                                    double constVal,
-                                    uint imageSize,
-                                    uint atomNumber);
+__global__ void VirialReciprocalGPU(
+    double *gpu_x, double *gpu_y, double *gpu_z, double *gpu_comDx,
+    double *gpu_comDy, double *gpu_comDz, double *gpu_kxRef, double *gpu_kyRef,
+    double *gpu_kzRef, double *gpu_prefactRef, double *gpu_hsqrRef,
+    double *gpu_sumRref, double *gpu_sumIref, double *gpu_particleCharge,
+    double *gpu_rT11, double *gpu_rT12, double *gpu_rT13, double *gpu_rT22,
+    double *gpu_rT23, double *gpu_rT33, double constVal, uint imageSize,
+    uint atomNumber);
 
-__device__ double CalcEnForceGPU(double distSq, int kind1, int kind2,
-                                 double *gpu_sigmaSq,
-                                 double *gpu_n,
-                                 double *gpu_epsilon_Cn,
-                                 double gpu_rCut,
-                                 double gpu_rOn,
-                                 int gpu_isMartini,
-                                 int gpu_VDW_Kind,
-                                 int gpu_count,
-                                 double gpu_lambdaVDW,
-                                 double sc_sigma_6,
-                                 double sc_alpha,
-                                 uint sc_power,
-                                 double *gpu_rMin,
-                                 double *gpu_rMaxSq,
-                                 double *gpu_expConst);
+__device__ double
+CalcEnForceGPU(double distSq, int kind1, int kind2, double *gpu_sigmaSq,
+               double *gpu_n, double *gpu_epsilon_Cn, double gpu_rCut,
+               double gpu_rOn, int gpu_isMartini, int gpu_VDW_Kind,
+               int gpu_count, double gpu_lambdaVDW, double sc_sigma_6,
+               double sc_alpha, uint sc_power, double *gpu_rMin,
+               double *gpu_rMaxSq, double *gpu_expConst);
 
-//ElectroStatic Calculation
+// ElectroStatic Calculation
 //**************************************************************//
 __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
-    int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
-    int index, double gpu_sigmaSq,
-    bool sc_coul, double sc_sigma_6,
-    double sc_alpha, uint sc_power,
-    double gpu_lambdaCoulomb);
+                                            int gpu_ewald, double gpu_alpha,
+                                            double gpu_alphaSq, int index,
+                                            double gpu_sigmaSq, bool sc_coul,
+                                            double sc_sigma_6, double sc_alpha,
+                                            uint sc_power,
+                                            double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirParticleGPU(double distSq, double qi_qj,
-    int gpu_ewald, double gpu_alpha, double gpu_alphaSq);
+                                            int gpu_ewald, double gpu_alpha,
+                                            double gpu_alphaSq);
 __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
-    int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
-    int index, double gpu_sigmaSq,
-    bool sc_coul, double sc_sigma_6,
-    double sc_alpha, uint sc_power,
-    double gpu_lambdaCoulomb);
+                                         int gpu_ewald, double gpu_alpha,
+                                         double gpu_alphaSq, int index,
+                                         double gpu_sigmaSq, bool sc_coul,
+                                         double sc_sigma_6, double sc_alpha,
+                                         uint sc_power,
+                                         double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirShiftGPU(double distSq, double qi_qj,
-    int gpu_ewald, double gpu_alpha, double gpu_alphaSq);
-__device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
-                                        int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
-                                        int index, double gpu_sigmaSq,
-                                        bool sc_coul, double sc_sigma_6,
-                                        double sc_alpha, uint sc_power,
-                                        double gpu_lambdaCoulomb);
+                                         int gpu_ewald, double gpu_alpha,
+                                         double gpu_alphaSq);
+__device__ double
+CalcCoulombVirExp6GPU(double distSq, double qi_qj, int gpu_ewald,
+                      double gpu_alpha, double gpu_alphaSq, int index,
+                      double gpu_sigmaSq, bool sc_coul, double sc_sigma_6,
+                      double sc_alpha, uint sc_power, double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirExp6GPU(double distSq, double qi_qj,
-                                        int gpu_ewald, double gpu_alpha, double gpu_alphaSq);
-__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj,
-    int gpu_ewald,
-    double gpu_alpha, double gpu_alphaSq,
-    double gpu_rCut,
-    double gpu_diElectric_1,
-    int index,
-    double gpu_sigmaSq,
-    bool sc_coul,
-    double sc_sigma_6,
-    double sc_alpha,
-    uint sc_power,
-    double gpu_lambdaCoulomb);
-__device__ double CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj,
-    int gpu_ewald,
-    double gpu_alpha, double gpu_alphaSq,
-    double gpu_rCut,
-    double gpu_diElectric_1);
+                                        int gpu_ewald, double gpu_alpha,
+                                        double gpu_alphaSq);
+__device__ double CalcCoulombVirSwitchMartiniGPU(
+    double distSq, double qi_qj, int gpu_ewald, double gpu_alpha,
+    double gpu_alphaSq, double gpu_rCut, double gpu_diElectric_1, int index,
+    double gpu_sigmaSq, bool sc_coul, double sc_sigma_6, double sc_alpha,
+    uint sc_power, double gpu_lambdaCoulomb);
+__device__ double
+CalcCoulombVirSwitchMartiniGPU(double distSq, double qi_qj, int gpu_ewald,
+                               double gpu_alpha, double gpu_alphaSq,
+                               double gpu_rCut, double gpu_diElectric_1);
 __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
-    int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
-    double gpu_rCut, int index,
-    double gpu_sigmaSq, bool sc_coul,
-    double sc_sigma_6, double sc_alpha,
-    uint sc_power,
-    double gpu_lambdaCoulomb);
+                                          int gpu_ewald, double gpu_alpha,
+                                          double gpu_alphaSq, double gpu_rCut,
+                                          int index, double gpu_sigmaSq,
+                                          bool sc_coul, double sc_sigma_6,
+                                          double sc_alpha, uint sc_power,
+                                          double gpu_lambdaCoulomb);
 __device__ double CalcCoulombVirSwitchGPU(double distSq, double qi_qj,
-    int gpu_ewald, double gpu_alpha, double gpu_alphaSq,
-    double gpu_rCut);
+                                          int gpu_ewald, double gpu_alpha,
+                                          double gpu_alphaSq, double gpu_rCut);
 
-//VDW Calculation
+// VDW Calculation
 //*****************************************************************//
 __device__ double CalcVirParticleGPU(double distSq, int index,
                                      double gpu_sigmaSq, double *gpu_n,
-                                     double *gpu_epsilon_Cn,
-                                     double sc_sigma_6,
+                                     double *gpu_epsilon_Cn, double sc_sigma_6,
                                      double sc_alpha, uint sc_power,
                                      double gpu_lambdaVDW);
 __device__ double CalcVirParticleGPU(double distSq, int index,
                                      double gpu_sigmaSq, double *gpu_n,
                                      double *gpu_epsilon_Cn);
-__device__ double CalcVirShiftGPU(double distSq, int index,
-                                  double gpu_sigmaSq, double *gpu_n,
-                                  double *gpu_epsilon_Cn,
+__device__ double CalcVirShiftGPU(double distSq, int index, double gpu_sigmaSq,
+                                  double *gpu_n, double *gpu_epsilon_Cn,
                                   double sc_sigma_6, double sc_alpha,
-                                  uint sc_power,
-                                  double gpu_lambdaVDW);
-__device__ double CalcVirShiftGPU(double distSq, int index,
-                                  double gpu_sigmaSq, double *gpu_n,
-                                  double *gpu_epsilon_Cn);
-__device__ double CalcVirExp6GPU(double distSq, int index,
-                                 double gpu_sigmaSq, double *gpu_n,
-                                 double *gpu_rMin, double *gpu_rMaxSq,
-                                 double *gpu_expConst,
+                                  uint sc_power, double gpu_lambdaVDW);
+__device__ double CalcVirShiftGPU(double distSq, int index, double gpu_sigmaSq,
+                                  double *gpu_n, double *gpu_epsilon_Cn);
+__device__ double CalcVirExp6GPU(double distSq, int index, double gpu_sigmaSq,
+                                 double *gpu_n, double *gpu_rMin,
+                                 double *gpu_rMaxSq, double *gpu_expConst,
                                  double sc_sigma_6, double sc_alpha,
-                                 uint sc_power,
-                                 double gpu_lambdaVDW);
+                                 uint sc_power, double gpu_lambdaVDW);
 __device__ double CalcVirExp6GPU(double distSq, int index, double *gpu_n,
                                  double *gpu_rMin, double *gpu_expConst);
 __device__ double CalcVirSwitchMartiniGPU(double distSq, int index,
-    double gpu_sigmaSq, double *gpu_n,
-    double *gpu_epsilon_Cn,
-    double gpu_rCut, double rOn,
-    double sc_sigma_6, double sc_alpha,
-    uint sc_power,
-    double gpu_lambdaVDW);
+                                          double gpu_sigmaSq, double *gpu_n,
+                                          double *gpu_epsilon_Cn,
+                                          double gpu_rCut, double rOn,
+                                          double sc_sigma_6, double sc_alpha,
+                                          uint sc_power, double gpu_lambdaVDW);
 __device__ double CalcVirSwitchMartiniGPU(double distSq, int index,
-    double gpu_sigmaSq, double *gpu_n,
-    double *gpu_epsilon_Cn,
-    double gpu_rCut, double rOn);
-__device__ double CalcVirSwitchGPU(double distSq, int index,
-                                   double gpu_sigmaSq, double *gpu_epsilon_Cn,
-                                   double *gpu_n, double gpu_rCut,
-                                   double gpu_rOn,
+                                          double gpu_sigmaSq, double *gpu_n,
+                                          double *gpu_epsilon_Cn,
+                                          double gpu_rCut, double rOn);
+__device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq,
+                                   double *gpu_epsilon_Cn, double *gpu_n,
+                                   double gpu_rCut, double gpu_rOn,
                                    double sc_sigma_6, double sc_alpha,
                                    uint sc_power, double gpu_lambdaVDW);
-__device__ double CalcVirSwitchGPU(double distSq, int index,
-                                   double gpu_sigmaSq, double *gpu_epsilon_Cn,
-                                   double *gpu_n, double gpu_rCut,
-                                   double gpu_rOn);
-
+__device__ double CalcVirSwitchGPU(double distSq, int index, double gpu_sigmaSq,
+                                   double *gpu_epsilon_Cn, double *gpu_n,
+                                   double gpu_rCut, double gpu_rOn);
 
 // Have to move the implementation for some functions here
 // since CUDA doesn't allow __global__ to call __device__
 // from different files
 // Wanted to call CalcCoulombForceGPU() from CalculateEnergyCUDAKernel.cu file
-__device__ inline double CalcCoulombForceGPU(double distSq, double qi_qj,
-    int gpu_VDW_Kind, int gpu_ewald,
-    int gpu_isMartini,
-    double gpu_alpha, double gpu_alphaSq,
-    double gpu_rCutCoulomb,
-    double gpu_diElectric_1,
-    double *gpu_sigmaSq,
-    bool sc_coul,
-    double sc_sigma_6,
-    double sc_alpha,
-    uint sc_power,
-    double gpu_lambdaCoulomb,
-    int gpu_count, int kind1,
-    int kind2)
-{
-  if((gpu_rCutCoulomb * gpu_rCutCoulomb) < distSq) {
+__device__ inline double CalcCoulombForceGPU(
+    double distSq, double qi_qj, int gpu_VDW_Kind, int gpu_ewald,
+    int gpu_isMartini, double gpu_alpha, double gpu_alphaSq,
+    double gpu_rCutCoulomb, double gpu_diElectric_1, double *gpu_sigmaSq,
+    bool sc_coul, double sc_sigma_6, double sc_alpha, uint sc_power,
+    double gpu_lambdaCoulomb, int gpu_count, int kind1, int kind2) {
+  if ((gpu_rCutCoulomb * gpu_rCutCoulomb) < distSq) {
     return 0.0;
   }
 
   int index = FlatIndexGPU(kind1, kind2, gpu_count);
-  if(gpu_VDW_Kind == GPU_VDW_STD_KIND) {
-    return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index,
-                                     gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha,
-                                     sc_power, gpu_lambdaCoulomb);
-  } else if(gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) {
-    return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index,
-                                  gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha,
-                                  sc_power, gpu_lambdaCoulomb);
-  } else if(gpu_VDW_Kind == GPU_VDW_EXP6_KIND) {
-    return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, index,
-                                 gpu_sigmaSq[index], sc_coul, sc_sigma_6, sc_alpha,
-                                 sc_power, gpu_lambdaCoulomb);
-  } else if(gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) {
-    return CalcCoulombVirSwitchMartiniGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq,
-                                          gpu_rCutCoulomb, gpu_diElectric_1,
-                                          index, gpu_sigmaSq[index], sc_coul,
-                                          sc_sigma_6, sc_alpha, sc_power,
-                                          gpu_lambdaCoulomb);
+  if (gpu_VDW_Kind == GPU_VDW_STD_KIND) {
+    return CalcCoulombVirParticleGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                     gpu_alphaSq, index, gpu_sigmaSq[index],
+                                     sc_coul, sc_sigma_6, sc_alpha, sc_power,
+                                     gpu_lambdaCoulomb);
+  } else if (gpu_VDW_Kind == GPU_VDW_SHIFT_KIND) {
+    return CalcCoulombVirShiftGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                  gpu_alphaSq, index, gpu_sigmaSq[index],
+                                  sc_coul, sc_sigma_6, sc_alpha, sc_power,
+                                  gpu_lambdaCoulomb);
+  } else if (gpu_VDW_Kind == GPU_VDW_EXP6_KIND) {
+    return CalcCoulombVirExp6GPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                 gpu_alphaSq, index, gpu_sigmaSq[index],
+                                 sc_coul, sc_sigma_6, sc_alpha, sc_power,
+                                 gpu_lambdaCoulomb);
+  } else if (gpu_VDW_Kind == GPU_VDW_SWITCH_KIND && gpu_isMartini) {
+    return CalcCoulombVirSwitchMartiniGPU(
+        distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq, gpu_rCutCoulomb,
+        gpu_diElectric_1, index, gpu_sigmaSq[index], sc_coul, sc_sigma_6,
+        sc_alpha, sc_power, gpu_lambdaCoulomb);
   } else
-    return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha, gpu_alphaSq,
-                                   gpu_rCutCoulomb, index, gpu_sigmaSq[index], sc_coul,
-                                   sc_sigma_6, sc_alpha, sc_power,
-                                   gpu_lambdaCoulomb);
+    return CalcCoulombVirSwitchGPU(distSq, qi_qj, gpu_ewald, gpu_alpha,
+                                   gpu_alphaSq, gpu_rCutCoulomb, index,
+                                   gpu_sigmaSq[index], sc_coul, sc_sigma_6,
+                                   sc_alpha, sc_power, gpu_lambdaCoulomb);
 }
 
-
 #endif /*GOMC_CUDA*/
 #endif /*CALCULATE_FORCE_CUDA_KERNEL_H*/
diff --git a/src/GPU/ConstantDefinitionsCUDAKernel.cuh b/src/GPU/ConstantDefinitionsCUDAKernel.cuh
index 3584e46c2..c5cabf281 100644
--- a/src/GPU/ConstantDefinitionsCUDAKernel.cuh
+++ b/src/GPU/ConstantDefinitionsCUDAKernel.cuh
@@ -2,17 +2,18 @@
 GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
 Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
-along with this program, also can be found at <https://opensource.org/licenses/MIT>.
+along with this program, also can be found at
+<https://opensource.org/licenses/MIT>.
 ********************************************************************************/
 #ifndef CONSTANT_DEFINITIONS_CUDA_KERNEL_H
 #define CONSTANT_DEFINITIONS_CUDA_KERNEL_H
 
 #ifdef GOMC_CUDA
-#include <cuda.h>
-#include <cuda_runtime.h>
+#include "EnsemblePreprocessor.h"
 #include "GeomLib.h"
 #include "VariablesCUDA.cuh"
-#include "EnsemblePreprocessor.h"
+#include <cuda.h>
+#include <cuda_runtime.h>
 
 #define GPU_VDW_STD_KIND 0
 #define GPU_VDW_SHIFT_KIND 1
@@ -21,12 +22,12 @@ along with this program, also can be found at <https://opensource.org/licenses/M
 #define MAX_PAIR_SIZE 10000000
 
 void UpdateGPULambda(VariablesCUDA *vars, int *molIndex, double *lambdaVDW,
-                    double *lambdaCoulomb, bool *isFraction);
+                     double *lambdaCoulomb, bool *isFraction);
 void InitGPUForceField(VariablesCUDA &vars, double const *sigmaSq,
-                       double const *epsilon_Cn, double const *n,
-                       int VDW_Kind, int isMartini, int count,
-                       double Rcut, double RcutSq, double const *rCutCoulomb,
-                       double const *rCutCoulombSq, double RcutLow, double Ron, double const *alpha,
+                       double const *epsilon_Cn, double const *n, int VDW_Kind,
+                       int isMartini, int count, double Rcut, double RcutSq,
+                       double const *rCutCoulomb, double const *rCutCoulombSq,
+                       double RcutLow, double Ron, double const *alpha,
                        double const *alphaSq, int ewald, double diElectric_1);
 void InitCoordinatesCUDA(VariablesCUDA *vars, uint atomNumber,
                          uint maxAtomsInMol, uint maxMolNumber);
diff --git a/src/GPU/VariablesCUDA.cuh b/src/GPU/VariablesCUDA.cuh
index 30376c273..19108119c 100644
--- a/src/GPU/VariablesCUDA.cuh
+++ b/src/GPU/VariablesCUDA.cuh
@@ -2,63 +2,65 @@
 GPU OPTIMIZED MONTE CARLO (GOMC) 2.75
 Copyright (C) 2022 GOMC Group
 A copy of the MIT License can be found in License.txt
-along with this program, also can be found at <https://opensource.org/licenses/MIT>.
+along with this program, also can be found at
+<https://opensource.org/licenses/MIT>.
 ********************************************************************************/
 #ifndef VARIABLES_CUDA_H
 #define VARIABLES_CUDA_H
 
 #ifdef GOMC_CUDA
-#include <cuda.h>
-#include <stdio.h>
-#include <cuda_runtime.h>
 #include "EnsemblePreprocessor.h"
 #include "NumLib.h"
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <stdio.h>
 
-//Need a separate float constant for device code with the MSVC compiler
-//See CUDA Programming Guide section I.4.13 for details 
+// Need a separate float constant for device code with the MSVC compiler
+// See CUDA Programming Guide section I.4.13 for details
 static const __device__ double qqFactGPU = num::qqFact;
 
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
-{
+#define gpuErrchk(ans)                                                         \
+  { gpuAssert((ans), __FILE__, __LINE__); }
+inline void gpuAssert(cudaError_t code, const char *file, int line,
+                      bool abort = true) {
   if (code != cudaSuccess) {
-    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
-    if (abort) exit(code);
+    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file,
+            line);
+    if (abort)
+      exit(code);
   }
 }
 
-inline void checkLastErrorCUDA(const char *file, int line)
-{
+inline void checkLastErrorCUDA(const char *file, int line) {
   cudaError_t code = cudaGetLastError();
   if (code != cudaSuccess) {
-    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file,
+            line);
     exit(code);
   }
 }
 
-inline void printFreeMemory()
-{
-  size_t free_byte ;
-  size_t total_byte ;
-  cudaError_t cuda_status = cudaMemGetInfo( &free_byte, &total_byte ) ;
+inline void printFreeMemory() {
+  size_t free_byte;
+  size_t total_byte;
+  cudaError_t cuda_status = cudaMemGetInfo(&free_byte, &total_byte);
 
-  if ( cudaSuccess != cuda_status ) {
+  if (cudaSuccess != cuda_status) {
     printf("Error: cudaMemGetInfo fails, %s \n",
-           cudaGetErrorString(cuda_status) );
+           cudaGetErrorString(cuda_status));
     exit(1);
   }
-  double free_db = (double)free_byte ;
-  double total_db = (double)total_byte ;
-  double used_db = total_db - free_db ;
+  double free_db = (double)free_byte;
+  double total_db = (double)total_byte;
+  double used_db = total_db - free_db;
   printf("GPU memory usage: used = %f, free = %f MB, total = %f MB\n",
-         used_db / 1024.0 / 1024.0, free_db / 1024.0 / 1024.0, total_db / 1024.0 / 1024.0);
+         used_db / 1024.0 / 1024.0, free_db / 1024.0 / 1024.0,
+         total_db / 1024.0 / 1024.0);
 }
 
-class VariablesCUDA
-{
+class VariablesCUDA {
 public:
-  VariablesCUDA()
-  {
+  VariablesCUDA() {
     gpu_sigmaSq = NULL;
     gpu_epsilon_Cn = NULL;
     gpu_n = NULL;
@@ -95,7 +97,7 @@ public:
   int *gpu_VDW_Kind;
   int *gpu_isMartini;
   int *gpu_count;
-  int *gpu_startAtomIdx; //start atom index of the molecule
+  int *gpu_startAtomIdx; // start atom index of the molecule
   double *gpu_rCut, *gpu_rCutSq;
   double *gpu_rCutCoulomb, *gpu_rCutCoulombSq;
   double *gpu_rCutLow;
diff --git a/src/Main.cpp b/src/Main.cpp
index 6b376e61c..165dd0a23 100644
--- a/src/Main.cpp
+++ b/src/Main.cpp
@@ -114,9 +114,10 @@ int main(int argc, char *argv[]) {
     // Print OpenMP version if recognized or OpenMP date code if not recognized.
 #ifdef _OPENMP
     std::unordered_map<unsigned, std::string> omp_map{
-        {200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"}, {201307, "4.0"},
-        {201511, "4.5"}, {201611, "5.0 Preview 1"}, {201811, "5.0"},
-        {202011, "5.1"}, {202111, "5.2"}, {202411, "6.0"}};
+        {200505, "2.5"}, {200805, "3.0"}, {201107, "3.1"},
+        {201307, "4.0"}, {201511, "4.5"}, {201611, "5.0 Preview 1"},
+        {201811, "5.0"}, {202011, "5.1"}, {202111, "5.2"},
+        {202411, "6.0"}};
     std::unordered_map<unsigned, std::string>::const_iterator match =
         omp_map.find(_OPENMP);
     if (match == omp_map.end())
diff --git a/src/MoveSettings.cpp b/src/MoveSettings.cpp
index ab4e8c9f3..50a77eaa3 100644
--- a/src/MoveSettings.cpp
+++ b/src/MoveSettings.cpp
@@ -5,7 +5,7 @@ A copy of the MIT License can be found in License.txt
 along with this program, also can be found at
 <https://opensource.org/licenses/MIT>.
 ********************************************************************************/
-#include "MoveSettings.h" //header spec
+#include "MoveSettings.h"  //header spec
 #include "BoxDimensions.h" //For axis sizes
 #include "BoxDimensionsNonOrth.h"
 #include "GeomLib.h"    //For M_PI
diff --git a/src/PRNG.h b/src/PRNG.h
index c3172af28..53741de2c 100644
--- a/src/PRNG.h
+++ b/src/PRNG.h
@@ -8,9 +8,6 @@ along with this program, also can be found at
 #ifndef PRNG_H
 #define PRNG_H
 
-#include <cstdlib>
-#include <fstream>
-#include <iostream>
 #include <cstdlib>
 #include <fstream>
 #include <iostream>
diff --git a/src/XYZArray.h b/src/XYZArray.h
index 055573d2f..d86293a1a 100644
--- a/src/XYZArray.h
+++ b/src/XYZArray.h
@@ -12,7 +12,7 @@ along with this program, also can be found at
 #include <string.h> //for memset, memcpy, etc.
 
 #include <algorithm> //for swap pre-c++11 compilers
-#include <utility> //for swap (most modern compilers)
+#include <utility>   //for swap (most modern compilers)
 
 #include "BasicTypes.h"
 #ifdef _OPENMP

From 5c65edf611258635086013dbfd76a98be9c2ecc3 Mon Sep 17 00:00:00 2001
From: Loren Schwiebert <loren@wayne.edu>
Date: Mon, 6 Jan 2025 12:56:55 -0500
Subject: [PATCH 42/42] Set address sanitizer MSVC flag only for MSVC builds

---
 CMakeLists.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 53c13006f..e55cd4b09 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -47,7 +47,7 @@ if(GOMC_ASAN)
    set(CMAKE_CLANG_COMP_FLAGS ${CMAKE_CLANG_COMP_FLAGS} -fsanitize=address -fno-omit-frame-pointer)
    set(CMAKE_CLANG_CUDA_COMP_FLAGS ${CMAKE_CLANG_CUDA_COMP_FLAGS} "SHELL:-Xcompiler -fsanitize=address" "SHELL:-Xcompiler -fno-omit-frame-pointer")
    set(CMAKE_CLANG_LINK_FLAGS ${CMAKE_CLANG_LINK_FLAGS} -fsanitize=address -fno-omit-frame-pointer)
-   set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /fsanitize=address")
+   set(CMAKE_MSVC_ASAN_FLAGS "/fsanitize=address")
 endif()
 
 # find OpenMP and set it up
@@ -95,8 +95,8 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
     set(CMAKE_GPU_COMP_FLAGS "${CMAKE_CLANG_CUDA_COMP_FLAGS}")
     set(CMAKE_LINK_FLAGS "${CMAKE_CLANG_LINK_FLAGS}")
 elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC")
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${CMAKE_MSVC_OPENMP_FLAGS} /D DEBUG /MTd /Zi /Ob0 /Od /RTC1 /D_USE_MATH_DEFINES")
-    set(CMAKE_CXX_FLAGS_DEBUG_INIT "${CMAKE_CXX_FLAGS_DEBUG_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /D DEBUG /MTd /Zi /Ob0 /Od /RTC1 /D_USE_MATH_DEFINES")
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${CMAKE_MSVC_OPENMP_FLAGS} ${CMAKE_MSVC_ASAN_FLAGS} /D DEBUG /MTd /Zi /Ob0 /Od /RTC1 /D_USE_MATH_DEFINES")
+    set(CMAKE_CXX_FLAGS_DEBUG_INIT "${CMAKE_CXX_FLAGS_DEBUG_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} ${CMAKE_MSVC_ASAN_FLAGS} /D DEBUG /MTd /Zi /Ob0 /Od /RTC1 /D_USE_MATH_DEFINES")
     set(CMAKE_CXX_FLAGS_MINSIZEREL_INIT "${CMAKE_CXX_FLAGS_MINSIZEREL_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /O1 /Ob1 /D NDEBUG /D_USE_MATH_DEFINES")
     set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_MINSIZEREL_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /O2 /D NDEBUG /D_USE_MATH_DEFINES")
     set(CMAKE_CXX_FLAGS_RELEASE_INIT "${CMAKE_CXX_FLAGS_RELEASE_INIT} ${CMAKE_MSVC_OPENMP_FLAGS} /MT /O2 /D NDEBUG /D_USE_MATH_DEFINES")