From 6408d66f1bc236da48ac2c6efa2f3946410df811 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Thu, 21 Mar 2024 14:43:37 -0400
Subject: [PATCH] test: add LAMMPS MPI tests (#3572)

Fix #3509.

Note: 0 atoms in a processor with the PyTorch backend is currently
broken. I commented with a TODO tag.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
(cherry picked from commit fb61efb1b16030184257f17cb4817d90568c8358)
Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
---
 .github/workflows/test_cc.yml           |  2 +-
 .github/workflows/test_cuda.yml         | 14 ++++--
 source/lmp/tests/run_mpi_pair_deepmd.py | 61 +++++++++++++++++++++++++
 source/lmp/tests/test_lammps.py         | 52 +++++++++++++++++++++
 4 files changed, 125 insertions(+), 4 deletions(-)
 create mode 100644 source/lmp/tests/run_mpi_pair_deepmd.py

diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml
index d1e867c144..4a2ba7968a 100644
--- a/.github/workflows/test_cc.yml
+++ b/.github/workflows/test_cc.yml
@@ -38,7 +38,7 @@ jobs:
     # TODO: remove ase version when ase has new release
     - run: |
         python -m pip install -U pip
-        python -m pip install -e .[cpu,test,lmp] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz"
+        python -m pip install -e .[cpu,test,lmp] mpi4py "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz"
       env:
         DP_BUILD_TESTING: 1
       if: ${{ !matrix.check_memleak }}
diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index 5af4f95dc8..6435789933 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -65,6 +65,14 @@ jobs:
         TF_INTER_OP_PARALLELISM_THREADS: 1
         LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp
         CUDA_PATH: /usr/local/cuda-12.2
-    - uses: codecov/codecov-action@v4
-      env:
-        CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+  pass:  # gate job that reports one overall status for the CUDA tests
+    name: Pass testing on CUDA
+    needs: [test_cuda]
+    runs-on: ubuntu-latest
+    if: always()  # run even when test_cuda failed or was skipped
+    steps:
+    - name: Decide whether the needed jobs succeeded or failed
+      uses: re-actors/alls-green@release/v1
+      with:
+        jobs: ${{ toJSON(needs) }}
+        allowed-skips: test_cuda  # a skipped test_cuda is not treated as a failure
diff --git a/source/lmp/tests/run_mpi_pair_deepmd.py b/source/lmp/tests/run_mpi_pair_deepmd.py
new file mode 100644
index 0000000000..b27774ce11
--- /dev/null
+++ b/source/lmp/tests/run_mpi_pair_deepmd.py
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Use mpi4py to run a LAMMPS pair_deepmd + model deviation (atomic, relative) task."""
+
+import argparse
+
+import numpy as np
+from lammps import (
+    PyLammps,
+)
+from mpi4py import (
+    MPI,
+)
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()  # used below so that only rank 0 writes OUTPUT
+
+parser = argparse.ArgumentParser()
+parser.add_argument("DATAFILE", type=str)  # passed to LAMMPS read_data
+parser.add_argument("PBFILE", type=str)  # first model in the pair_style deepmd line
+parser.add_argument("PBFILE2", type=str)  # second model in the pair_style deepmd line
+parser.add_argument("MD_FILE", type=str)  # out_file argument of pair_style deepmd
+parser.add_argument("OUTPUT", type=str)  # text file that receives the value of pe
+parser.add_argument("--balance", action="store_true")  # selects the processor grid
+
+args = parser.parse_args()
+data_file = args.DATAFILE
+pb_file = args.PBFILE
+pb_file2 = args.PBFILE2
+md_file = args.MD_FILE
+output = args.OUTPUT
+balance = args.balance
+
+lammps = PyLammps()
+if balance:
+    # 2 x 1 x 1 processor grid: the processors hold 4 and 2 atoms
+    lammps.processors("2 1 1")
+else:
+    # 1 x 2 x 1 processor grid: the processors hold 6 and 0 atoms
+    lammps.processors("1 2 1")
+lammps.units("metal")
+lammps.boundary("p p p")
+lammps.atom_style("atomic")
+lammps.neighbor("2.0 bin")
+lammps.neigh_modify("every 10 delay 0 check no")
+lammps.read_data(data_file)
+lammps.mass("1 16")
+lammps.mass("2 2")
+lammps.timestep(0.0005)
+lammps.fix("1 all nve")
+
+relative = 1.0  # value that follows the `relative` keyword in pair_style below
+lammps.pair_style(
+    f"deepmd {pb_file} {pb_file2} out_file {md_file} out_freq 1 atomic relative {relative}"
+)
+lammps.pair_coeff("* *")
+lammps.run(0)  # zero timesteps
+pe = lammps.eval("pe")  # evaluated on every rank, saved by rank 0 only
+if rank == 0:
+    arr = [pe]
+    np.savetxt(output, np.array(arr))  # one-element array holding pe
+MPI.Finalize()
diff --git a/source/lmp/tests/test_lammps.py b/source/lmp/tests/test_lammps.py
index c495f16ffd..0e7c289f24 100644
--- a/source/lmp/tests/test_lammps.py
+++ b/source/lmp/tests/test_lammps.py
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import importlib.util
 import os
+import shutil
 import subprocess as sp
 import sys
+import tempfile
 from pathlib import (
     Path,
 )
@@ -671,3 +674,52 @@ def test_pair_deepmd_si(lammps_si):
             expected_f[lammps_si.atoms[ii].id - 1] * constants.force_metal2si
         )
     lammps_si.run(1)
+
+
+@pytest.mark.skipif(
+    shutil.which("mpirun") is None, reason="MPI is not installed on this system"
+)
+@pytest.mark.skipif(
+    importlib.util.find_spec("mpi4py") is None, reason="mpi4py is not installed"
+)
+@pytest.mark.parametrize(
+    ("balance_args",),
+    [(["--balance"],), ([],)],  # run the helper script with and without --balance
+)
+def test_pair_deepmd_mpi(balance_args: list):
+    with tempfile.NamedTemporaryFile() as f:  # OUTPUT file the helper script saves pe to
+        sp.check_call(
+            [
+                "mpirun",
+                "-n",
+                "2",  # two MPI processes
+                sys.executable,
+                Path(__file__).parent / "run_mpi_pair_deepmd.py",
+                data_file,
+                pb_file,
+                pb_file2,
+                md_file,
+                f.name,
+                *balance_args,
+            ]
+        )
+        arr = np.loadtxt(f.name, ndmin=1)  # ndmin=1 keeps a single value indexable
+    pe = arr[0]
+
+    relative = 1.0  # same value as hard-coded in run_mpi_pair_deepmd.py
+    assert pe == pytest.approx(expected_e)
+    # load the model deviation file that was passed to the script as MD_FILE
+    md = np.loadtxt(md_file.resolve())
+    norm = np.linalg.norm(np.mean([expected_f, expected_f2], axis=0), axis=1)
+    expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1)
+    expected_md_f /= norm + relative  # std norm scaled by (mean-force norm + relative)
+    assert md[7:] == pytest.approx(expected_md_f)
+    assert md[4] == pytest.approx(np.max(expected_md_f))
+    assert md[5] == pytest.approx(np.min(expected_md_f))
+    assert md[6] == pytest.approx(np.mean(expected_md_f))
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6  # NOTE(review): 6 is presumably the atom count - confirm
+    )
+    assert md[1] == pytest.approx(np.max(expected_md_v))
+    assert md[2] == pytest.approx(np.min(expected_md_v))
+    assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))