Merge branch 'devel' into spin_lmp

iProzd · Nov 7, 2024 · 811a0b9 · 811a0b9
2 parents 10b163e + 430dfa9
commit 811a0b9
Show file tree

Hide file tree

Showing 82 changed files with 5,146 additions and 678 deletions.
diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
@@ -47,7 +47,7 @@ jobs:
          && sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3
       if: false  # skip as we use nvidia image
     - run: python -m pip install -U uv
-    - run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.5.0"
+    - run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.5.0" "jax[cuda12]"
     - run: |
         export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
         export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
@@ -61,6 +61,8 @@ jobs:
       env:
         NUM_WORKERS: 0
         CUDA_VISIBLE_DEVICES: 0
+        # See https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html
+        XLA_PYTHON_CLIENT_PREALLOCATE: false
     - name: Download libtorch
       run: |
          wget https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.5.0%2Bcu124.zip -O libtorch.zip

diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml
@@ -25,19 +25,23 @@ jobs:
         python-version: ${{ matrix.python }}
     - run: python -m pip install -U uv
     - run: |
-        source/install/uv_with_retry.sh pip install --system mpich
+        source/install/uv_with_retry.sh pip install --system openmpi tensorflow-cpu
         source/install/uv_with_retry.sh pip install --system torch -i https://download.pytorch.org/whl/cpu
+        export TENSORFLOW_ROOT=$(python -c 'import tensorflow;print(tensorflow.__path__[0])')
         export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
-        source/install/uv_with_retry.sh pip install --system --only-binary=horovod -e .[cpu,test,jax] horovod[tensorflow-cpu] mpi4py
+        source/install/uv_with_retry.sh pip install --system -e .[test,jax] mpi4py
+        source/install/uv_with_retry.sh pip install --system horovod --no-build-isolation
       env:
         # Please note that uv has some issues finding an
         # existing TensorFlow package. Currently, it uses
         # TensorFlow from the build dependencies; if that
         # changes, set `TENSORFLOW_ROOT`.
-        TENSORFLOW_VERSION: 2.16.1
         DP_ENABLE_PYTORCH: 1
         DP_BUILD_TESTING: 1
-        UV_EXTRA_INDEX_URL: "https://pypi.anaconda.org/njzjz/simple https://pypi.anaconda.org/mpi4py/simple"
+        UV_EXTRA_INDEX_URL: "https://pypi.anaconda.org/mpi4py/simple"
+        HOROVOD_WITH_TENSORFLOW: 1
+        HOROVOD_WITHOUT_PYTORCH: 1
+        HOROVOD_WITH_MPI: 1
     - run: dp --version
     - name: Get durations from cache
       uses: actions/cache@v4
@@ -53,6 +57,12 @@ jobs:
     - run: pytest --cov=deepmd source/tests --durations=0 --splits 6 --group ${{ matrix.group }} --store-durations --durations-path=.test_durations --splitting-algorithm least_duration
       env:
         NUM_WORKERS: 0
+    - name: Test TF2 eager mode
+      run: pytest --cov=deepmd source/tests/consistent/io/test_io.py --durations=0
+      env:
+        NUM_WORKERS: 0
+        DP_TEST_TF2_ONLY: 1
+      if: matrix.group == 1
     - run: mv .test_durations .test_durations_${{ matrix.group }}
     - name: Upload partial durations
       uses: actions/upload-artifact@v4

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
         exclude: ^source/3rdparty
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.7.1
+    rev: v0.7.2
     hooks:
       - id: ruff
         args: ["--fix"]
@@ -60,7 +60,7 @@ repos:
       - id: blacken-docs
   # C++
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v19.1.2
+    rev: v19.1.3
     hooks:
       - id: clang-format
         exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$)

diff --git a/deepmd/backend/jax.py b/deepmd/backend/jax.py
@@ -38,7 +38,7 @@ class JAXBackend(Backend):
         | Backend.Feature.NEIGHBOR_STAT
     )
     """The features of the backend."""
-    suffixes: ClassVar[list[str]] = [".hlo", ".jax"]
+    suffixes: ClassVar[list[str]] = [".hlo", ".jax", ".savedmodel"]
     """The suffixes of the backend."""
 
     def is_available(self) -> bool:

diff --git a/deepmd/calculator.py b/deepmd/calculator.py
@@ -45,6 +45,8 @@ class DP(Calculator):
         will infer this information from model, by default None
     neighbor_list : ase.neighborlist.NeighborList, optional
         The neighbor list object. If None, then build the native neighbor list.
+    head : Union[str, None], optional
+        A specific model branch to choose from the pretrained model, by default None
 
     Examples
     --------
@@ -84,10 +86,15 @@ def __init__(
         label: str = "DP",
         type_dict: Optional[dict[str, int]] = None,
         neighbor_list=None,
+        head=None,
         **kwargs,
     ) -> None:
         Calculator.__init__(self, label=label, **kwargs)
-        self.dp = DeepPot(str(Path(model).resolve()), neighbor_list=neighbor_list)
+        self.dp = DeepPot(
+            str(Path(model).resolve()),
+            neighbor_list=neighbor_list,
+            head=head,
+        )
         if type_dict:
             self.type_dict = type_dict
         else:

diff --git a/deepmd/dpmodel/atomic_model/dp_atomic_model.py b/deepmd/dpmodel/atomic_model/dp_atomic_model.py
@@ -86,6 +86,37 @@ def need_sorted_nlist_for_lower(self) -> bool:
         """Returns whether the atomic model needs sorted nlist when using `forward_lower`."""
         return self.descriptor.need_sorted_nlist_for_lower()
 
+    def enable_compression(
+        self,
+        min_nbor_dist: float,
+        table_extrapolate: float = 5,
+        table_stride_1: float = 0.01,
+        table_stride_2: float = 0.1,
+        check_frequency: int = -1,
+    ) -> None:
+        """Enable compression by delegating to the descriptor.
+
+        All arguments are forwarded unchanged, positionally and in the
+        order listed below, to ``self.descriptor.enable_compression``.
+
+        Parameters
+        ----------
+        min_nbor_dist
+            The nearest distance between atoms
+        table_extrapolate
+            The scale of model extrapolation
+        table_stride_1
+            The uniform stride of the first table
+        table_stride_2
+            The uniform stride of the second table
+        check_frequency
+            The overflow check frequency
+        """
+        # Pure pass-through: this atomic model holds no compression state itself.
+        self.descriptor.enable_compression(
+            min_nbor_dist,
+            table_extrapolate,
+            table_stride_1,
+            table_stride_2,
+            check_frequency,
+        )
+
     def forward_atomic(
         self,
         extended_coord: np.ndarray,

diff --git a/deepmd/dpmodel/atomic_model/linear_atomic_model.py b/deepmd/dpmodel/atomic_model/linear_atomic_model.py
@@ -5,6 +5,7 @@
     Union,
 )
 
+import array_api_compat
 import numpy as np
 
 from deepmd.dpmodel.utils.nlist import (
@@ -69,15 +70,16 @@ def __init__(
         self.models = models
         sub_model_type_maps = [md.get_type_map() for md in models]
         err_msg = []
-        self.mapping_list = []
+        mapping_list = []
         common_type_map = set(type_map)
         self.type_map = type_map
         for tpmp in sub_model_type_maps:
             if not common_type_map.issubset(set(tpmp)):
                 err_msg.append(
                     f"type_map {tpmp} is not a subset of type_map {type_map}"
                 )
-            self.mapping_list.append(self.remap_atype(tpmp, self.type_map))
+            mapping_list.append(self.remap_atype(tpmp, self.type_map))
+        self.mapping_list = mapping_list
         assert len(err_msg) == 0, "\n".join(err_msg)
         self.mixed_types_list = [model.mixed_types() for model in self.models]
 
@@ -149,6 +151,38 @@ def _sort_rcuts_sels(self) -> tuple[list[float], list[int]]:
         )
         return [p[0] for p in zipped], [p[1] for p in zipped]
 
+    def enable_compression(
+        self,
+        min_nbor_dist: float,
+        table_extrapolate: float = 5,
+        table_stride_1: float = 0.01,
+        table_stride_2: float = 0.1,
+        check_frequency: int = -1,
+    ) -> None:
+        """Enable compression on every sub-model of this linear model.
+
+        Each model in ``self.models`` has its ``enable_compression`` called
+        with the same arguments, forwarded positionally in the order below.
+
+        Parameters
+        ----------
+        min_nbor_dist
+            The nearest distance between atoms
+        table_extrapolate
+            The scale of model extrapolation
+        table_stride_1
+            The uniform stride of the first table
+        table_stride_2
+            The uniform stride of the second table
+        check_frequency
+            The overflow check frequency
+        """
+        # No per-model customization: every sub-model receives identical settings.
+        for model in self.models:
+            model.enable_compression(
+                min_nbor_dist,
+                table_extrapolate,
+                table_stride_1,
+                table_stride_2,
+                check_frequency,
+            )
+
     def forward_atomic(
         self,
         extended_coord,
@@ -180,8 +214,9 @@ def forward_atomic(
         result_dict
             the result dict, defined by the fitting net output def.
         """
+        xp = array_api_compat.array_namespace(extended_coord, extended_atype, nlist)
         nframes, nloc, nnei = nlist.shape
-        extended_coord = extended_coord.reshape(nframes, -1, 3)
+        extended_coord = xp.reshape(extended_coord, (nframes, -1, 3))
         sorted_rcuts, sorted_sels = self._sort_rcuts_sels()
         nlists = build_multiple_neighbor_list(
             extended_coord,
@@ -212,10 +247,10 @@ def forward_atomic(
                     aparam,
                 )["energy"]
             )
-        self.weights = self._compute_weight(extended_coord, extended_atype, nlists_)
+        weights = self._compute_weight(extended_coord, extended_atype, nlists_)
 
         fit_ret = {
-            "energy": np.sum(np.stack(ener_list) * np.stack(self.weights), axis=0),
+            "energy": xp.sum(xp.stack(ener_list) * xp.stack(weights), axis=0),
         }  # (nframes, nloc, 1)
         return fit_ret
 
@@ -288,11 +323,12 @@ def _compute_weight(
         nlists_: list[np.ndarray],
     ) -> list[np.ndarray]:
         """This should be a list of user defined weights that matches the number of models to be combined."""
+        xp = array_api_compat.array_namespace(extended_coord, extended_atype, nlists_)
         nmodels = len(self.models)
         nframes, nloc, _ = nlists_[0].shape
         # the dtype of weights is the interface data type.
         return [
-            np.ones((nframes, nloc, 1), dtype=GLOBAL_NP_FLOAT_PRECISION) / nmodels
+            xp.ones((nframes, nloc, 1), dtype=GLOBAL_NP_FLOAT_PRECISION) / nmodels
             for _ in range(nmodels)
         ]
 
@@ -410,6 +446,7 @@ def _compute_weight(
             self.sw_rmax > self.sw_rmin
         ), "The upper boundary `sw_rmax` must be greater than the lower boundary `sw_rmin`."
 
+        xp = array_api_compat.array_namespace(extended_coord, extended_atype)
         dp_nlist = nlists_[0]
         zbl_nlist = nlists_[1]
 
@@ -418,40 +455,40 @@ def _compute_weight(
 
         # use the larger rr based on nlist
         nlist_larger = zbl_nlist if zbl_nnei >= dp_nnei else dp_nlist
-        masked_nlist = np.clip(nlist_larger, 0, None)
+        masked_nlist = xp.clip(nlist_larger, 0, None)
         pairwise_rr = PairTabAtomicModel._get_pairwise_dist(
             extended_coord, masked_nlist
         )
 
-        numerator = np.sum(
-            np.where(
+        numerator = xp.sum(
+            xp.where(
                 nlist_larger != -1,
-                pairwise_rr * np.exp(-pairwise_rr / self.smin_alpha),
-                np.zeros_like(nlist_larger),
+                pairwise_rr * xp.exp(-pairwise_rr / self.smin_alpha),
+                xp.zeros_like(nlist_larger),
             ),
             axis=-1,
         )  # masked nnei will be zero, no need to handle
-        denominator = np.sum(
-            np.where(
+        denominator = xp.sum(
+            xp.where(
                 nlist_larger != -1,
-                np.exp(-pairwise_rr / self.smin_alpha),
-                np.zeros_like(nlist_larger),
+                xp.exp(-pairwise_rr / self.smin_alpha),
+                xp.zeros_like(nlist_larger),
             ),
             axis=-1,
         )  # handle masked nnei.
         with np.errstate(divide="ignore", invalid="ignore"):
             sigma = numerator / denominator
         u = (sigma - self.sw_rmin) / (self.sw_rmax - self.sw_rmin)
-        coef = np.zeros_like(u)
+        coef = xp.zeros_like(u)
         left_mask = sigma < self.sw_rmin
         mid_mask = (self.sw_rmin <= sigma) & (sigma < self.sw_rmax)
         right_mask = sigma >= self.sw_rmax
-        coef[left_mask] = 1
+        coef = xp.where(left_mask, xp.ones_like(coef), coef)
         with np.errstate(invalid="ignore"):
             smooth = -6 * u**5 + 15 * u**4 - 10 * u**3 + 1
-        coef[mid_mask] = smooth[mid_mask]
-        coef[right_mask] = 0
+        coef = xp.where(mid_mask, smooth, coef)
+        coef = xp.where(right_mask, xp.zeros_like(coef), coef)
         # to handle masked atoms
-        coef = np.where(sigma != 0, coef, np.zeros_like(coef))
+        coef = xp.where(sigma != 0, coef, xp.zeros_like(coef))
         self.zbl_weight = coef
-        return [1 - np.expand_dims(coef, -1), np.expand_dims(coef, -1)]
+        return [1 - xp.expand_dims(coef, -1), xp.expand_dims(coef, -1)]
diff --git a/deepmd/dpmodel/atomic_model/make_base_atomic_model.py b/deepmd/dpmodel/atomic_model/make_base_atomic_model.py
@@ -148,6 +148,31 @@ def change_type_map(
         ) -> None:
             pass
 
+        def enable_compression(
+            self,
+            min_nbor_dist: float,
+            table_extrapolate: float = 5,
+            table_stride_1: float = 0.01,
+            table_stride_2: float = 0.1,
+            check_frequency: int = -1,
+        ) -> None:
+            """Enable compression for the atomic model.
+
+            This base implementation does not compress anything; it always
+            raises ``NotImplementedError``.
+
+            Parameters
+            ----------
+            min_nbor_dist
+                The nearest distance between atoms
+            table_extrapolate
+                The scale of model extrapolation
+            table_stride_1
+                The uniform stride of the first table
+            table_stride_2
+                The uniform stride of the second table
+            check_frequency
+                The overflow check frequency
+
+            Raises
+            ------
+            NotImplementedError
+                Always raised by this base implementation.
+            """
+            raise NotImplementedError("This atomic model doesn't support compression!")
+
         def make_atom_mask(
             self,
             atype: t_tensor,