Skip to content

Commit

Permalink
Merge branch 'devel' into spin_lmp
Browse files Browse the repository at this point in the history
  • Loading branch information
iProzd authored Nov 7, 2024
2 parents 10b163e + 430dfa9 commit 811a0b9
Show file tree
Hide file tree
Showing 82 changed files with 5,146 additions and 678 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/test_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
&& sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3
if: false # skip as we use nvidia image
- run: python -m pip install -U uv
- run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.5.0"
- run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.5.0" "jax[cuda12]"
- run: |
export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
Expand All @@ -61,6 +61,8 @@ jobs:
env:
NUM_WORKERS: 0
CUDA_VISIBLE_DEVICES: 0
# See https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html
XLA_PYTHON_CLIENT_PREALLOCATE: false
- name: Download libtorch
run: |
wget https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.5.0%2Bcu124.zip -O libtorch.zip
Expand Down
18 changes: 14 additions & 4 deletions .github/workflows/test_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,23 @@ jobs:
python-version: ${{ matrix.python }}
- run: python -m pip install -U uv
- run: |
source/install/uv_with_retry.sh pip install --system mpich
source/install/uv_with_retry.sh pip install --system openmpi tensorflow-cpu
source/install/uv_with_retry.sh pip install --system torch -i https://download.pytorch.org/whl/cpu
export TENSORFLOW_ROOT=$(python -c 'import tensorflow;print(tensorflow.__path__[0])')
export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
source/install/uv_with_retry.sh pip install --system --only-binary=horovod -e .[cpu,test,jax] horovod[tensorflow-cpu] mpi4py
source/install/uv_with_retry.sh pip install --system -e .[test,jax] mpi4py
source/install/uv_with_retry.sh pip install --system horovod --no-build-isolation
env:
# Please note that uv has some issues with finding
# an existing TensorFlow package. Currently, it uses
# TensorFlow in the build dependency, but if that
# changes, set `TENSORFLOW_ROOT`.
TENSORFLOW_VERSION: 2.16.1
DP_ENABLE_PYTORCH: 1
DP_BUILD_TESTING: 1
UV_EXTRA_INDEX_URL: "https://pypi.anaconda.org/njzjz/simple https://pypi.anaconda.org/mpi4py/simple"
UV_EXTRA_INDEX_URL: "https://pypi.anaconda.org/mpi4py/simple"
HOROVOD_WITH_TENSORFLOW: 1
HOROVOD_WITHOUT_PYTORCH: 1
HOROVOD_WITH_MPI: 1
- run: dp --version
- name: Get durations from cache
uses: actions/cache@v4
Expand All @@ -53,6 +57,12 @@ jobs:
- run: pytest --cov=deepmd source/tests --durations=0 --splits 6 --group ${{ matrix.group }} --store-durations --durations-path=.test_durations --splitting-algorithm least_duration
env:
NUM_WORKERS: 0
- name: Test TF2 eager mode
run: pytest --cov=deepmd source/tests/consistent/io/test_io.py --durations=0
env:
NUM_WORKERS: 0
DP_TEST_TF2_ONLY: 1
if: matrix.group == 1
- run: mv .test_durations .test_durations_${{ matrix.group }}
- name: Upload partial durations
uses: actions/upload-artifact@v4
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ repos:
exclude: ^source/3rdparty
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.7.1
rev: v0.7.2
hooks:
- id: ruff
args: ["--fix"]
Expand Down Expand Up @@ -60,7 +60,7 @@ repos:
- id: blacken-docs
# C++
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v19.1.2
rev: v19.1.3
hooks:
- id: clang-format
exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$)
Expand Down
2 changes: 1 addition & 1 deletion deepmd/backend/jax.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class JAXBackend(Backend):
| Backend.Feature.NEIGHBOR_STAT
)
"""The features of the backend."""
suffixes: ClassVar[list[str]] = [".hlo", ".jax"]
suffixes: ClassVar[list[str]] = [".hlo", ".jax", ".savedmodel"]
"""The suffixes of the backend."""

def is_available(self) -> bool:
Expand Down
9 changes: 8 additions & 1 deletion deepmd/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class DP(Calculator):
will infer this information from model, by default None
neighbor_list : ase.neighborlist.NeighborList, optional
The neighbor list object. If None, then build the native neighbor list.
head : Union[str, None], optional
The specific model branch to choose from a pretrained model, by default None
Examples
--------
Expand Down Expand Up @@ -84,10 +86,15 @@ def __init__(
label: str = "DP",
type_dict: Optional[dict[str, int]] = None,
neighbor_list=None,
head=None,
**kwargs,
) -> None:
Calculator.__init__(self, label=label, **kwargs)
self.dp = DeepPot(str(Path(model).resolve()), neighbor_list=neighbor_list)
self.dp = DeepPot(
str(Path(model).resolve()),
neighbor_list=neighbor_list,
head=head,
)
if type_dict:
self.type_dict = type_dict
else:
Expand Down
31 changes: 31 additions & 0 deletions deepmd/dpmodel/atomic_model/dp_atomic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,37 @@ def need_sorted_nlist_for_lower(self) -> bool:
"""Returns whether the atomic model needs sorted nlist when using `forward_lower`."""
return self.descriptor.need_sorted_nlist_for_lower()

def enable_compression(
    self,
    min_nbor_dist: float,
    table_extrapolate: float = 5,
    table_stride_1: float = 0.01,
    table_stride_2: float = 0.1,
    check_frequency: int = -1,
) -> None:
    """Enable compression by delegating to the descriptor.

    All arguments are forwarded unchanged, and in the same order, to
    ``self.descriptor.enable_compression``.

    Parameters
    ----------
    min_nbor_dist
        The nearest distance between atoms
    table_extrapolate
        The scale of model extrapolation
    table_stride_1
        The uniform stride of the first table
    table_stride_2
        The uniform stride of the second table
    check_frequency
        The overflow check frequency
    """
    compression_args = (
        min_nbor_dist,
        table_extrapolate,
        table_stride_1,
        table_stride_2,
        check_frequency,
    )
    # Forward positionally so the descriptor's own parameter names do not matter.
    self.descriptor.enable_compression(*compression_args)

def forward_atomic(
self,
extended_coord: np.ndarray,
Expand Down
79 changes: 58 additions & 21 deletions deepmd/dpmodel/atomic_model/linear_atomic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
Union,
)

import array_api_compat
import numpy as np

from deepmd.dpmodel.utils.nlist import (
Expand Down Expand Up @@ -69,15 +70,16 @@ def __init__(
self.models = models
sub_model_type_maps = [md.get_type_map() for md in models]
err_msg = []
self.mapping_list = []
mapping_list = []
common_type_map = set(type_map)
self.type_map = type_map
for tpmp in sub_model_type_maps:
if not common_type_map.issubset(set(tpmp)):
err_msg.append(
f"type_map {tpmp} is not a subset of type_map {type_map}"
)
self.mapping_list.append(self.remap_atype(tpmp, self.type_map))
mapping_list.append(self.remap_atype(tpmp, self.type_map))
self.mapping_list = mapping_list
assert len(err_msg) == 0, "\n".join(err_msg)
self.mixed_types_list = [model.mixed_types() for model in self.models]

Expand Down Expand Up @@ -149,6 +151,38 @@ def _sort_rcuts_sels(self) -> tuple[list[float], list[int]]:
)
return [p[0] for p in zipped], [p[1] for p in zipped]

def enable_compression(
    self,
    min_nbor_dist: float,
    table_extrapolate: float = 5,
    table_stride_1: float = 0.01,
    table_stride_2: float = 0.1,
    check_frequency: int = -1,
) -> None:
    """Enable compression on every sub-model held in ``self.models``.

    Each sub-model receives the same arguments, in the same order as this
    signature.

    Parameters
    ----------
    min_nbor_dist
        The nearest distance between atoms
    table_extrapolate
        The scale of model extrapolation
    table_stride_1
        The uniform stride of the first table
    table_stride_2
        The uniform stride of the second table
    check_frequency
        The overflow check frequency
    """
    compression_args = (
        min_nbor_dist,
        table_extrapolate,
        table_stride_1,
        table_stride_2,
        check_frequency,
    )
    # Sub-models are compressed one by one, in list order.
    for sub_model in self.models:
        sub_model.enable_compression(*compression_args)

def forward_atomic(
self,
extended_coord,
Expand Down Expand Up @@ -180,8 +214,9 @@ def forward_atomic(
result_dict
the result dict, defined by the fitting net output def.
"""
xp = array_api_compat.array_namespace(extended_coord, extended_atype, nlist)
nframes, nloc, nnei = nlist.shape
extended_coord = extended_coord.reshape(nframes, -1, 3)
extended_coord = xp.reshape(extended_coord, (nframes, -1, 3))
sorted_rcuts, sorted_sels = self._sort_rcuts_sels()
nlists = build_multiple_neighbor_list(
extended_coord,
Expand Down Expand Up @@ -212,10 +247,10 @@ def forward_atomic(
aparam,
)["energy"]
)
self.weights = self._compute_weight(extended_coord, extended_atype, nlists_)
weights = self._compute_weight(extended_coord, extended_atype, nlists_)

fit_ret = {
"energy": np.sum(np.stack(ener_list) * np.stack(self.weights), axis=0),
"energy": xp.sum(xp.stack(ener_list) * xp.stack(weights), axis=0),
} # (nframes, nloc, 1)
return fit_ret

Expand Down Expand Up @@ -288,11 +323,12 @@ def _compute_weight(
nlists_: list[np.ndarray],
) -> list[np.ndarray]:
"""This should be a list of user defined weights that matches the number of models to be combined."""
xp = array_api_compat.array_namespace(extended_coord, extended_atype, nlists_)
nmodels = len(self.models)
nframes, nloc, _ = nlists_[0].shape
# the dtype of weights is the interface data type.
return [
np.ones((nframes, nloc, 1), dtype=GLOBAL_NP_FLOAT_PRECISION) / nmodels
xp.ones((nframes, nloc, 1), dtype=GLOBAL_NP_FLOAT_PRECISION) / nmodels
for _ in range(nmodels)
]

Expand Down Expand Up @@ -410,6 +446,7 @@ def _compute_weight(
self.sw_rmax > self.sw_rmin
), "The upper boundary `sw_rmax` must be greater than the lower boundary `sw_rmin`."

xp = array_api_compat.array_namespace(extended_coord, extended_atype)
dp_nlist = nlists_[0]
zbl_nlist = nlists_[1]

Expand All @@ -418,40 +455,40 @@ def _compute_weight(

# use the larger rr based on nlist
nlist_larger = zbl_nlist if zbl_nnei >= dp_nnei else dp_nlist
masked_nlist = np.clip(nlist_larger, 0, None)
masked_nlist = xp.clip(nlist_larger, 0, None)
pairwise_rr = PairTabAtomicModel._get_pairwise_dist(
extended_coord, masked_nlist
)

numerator = np.sum(
np.where(
numerator = xp.sum(
xp.where(
nlist_larger != -1,
pairwise_rr * np.exp(-pairwise_rr / self.smin_alpha),
np.zeros_like(nlist_larger),
pairwise_rr * xp.exp(-pairwise_rr / self.smin_alpha),
xp.zeros_like(nlist_larger),
),
axis=-1,
) # masked nnei will be zero, no need to handle
denominator = np.sum(
np.where(
denominator = xp.sum(
xp.where(
nlist_larger != -1,
np.exp(-pairwise_rr / self.smin_alpha),
np.zeros_like(nlist_larger),
xp.exp(-pairwise_rr / self.smin_alpha),
xp.zeros_like(nlist_larger),
),
axis=-1,
) # handle masked nnei.
with np.errstate(divide="ignore", invalid="ignore"):
sigma = numerator / denominator
u = (sigma - self.sw_rmin) / (self.sw_rmax - self.sw_rmin)
coef = np.zeros_like(u)
coef = xp.zeros_like(u)
left_mask = sigma < self.sw_rmin
mid_mask = (self.sw_rmin <= sigma) & (sigma < self.sw_rmax)
right_mask = sigma >= self.sw_rmax
coef[left_mask] = 1
coef = xp.where(left_mask, xp.ones_like(coef), coef)
with np.errstate(invalid="ignore"):
smooth = -6 * u**5 + 15 * u**4 - 10 * u**3 + 1
coef[mid_mask] = smooth[mid_mask]
coef[right_mask] = 0
coef = xp.where(mid_mask, smooth, coef)
coef = xp.where(right_mask, xp.zeros_like(coef), coef)
# to handle masked atoms
coef = np.where(sigma != 0, coef, np.zeros_like(coef))
coef = xp.where(sigma != 0, coef, xp.zeros_like(coef))
self.zbl_weight = coef
return [1 - np.expand_dims(coef, -1), np.expand_dims(coef, -1)]
return [1 - xp.expand_dims(coef, -1), xp.expand_dims(coef, -1)]
25 changes: 25 additions & 0 deletions deepmd/dpmodel/atomic_model/make_base_atomic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,31 @@ def change_type_map(
) -> None:
pass

def enable_compression(
    self,
    min_nbor_dist: float,
    table_extrapolate: float = 5,
    table_stride_1: float = 0.01,
    table_stride_2: float = 0.1,
    check_frequency: int = -1,
) -> None:
    """Enable model compression; not supported by default.

    This default implementation always raises. The arguments are accepted
    but not used here.

    Parameters
    ----------
    min_nbor_dist
        The nearest distance between atoms
    table_extrapolate
        The scale of model extrapolation
    table_stride_1
        The uniform stride of the first table
    table_stride_2
        The uniform stride of the second table
    check_frequency
        The overflow check frequency

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError("This atomic model doesn't support compression!")

def make_atom_mask(
self,
atype: t_tensor,
Expand Down
Loading

0 comments on commit 811a0b9

Please sign in to comment.