diff --git a/deepmd/dpmodel/utils/neighbor_stat.py b/deepmd/dpmodel/utils/neighbor_stat.py new file mode 100644 index 0000000000..a0f3c02131 --- /dev/null +++ b/deepmd/dpmodel/utils/neighbor_stat.py @@ -0,0 +1,154 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Iterator, + Optional, + Tuple, +) + +import numpy as np + +from deepmd.dpmodel.common import ( + NativeOP, +) +from deepmd.dpmodel.utils.nlist import ( + extend_coord_with_ghosts, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.neighbor_stat import NeighborStat as BaseNeighborStat + + +class NeighborStatOP(NativeOP): + """Class for getting neighbor statics data information. + + Parameters + ---------- + ntypes + The num of atom types + rcut + The cut-off radius + distinguish_types : bool, optional + If False, treat all types as a single type. + """ + + def __init__( + self, + ntypes: int, + rcut: float, + distinguish_types: bool, + ) -> None: + self.rcut = rcut + self.ntypes = ntypes + self.distinguish_types = distinguish_types + + def call( + self, + coord: np.ndarray, + atype: np.ndarray, + cell: Optional[np.ndarray], + ) -> Tuple[float, np.ndarray]: + """Calculate the neareest neighbor distance between atoms, maximum nbor size of + atoms and the output data range of the environment matrix. + + Parameters + ---------- + coord + The coordinates of atoms. + atype + The atom types. + cell + The cell. + + Returns + ------- + float + The minimal squared distance between two atoms + np.ndarray + The maximal number of neighbors + """ + nframes = coord.shape[0] + coord = coord.reshape(nframes, -1, 3) + nloc = coord.shape[1] + coord = coord.reshape(nframes, nloc * 3) + extend_coord, extend_atype, _ = extend_coord_with_ghosts( + coord, atype, cell, self.rcut + ) + + coord1 = extend_coord.reshape(nframes, -1) + nall = coord1.shape[1] // 3 + coord0 = coord1[:, : nloc * 3] + diff = ( + coord1.reshape([nframes, -1, 3])[:, None, :, :] + - coord0.reshape([nframes, -1, 3])[:, :, None, :] + ) + assert list(diff.shape) == [nframes, nloc, nall, 3] + # remove the diagonal elements + mask = np.eye(nloc, nall, dtype=bool) + diff[:, mask] = np.inf + rr2 = np.sum(np.square(diff), axis=-1) + min_rr2 = np.min(rr2, axis=-1) + # count the number of neighbors + if self.distinguish_types: + mask = rr2 < self.rcut**2 + nnei = np.zeros((nframes, nloc, self.ntypes), dtype=int) + for ii in range(self.ntypes): + nnei[:, :, ii] = np.sum( + mask & (extend_atype == ii)[:, None, :], axis=-1 + ) + else: + mask = rr2 < self.rcut**2 + # virtual type (<0) are not counted + nnei = np.sum(mask & (extend_atype >= 0)[:, None, :], axis=-1).reshape( + nframes, nloc, 1 + ) + max_nnei = np.max(nnei, axis=1) + return min_rr2, max_nnei + + +class NeighborStat(BaseNeighborStat): + """Neighbor statistics using pure NumPy. + + Parameters + ---------- + ntypes : int + The num of atom types + rcut : float + The cut-off radius + one_type : bool, optional, default=False + Treat all types as a single type. + """ + + def __init__( + self, + ntypes: int, + rcut: float, + one_type: bool = False, + ) -> None: + super().__init__(ntypes, rcut, one_type) + self.op = NeighborStatOP(ntypes, rcut, not one_type) + + def iterator( + self, data: DeepmdDataSystem + ) -> Iterator[Tuple[np.ndarray, float, str]]: + """Abstract method for producing data. + + Yields + ------ + np.ndarray + The maximal number of neighbors + float + The squared minimal distance between two atoms + str + The directory of the data system + """ + for ii in range(len(data.system_dirs)): + for jj in data.data_systems[ii].dirs: + data_set = data.data_systems[ii] + data_set_data = data_set._load_set(jj) + minrr2, max_nnei = self.op( + data_set_data["coord"], + data_set_data["type"], + data_set_data["box"] if data_set.pbc else None, + ) + yield np.max(max_nnei, axis=0), np.min(minrr2), jj diff --git a/deepmd/entrypoints/neighbor_stat.py b/deepmd/entrypoints/neighbor_stat.py new file mode 100644 index 0000000000..f5ce0f839d --- /dev/null +++ b/deepmd/entrypoints/neighbor_stat.py @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +from typing import ( + List, +) + +from deepmd.common import ( + expand_sys_str, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) + +log = logging.getLogger(__name__) + + +def neighbor_stat( + *, + system: str, + rcut: float, + type_map: List[str], + one_type: bool = False, + backend: str = "tensorflow", + **kwargs, +): + """Calculate neighbor statistics. + + Parameters + ---------- + system : str + system to stat + rcut : float + cutoff radius + type_map : list[str] + type map + one_type : bool, optional, default=False + treat all types as a single type + backend : str, optional, default="tensorflow" + backend to use + **kwargs + additional arguments + + Examples + -------- + >>> neighbor_stat( + ... system=".", + ... rcut=6.0, + ... type_map=[ + ... "C", + ... "H", + ... "O", + ... "N", + ... "P", + ... "S", + ... "Mg", + ... "Na", + ... "HW", + ... "OW", + ... "mNa", + ... "mCl", + ... "mC", + ... "mH", + ... "mMg", + ... "mN", + ... "mO", + ... "mP", + ... ], + ... ) + min_nbor_dist: 0.6599510670195264 + max_nbor_size: [23, 26, 19, 16, 2, 2, 1, 1, 72, 37, 5, 0, 31, 29, 1, 21, 20, 5] + """ + if backend == "tensorflow": + from deepmd.tf.utils.neighbor_stat import ( + NeighborStat, + ) + elif backend == "pytorch": + from deepmd.pt.utils.neighbor_stat import ( + NeighborStat, + ) + elif backend == "numpy": + from deepmd.dpmodel.utils.neighbor_stat import ( + NeighborStat, + ) + else: + raise ValueError(f"Invalid backend {backend}") + all_sys = expand_sys_str(system) + if not len(all_sys): + raise RuntimeError("Did not find valid system") + data = DeepmdDataSystem( + systems=all_sys, + batch_size=1, + test_size=1, + rcut=rcut, + type_map=type_map, + ) + data.get_batch() + nei = NeighborStat(data.get_ntypes(), rcut, one_type=one_type) + min_nbor_dist, max_nbor_size = nei.get_stat(data) + log.info("min_nbor_dist: %f" % min_nbor_dist) + log.info("max_nbor_size: %s" % str(max_nbor_size)) + return min_nbor_dist, max_nbor_size diff --git a/deepmd/main.py b/deepmd/main.py index bede2cf1fe..d6714e1e26 100644 --- a/deepmd/main.py +++ b/deepmd/main.py @@ -603,7 +603,7 @@ def main_parser() -> argparse.ArgumentParser: parser_neighbor_stat = subparsers.add_parser( "neighbor-stat", parents=[parser_log], - help="(Supported backend: TensorFlow) Calculate neighbor statistics", + help="Calculate neighbor statistics", formatter_class=RawTextArgumentDefaultsHelpFormatter, epilog=textwrap.dedent( """\ diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index bea063a261..29ef8761ff 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -28,6 +28,9 @@ from deepmd.entrypoints.gui import ( start_dpgui, ) +from deepmd.entrypoints.neighbor_stat import ( + neighbor_stat, +) from deepmd.entrypoints.test import ( test, ) @@ -328,6 +331,8 @@ def main(args: Optional[Union[List[str], argparse.Namespace]] = None): make_model_devi(**dict_args) elif FLAGS.command == "gui": start_dpgui(**dict_args) + elif FLAGS.command == "neighbor-stat": + neighbor_stat(**dict_args) else: raise RuntimeError(f"Invalid command {FLAGS.command}!") diff --git a/deepmd/pt/utils/neighbor_stat.py b/deepmd/pt/utils/neighbor_stat.py new file mode 100644 index 0000000000..b85f3ebcd1 --- /dev/null +++ b/deepmd/pt/utils/neighbor_stat.py @@ -0,0 +1,190 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Iterator, + Optional, + Tuple, +) + +import numpy as np +import torch + +from deepmd.pt.utils.auto_batch_size import ( + AutoBatchSize, +) +from deepmd.pt.utils.env import ( + DEVICE, +) +from deepmd.pt.utils.nlist import ( + extend_coord_with_ghosts, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.neighbor_stat import NeighborStat as BaseNeighborStat + + +class NeighborStatOP(torch.nn.Module): + """Class for getting neighbor statics data information. + + Parameters + ---------- + ntypes + The num of atom types + rcut + The cut-off radius + distinguish_types : bool, optional + If False, treat all types as a single type. + """ + + def __init__( + self, + ntypes: int, + rcut: float, + distinguish_types: bool, + ) -> None: + super().__init__() + self.rcut = rcut + self.ntypes = ntypes + self.distinguish_types = distinguish_types + + def forward( + self, + coord: torch.Tensor, + atype: torch.Tensor, + cell: Optional[torch.Tensor], + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Calculate the neareest neighbor distance between atoms, maximum nbor size of + atoms and the output data range of the environment matrix. + + Parameters + ---------- + coord + The coordinates of atoms. + atype + The atom types. + cell + The cell. + + Returns + ------- + torch.Tensor + The minimal squared distance between two atoms, in the shape of (nframes,) + torch.Tensor + The maximal number of neighbors + """ + nframes = coord.shape[0] + coord = coord.view(nframes, -1, 3) + nloc = coord.shape[1] + coord = coord.view(nframes, nloc * 3) + extend_coord, extend_atype, _ = extend_coord_with_ghosts( + coord, atype, cell, self.rcut + ) + + coord1 = extend_coord.reshape(nframes, -1) + nall = coord1.shape[1] // 3 + coord0 = coord1[:, : nloc * 3] + diff = ( + coord1.reshape([nframes, -1, 3])[:, None, :, :] + - coord0.reshape([nframes, -1, 3])[:, :, None, :] + ) + assert list(diff.shape) == [nframes, nloc, nall, 3] + # remove the diagonal elements + mask = torch.eye(nloc, nall, dtype=torch.bool) + diff[:, mask] = torch.inf + rr2 = torch.sum(torch.square(diff), dim=-1) + min_rr2, _ = torch.min(rr2, dim=-1) + # count the number of neighbors + if self.distinguish_types: + mask = rr2 < self.rcut**2 + nnei = torch.zeros((nframes, nloc, self.ntypes), dtype=torch.int32) + for ii in range(self.ntypes): + nnei[:, :, ii] = torch.sum( + mask & extend_atype.eq(ii)[:, None, :], dim=-1 + ) + else: + mask = rr2 < self.rcut**2 + # virtual types (<0) are not counted + nnei = torch.sum(mask & extend_atype.ge(0)[:, None, :], dim=-1).view( + nframes, nloc, 1 + ) + max_nnei, _ = torch.max(nnei, dim=1) + return min_rr2, max_nnei + + +class NeighborStat(BaseNeighborStat): + """Neighbor statistics using pure NumPy. + + Parameters + ---------- + ntypes : int + The num of atom types + rcut : float + The cut-off radius + one_type : bool, optional, default=False + Treat all types as a single type. + """ + + def __init__( + self, + ntypes: int, + rcut: float, + one_type: bool = False, + ) -> None: + super().__init__(ntypes, rcut, one_type) + op = NeighborStatOP(ntypes, rcut, not one_type) + self.op = torch.jit.script(op) + self.auto_batch_size = AutoBatchSize() + + def iterator( + self, data: DeepmdDataSystem + ) -> Iterator[Tuple[np.ndarray, float, str]]: + """Abstract method for producing data. + + Yields + ------ + np.ndarray + The maximal number of neighbors + float + The squared minimal distance between two atoms + str + The directory of the data system + """ + for ii in range(len(data.system_dirs)): + for jj in data.data_systems[ii].dirs: + data_set = data.data_systems[ii] + data_set_data = data_set._load_set(jj) + minrr2, max_nnei = self.auto_batch_size.execute_all( + self._execute, + data_set_data["coord"].shape[0], + data_set.get_natoms(), + data_set_data["coord"], + data_set_data["type"], + data_set_data["box"] if data_set.pbc else None, + ) + yield np.max(max_nnei, axis=0), np.min(minrr2), jj + + def _execute( + self, + coord: np.ndarray, + atype: np.ndarray, + cell: Optional[np.ndarray], + ): + """Execute the operation. + + Parameters + ---------- + coord + The coordinates of atoms. + atype + The atom types. + cell + The cell. + """ + minrr2, max_nnei = self.op( + torch.from_numpy(coord).to(DEVICE), + torch.from_numpy(atype).to(DEVICE), + torch.from_numpy(cell).to(DEVICE) if cell is not None else None, + ) + minrr2 = minrr2.detach().cpu().numpy() + max_nnei = max_nnei.detach().cpu().numpy() + return minrr2, max_nnei diff --git a/deepmd/tf/entrypoints/neighbor_stat.py b/deepmd/tf/entrypoints/neighbor_stat.py index e0999bed4a..5d31cdd179 100644 --- a/deepmd/tf/entrypoints/neighbor_stat.py +++ b/deepmd/tf/entrypoints/neighbor_stat.py @@ -1,87 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -import logging -from typing import ( - List, +from deepmd.entrypoints.neighbor_stat import ( + neighbor_stat, ) -from deepmd.tf.common import ( - expand_sys_str, -) -from deepmd.tf.utils.data_system import ( - DeepmdDataSystem, -) -from deepmd.tf.utils.neighbor_stat import ( - NeighborStat, -) - -log = logging.getLogger(__name__) - - -def neighbor_stat( - *, - system: str, - rcut: float, - type_map: List[str], - one_type: bool = False, - **kwargs, -): - """Calculate neighbor statistics. - - Parameters - ---------- - system : str - system to stat - rcut : float - cutoff radius - type_map : list[str] - type map - one_type : bool, optional, default=False - treat all types as a single type - **kwargs - additional arguments - - Examples - -------- - >>> neighbor_stat( - ... system=".", - ... rcut=6.0, - ... type_map=[ - ... "C", - ... "H", - ... "O", - ... "N", - ... "P", - ... "S", - ... "Mg", - ... "Na", - ... "HW", - ... "OW", - ... "mNa", - ... "mCl", - ... "mC", - ... "mH", - ... "mMg", - ... "mN", - ... "mO", - ... "mP", - ... ], - ... ) - min_nbor_dist: 0.6599510670195264 - max_nbor_size: [23, 26, 19, 16, 2, 2, 1, 1, 72, 37, 5, 0, 31, 29, 1, 21, 20, 5] - """ - all_sys = expand_sys_str(system) - if not len(all_sys): - raise RuntimeError("Did not find valid system") - data = DeepmdDataSystem( - systems=all_sys, - batch_size=1, - test_size=1, - rcut=rcut, - type_map=type_map, - ) - data.get_batch() - nei = NeighborStat(data.get_ntypes(), rcut, one_type=one_type) - min_nbor_dist, max_nbor_size = nei.get_stat(data) - log.info("min_nbor_dist: %f" % min_nbor_dist) - log.info("max_nbor_size: %s" % str(max_nbor_size)) - return min_nbor_dist, max_nbor_size +__all__ = ["neighbor_stat"] diff --git a/deepmd/tf/utils/neighbor_stat.py b/deepmd/tf/utils/neighbor_stat.py index a240b515db..84e71c7a84 100644 --- a/deepmd/tf/utils/neighbor_stat.py +++ b/deepmd/tf/utils/neighbor_stat.py @@ -1,8 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging -import math from typing import ( - List, + Iterator, Tuple, ) @@ -20,11 +19,12 @@ from deepmd.tf.utils.parallel_op import ( ParallelOp, ) +from deepmd.utils.neighbor_stat import NeighborStat as BaseNeighborStat log = logging.getLogger(__name__) -class NeighborStat: +class NeighborStat(BaseNeighborStat): """Class for getting training data information. It loads data from DeepmdData object, and measures the data info, including neareest nbor distance between atoms, max nbor size of atoms and the output data range of the environment matrix. @@ -46,9 +46,7 @@ def __init__( one_type: bool = False, ) -> None: """Constructor.""" - self.rcut = rcut - self.ntypes = ntypes - self.one_type = one_type + super().__init__(ntypes, rcut, one_type) sub_graph = tf.Graph() def builder(): @@ -91,25 +89,20 @@ def builder(): self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) - def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, List[int]]: - """Get the data statistics of the training data, including nearest nbor distance between atoms, max nbor size of atoms. - - Parameters - ---------- - data - Class for manipulating many data systems. It is implemented with the help of DeepmdData. - - Returns - ------- - min_nbor_dist - The nearest distance between neighbor atoms - max_nbor_size - A list with ntypes integers, denotes the actual achieved max sel + def iterator( + self, data: DeepmdDataSystem + ) -> Iterator[Tuple[np.ndarray, float, str]]: + """Abstract method for producing data. + + Yields + ------ + np.ndarray + The maximal number of neighbors + float + The squared minimal distance between two atoms + str + The directory of the data system """ - self.min_nbor_dist = 100.0 - self.max_nbor_size = [0] - if not self.one_type: - self.max_nbor_size *= self.ntypes def feed(): for ii in range(len(data.system_dirs)): @@ -129,25 +122,4 @@ def feed(): "dir": str(jj), } - for mn, dt, jj in self.p.generate(self.sub_sess, feed()): - if np.isinf(dt): - log.warning( - "Atoms with no neighbors found in %s. Please make sure it's what you expected." - % jj - ) - if dt < self.min_nbor_dist: - if math.isclose(dt, 0.0, rel_tol=1e-6): - # it's unexpected that the distance between two atoms is zero - # zero distance will cause nan (#874) - raise RuntimeError( - "Some atoms are overlapping in %s. Please check your" - " training data to remove duplicated atoms." % jj - ) - self.min_nbor_dist = dt - self.max_nbor_size = np.maximum(mn, self.max_nbor_size) - - # do sqrt in the final - self.min_nbor_dist = math.sqrt(self.min_nbor_dist) - log.info("training data with min nbor dist: " + str(self.min_nbor_dist)) - log.info("training data with max nbor size: " + str(self.max_nbor_size)) - return self.min_nbor_dist, self.max_nbor_size + return self.p.generate(self.sub_sess, feed()) diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py new file mode 100644 index 0000000000..c6327a705e --- /dev/null +++ b/deepmd/utils/neighbor_stat.py @@ -0,0 +1,104 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import logging +import math +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Iterator, + Tuple, +) + +import numpy as np + +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) + +log = logging.getLogger(__name__) + + +class NeighborStat(ABC): + """Abstract base class for getting training data information. + + It loads data from DeepmdData object, and measures the data info, including + neareest nbor distance between atoms, max nbor size of atoms and the output + data range of the environment matrix. + + Parameters + ---------- + ntypes : int + The num of atom types + rcut : float + The cut-off radius + one_type : bool, optional, default=False + Treat all types as a single type. + """ + + def __init__( + self, + ntypes: int, + rcut: float, + one_type: bool = False, + ) -> None: + self.rcut = rcut + self.ntypes = ntypes + self.one_type = one_type + + def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, np.ndarray]: + """Get the data statistics of the training data, including nearest nbor distance between atoms, max nbor size of atoms. + + Parameters + ---------- + data + Class for manipulating many data systems. It is implemented with the help of DeepmdData. + + Returns + ------- + min_nbor_dist + The nearest distance between neighbor atoms + max_nbor_size + An array with ntypes integers, denotes the actual achieved max sel + """ + min_nbor_dist = 100.0 + max_nbor_size = np.zeros(1 if self.one_type else self.ntypes, dtype=int) + + for mn, dt, jj in self.iterator(data): + if np.isinf(dt): + log.warning( + "Atoms with no neighbors found in %s. Please make sure it's what you expected." + % jj + ) + if dt < min_nbor_dist: + if math.isclose(dt, 0.0, rel_tol=1e-6): + # it's unexpected that the distance between two atoms is zero + # zero distance will cause nan (#874) + raise RuntimeError( + "Some atoms are overlapping in %s. Please check your" + " training data to remove duplicated atoms." % jj + ) + min_nbor_dist = dt + max_nbor_size = np.maximum(mn, max_nbor_size) + + # do sqrt in the final + min_nbor_dist = math.sqrt(min_nbor_dist) + log.info("training data with min nbor dist: " + str(min_nbor_dist)) + log.info("training data with max nbor size: " + str(max_nbor_size)) + return min_nbor_dist, max_nbor_size + + @abstractmethod + def iterator( + self, data: DeepmdDataSystem + ) -> Iterator[Tuple[np.ndarray, float, str]]: + """Abstract method for producing data. + + Yields + ------ + mn : np.ndarray + The maximal number of neighbors + dt : float + The squared minimal distance between two atoms + jj : str + The directory of the data system + """ diff --git a/source/tests/common/dpmodel/test_neighbor_stat.py b/source/tests/common/dpmodel/test_neighbor_stat.py new file mode 100644 index 0000000000..aa2ed5b3db --- /dev/null +++ b/source/tests/common/dpmodel/test_neighbor_stat.py @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import shutil +import unittest + +import dpdata +import numpy as np + +from deepmd.entrypoints.neighbor_stat import ( + neighbor_stat, +) + + +def gen_sys(nframes): + natoms = 1000 + data = {} + X, Y, Z = np.mgrid[0:2:3j, 0:2:3j, 0:2:3j] + positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T # + 0.1 + data["coords"] = np.repeat(positions[np.newaxis, :, :], nframes, axis=0) + data["forces"] = np.random.random([nframes, natoms, 3]) + data["cells"] = np.array([3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0]).reshape( + 1, 3, 3 + ) + data["energies"] = np.random.random([nframes, 1]) + data["atom_names"] = ["TYPE"] + data["atom_numbs"] = [27] + data["atom_types"] = np.repeat(0, 27) + return data + + +class TestNeighborStat(unittest.TestCase): + def setUp(self): + data0 = gen_sys(1) + sys0 = dpdata.LabeledSystem() + sys0.data = data0 + sys0.to_deepmd_npy("system_0", set_size=1) + + def tearDown(self): + shutil.rmtree("system_0") + + def test_neighbor_stat(self): + for rcut in (0.0, 1.0, 2.0, 4.0): + for one_type in (True, False): + with self.subTest(rcut=rcut, one_type=one_type): + rcut += 1e-3 # prevent numerical errors + min_nbor_dist, max_nbor_size = neighbor_stat( + system="system_0", + rcut=rcut, + type_map=["TYPE"], + one_type=one_type, + backend="numpy", + ) + upper = np.ceil(rcut) + 1 + X, Y, Z = np.mgrid[-upper:upper, -upper:upper, -upper:upper] + positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T + # distance to (0,0,0) + distance = np.linalg.norm(positions, axis=1) + expected_neighbors = np.count_nonzero( + np.logical_and(distance > 0, distance <= rcut) + ) + self.assertAlmostEqual(min_nbor_dist, 1.0, 6) + self.assertEqual(max_nbor_size, [expected_neighbors]) diff --git a/source/tests/pt/test_neighbor_stat.py b/source/tests/pt/test_neighbor_stat.py new file mode 100644 index 0000000000..7e4be0909e --- /dev/null +++ b/source/tests/pt/test_neighbor_stat.py @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import shutil +import unittest + +import dpdata +import numpy as np + +from deepmd.entrypoints.neighbor_stat import ( + neighbor_stat, +) + + +def gen_sys(nframes): + natoms = 1000 + data = {} + X, Y, Z = np.mgrid[0:2:3j, 0:2:3j, 0:2:3j] + positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T # + 0.1 + data["coords"] = np.repeat(positions[np.newaxis, :, :], nframes, axis=0) + data["forces"] = np.random.random([nframes, natoms, 3]) + data["cells"] = np.array([3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0]).reshape( + 1, 3, 3 + ) + data["energies"] = np.random.random([nframes, 1]) + data["atom_names"] = ["TYPE"] + data["atom_numbs"] = [27] + data["atom_types"] = np.repeat(0, 27) + return data + + +class TestNeighborStat(unittest.TestCase): + def setUp(self): + data0 = gen_sys(1) + sys0 = dpdata.LabeledSystem() + sys0.data = data0 + sys0.to_deepmd_npy("system_0", set_size=1) + + def tearDown(self): + shutil.rmtree("system_0") + + def test_neighbor_stat(self): + for rcut in (0.0, 1.0, 2.0, 4.0): + for one_type in (True, False): + with self.subTest(rcut=rcut, one_type=one_type): + rcut += 1e-3 # prevent numerical errors + min_nbor_dist, max_nbor_size = neighbor_stat( + system="system_0", + rcut=rcut, + type_map=["TYPE"], + one_type=one_type, + backend="pytorch", + ) + upper = np.ceil(rcut) + 1 + X, Y, Z = np.mgrid[-upper:upper, -upper:upper, -upper:upper] + positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T + # distance to (0,0,0) + distance = np.linalg.norm(positions, axis=1) + expected_neighbors = np.count_nonzero( + np.logical_and(distance > 0, distance <= rcut) + ) + self.assertAlmostEqual(min_nbor_dist, 1.0, 6) + self.assertEqual(max_nbor_size, [expected_neighbors])