diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index 6c1331ec1d..722375fc68 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -112,7 +112,7 @@ def distinguish_types(self) -> bool: """Returns if the descriptor requires a neighbor list that distinguish different atomic types or not. """ - return False + return self.se_atten.distinguish_types() @property def dim_out(self): diff --git a/deepmd/pt/model/descriptor/hybrid.py b/deepmd/pt/model/descriptor/hybrid.py index fb7e374ede..e944d1277b 100644 --- a/deepmd/pt/model/descriptor/hybrid.py +++ b/deepmd/pt/model/descriptor/hybrid.py @@ -103,6 +103,14 @@ def get_dim_in(self) -> int: def get_dim_emb(self): return self.dim_emb + def distinguish_types(self) -> bool: + """Returns if the descriptor requires a neighbor list that distinguish different + atomic types or not. + """ + return True in [ + descriptor.distinguish_types() for descriptor in self.descriptor_list + ] + @property def dim_out(self): """Returns the output dimension of this descriptor.""" diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index 0a302b6f92..ce40909352 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -21,7 +21,7 @@ env, ) from deepmd.pt.utils.nlist import ( - build_neighbor_list, + process_input, ) from deepmd.pt.utils.utils import ( get_activation_fn, @@ -178,6 +178,12 @@ def get_dim_emb(self) -> int: """Returns the embedding dimension g2.""" return self.g2_dim + def distinguish_types(self) -> bool: + """Returns if the descriptor requires a neighbor list that distinguish different + atomic types or not. + """ + return False + @property def dim_out(self): """Returns the output dimension of this descriptor.""" @@ -272,28 +278,24 @@ def compute_input_stats(self, merged): suma2 = [] mixed_type = "real_natoms_vec" in merged[0] for system in merged: - index = system["mapping"].unsqueeze(-1).expand(-1, -1, 3) - extended_coord = torch.gather(system["coord"], dim=1, index=index) - extended_coord = extended_coord - system["shift"] - index = system["mapping"] - extended_atype = torch.gather(system["atype"], dim=1, index=index) - nloc = system["atype"].shape[-1] - ####################################################### - # dirty hack here! the interface of dataload should be - # redesigned to support descriptors like dpa2 - ####################################################### - nlist = build_neighbor_list( - extended_coord, - extended_atype, - nloc, - self.rcut, + coord, atype, box, natoms = ( + system["coord"], + system["atype"], + system["box"], + system["natoms"], + ) + extended_coord, extended_atype, mapping, nlist = process_input( + coord, + atype, + self.get_rcut(), self.get_sel(), - distinguish_types=False, + distinguish_types=self.distinguish_types(), + box=box, ) env_mat, _, _ = prod_env_mat_se_a( extended_coord, nlist, - system["atype"], + atype, self.mean, self.stddev, self.rcut, @@ -301,15 +303,16 @@ def compute_input_stats(self, merged): ) if not mixed_type: sysr, sysr2, sysa, sysa2, sysn = analyze_descrpt( - env_mat.detach().cpu().numpy(), ndescrpt, system["natoms"] + env_mat.detach().cpu().numpy(), ndescrpt, natoms ) else: + real_natoms_vec = system["real_natoms_vec"] sysr, sysr2, sysa, sysa2, sysn = analyze_descrpt( env_mat.detach().cpu().numpy(), ndescrpt, - system["real_natoms_vec"], + real_natoms_vec, mixed_type=mixed_type, - real_atype=system["atype"].detach().cpu().numpy(), + real_atype=atype.detach().cpu().numpy(), ) sumr.append(sysr) suma.append(sysa) diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index 82e7e5185a..7d8a96e738 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -37,6 +37,9 @@ from deepmd.pt.model.network.network import ( TypeFilter, ) +from deepmd.pt.utils.nlist import ( + process_input, +) log = logging.getLogger(__name__) @@ -100,7 +103,7 @@ def distinguish_types(self): """Returns if the descriptor requires a neighbor list that distinguish different atomic types or not. """ - return True + return self.sea.distinguish_types() @property def dim_out(self): @@ -347,6 +350,12 @@ def get_dim_in(self) -> int: """Returns the input dimension.""" return self.dim_in + def distinguish_types(self) -> bool: + """Returns if the descriptor requires a neighbor list that distinguish different + atomic types or not. + """ + return True + @property def dim_out(self): """Returns the output dimension of this descriptor.""" @@ -381,20 +390,31 @@ def compute_input_stats(self, merged): sumr2 = [] suma2 = [] for system in merged: - index = system["mapping"].unsqueeze(-1).expand(-1, -1, 3) - extended_coord = torch.gather(system["coord"], dim=1, index=index) - extended_coord = extended_coord - system["shift"] + coord, atype, box, natoms = ( + system["coord"], + system["atype"], + system["box"], + system["natoms"], + ) + extended_coord, extended_atype, mapping, nlist = process_input( + coord, + atype, + self.get_rcut(), + self.get_sel(), + distinguish_types=self.distinguish_types(), + box=box, + ) env_mat, _, _ = prod_env_mat_se_a( extended_coord, - system["nlist"], - system["atype"], + nlist, + atype, self.mean, self.stddev, self.rcut, self.rcut_smth, ) sysr, sysr2, sysa, sysa2, sysn = analyze_descrpt( - env_mat.detach().cpu().numpy(), self.ndescrpt, system["natoms"] + env_mat.detach().cpu().numpy(), self.ndescrpt, natoms ) sumr.append(sysr) suma.append(sysa) diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py index 3469d43e40..875d235e80 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -21,6 +21,9 @@ from deepmd.pt.utils import ( env, ) +from deepmd.pt.utils.nlist import ( + process_input, +) @DescriptorBlock.register("se_atten") @@ -161,6 +164,12 @@ def get_dim_emb(self) -> int: """Returns the output dimension of embedding.""" return self.filter_neuron[-1] + def distinguish_types(self) -> bool: + """Returns if the descriptor requires a neighbor list that distinguish different + atomic types or not. + """ + return False + @property def dim_out(self): """Returns the output dimension of this descriptor.""" @@ -185,13 +194,24 @@ def compute_input_stats(self, merged): suma2 = [] mixed_type = "real_natoms_vec" in merged[0] for system in merged: - index = system["mapping"].unsqueeze(-1).expand(-1, -1, 3) - extended_coord = torch.gather(system["coord"], dim=1, index=index) - extended_coord = extended_coord - system["shift"] + coord, atype, box, natoms = ( + system["coord"], + system["atype"], + system["box"], + system["natoms"], + ) + extended_coord, extended_atype, mapping, nlist = process_input( + coord, + atype, + self.get_rcut(), + self.get_sel(), + distinguish_types=self.distinguish_types(), + box=box, + ) env_mat, _, _ = prod_env_mat_se_a( extended_coord, - system["nlist"], - system["atype"], + nlist, + atype, self.mean, self.stddev, self.rcut, @@ -199,15 +219,16 @@ def compute_input_stats(self, merged): ) if not mixed_type: sysr, sysr2, sysa, sysa2, sysn = analyze_descrpt( - env_mat.detach().cpu().numpy(), self.ndescrpt, system["natoms"] + env_mat.detach().cpu().numpy(), self.ndescrpt, natoms ) else: + real_natoms_vec = system["real_natoms_vec"] sysr, sysr2, sysa, sysa2, sysn = analyze_descrpt( env_mat.detach().cpu().numpy(), self.ndescrpt, - system["real_natoms_vec"], + real_natoms_vec, mixed_type=mixed_type, - real_atype=system["atype"].detach().cpu().numpy(), + real_atype=atype.detach().cpu().numpy(), ) sumr.append(sysr) suma.append(sysa) diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py index 1e76c6a468..8630a03804 100644 --- a/deepmd/pt/model/model/make_model.py +++ b/deepmd/pt/model/model/make_model.py @@ -14,12 +14,8 @@ fit_output_to_model_output, ) from deepmd.pt.utils.nlist import ( - build_neighbor_list, - extend_coord_with_ghosts, nlist_distinguish_types, -) -from deepmd.pt.utils.region import ( - normalize_coord, + process_input, ) @@ -92,26 +88,14 @@ def forward_common( The keys are defined by the `ModelOutputDef`. """ - nframes, nloc = atype.shape[:2] - if box is not None: - coord_normalized = normalize_coord( - coord.view(nframes, nloc, 3), - box.reshape(nframes, 3, 3), - ) - else: - coord_normalized = coord.clone() - extended_coord, extended_atype, mapping = extend_coord_with_ghosts( - coord_normalized, atype, box, self.get_rcut() - ) - nlist = build_neighbor_list( - extended_coord, - extended_atype, - nloc, + extended_coord, extended_atype, mapping, nlist = process_input( + coord, + atype, self.get_rcut(), self.get_sel(), distinguish_types=self.distinguish_types(), + box=box, ) - extended_coord = extended_coord.view(nframes, -1, 3) model_predict_lower = self.forward_common_lower( extended_coord, extended_atype, diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 0ec43f5a75..7f8cf4eb3c 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -267,26 +267,7 @@ def collate_batch(batch): example = batch[0] result = example.copy() for key in example.keys(): - if key == "shift" or key == "mapping": - natoms_extended = max([d[key].shape[0] for d in batch]) - n_frames = len(batch) - list = [] - for x in range(n_frames): - list.append(batch[x][key]) - if key == "shift": - result[key] = torch.zeros( - (n_frames, natoms_extended, 3), - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - ) - else: - result[key] = torch.zeros( - (n_frames, natoms_extended), - dtype=torch.long, - ) - for i in range(len(batch)): - natoms_tmp = list[i].shape[0] - result[key][i, :natoms_tmp] = list[i] - elif "find_" in key: + if "find_" in key: result[key] = batch[0][key] else: if batch[0][key] is None: diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index aca4a9ce5b..60055ebda9 100644 --- a/deepmd/pt/utils/dataset.py +++ b/deepmd/pt/utils/dataset.py @@ -9,7 +9,6 @@ import h5py import numpy as np import torch -import torch.distributed as dist from torch.utils.data import ( Dataset, ) @@ -189,85 +188,6 @@ def add( "high_prec": high_prec, } - # deprecated TODO - def get_batch_for_train(self, batch_size: int): - """Get a batch of data with at most `batch_size` frames. The frames are randomly picked from the data system. - - Args: - - batch_size: Frame count. - """ - if not hasattr(self, "_frames"): - self.set_size = 0 - self._set_count = 0 - self._iterator = 0 - if batch_size == "auto": - batch_size = -(-32 // self._natoms) - if self._iterator + batch_size > self.set_size: - set_idx = self._set_count % len(self._dirs) - if self.sets[set_idx] is None: - frames = self._load_set(self._dirs[set_idx]) - frames = self.preprocess(frames) - cnt = 0 - for item in self.sets: - if item is not None: - cnt += 1 - if cnt < env.CACHE_PER_SYS: - self.sets[set_idx] = frames - else: - frames = self.sets[set_idx] - self._frames = frames - self._shuffle_data() - if dist.is_initialized(): - world_size = dist.get_world_size() - rank = dist.get_rank() - ssize = self._frames["coord"].shape[0] - subsize = ssize // world_size - self._iterator = rank * subsize - self.set_size = min((rank + 1) * subsize, ssize) - else: - self.set_size = self._frames["coord"].shape[0] - self._iterator = 0 - self._set_count += 1 - iterator = min(self._iterator + batch_size, self.set_size) - idx = np.arange(self._iterator, iterator) - self._iterator += batch_size - return self._get_subdata(idx) - - # deprecated TODO - def get_batch(self, batch_size: int): - """Get a batch of data with at most `batch_size` frames. The frames are randomly picked from the data system. - Args: - - batch_size: Frame count. - """ - if not hasattr(self, "_frames"): - self.set_size = 0 - self._set_count = 0 - self._iterator = 0 - if batch_size == "auto": - batch_size = -(-32 // self._natoms) - if self._iterator + batch_size > self.set_size: - set_idx = self._set_count % len(self._dirs) - if self.sets[set_idx] is None: - frames = self._load_set(self._dirs[set_idx]) - frames = self.preprocess(frames) - cnt = 0 - for item in self.sets: - if item is not None: - cnt += 1 - if cnt < env.CACHE_PER_SYS: - self.sets[set_idx] = frames - else: - frames = self.sets[set_idx] - self._frames = frames - self._shuffle_data() - self.set_size = self._frames["coord"].shape[0] - self._iterator = 0 - self._set_count += 1 - iterator = min(self._iterator + batch_size, self.set_size) - idx = np.arange(self._iterator, iterator) - self._iterator += batch_size - return self._get_subdata(idx) - def get_ntypes(self): """Number of atom types in the system.""" if self._type_map is not None: @@ -470,63 +390,6 @@ def _load_data( data = np.zeros([nframes, ndof]).astype(env.GLOBAL_NP_FLOAT_PRECISION) return np.float32(0.0), data - # deprecated TODO - def preprocess(self, batch): - n_frames = batch["coord"].shape[0] - for kk in self._data_dict.keys(): - if "find_" in kk: - pass - else: - batch[kk] = torch.tensor(batch[kk], dtype=env.GLOBAL_PT_FLOAT_PRECISION) - if self._data_dict[kk]["atomic"]: - batch[kk] = batch[kk].view( - n_frames, -1, self._data_dict[kk]["ndof"] - ) - - for kk in ["type", "real_natoms_vec"]: - if kk in batch.keys(): - batch[kk] = torch.tensor(batch[kk], dtype=torch.long) - batch["atype"] = batch.pop("type") - - keys = ["nlist", "nlist_loc", "nlist_type", "shift", "mapping"] - coord = batch["coord"] - atype = batch["atype"] - box = batch["box"] - rcut = self.rcut - sec = self.sec - assert batch["atype"].max() < len(self._type_map) - nlist, nlist_loc, nlist_type, shift, mapping = [], [], [], [], [] - - for sid in range(n_frames): - region = Region3D(box[sid]) - nloc = atype[sid].shape[0] - _coord = normalize_coord(coord[sid], region, nloc) - coord[sid] = _coord - a, b, c, d, e = make_env_mat( - _coord, atype[sid], region, rcut, sec, type_split=self.type_split - ) - nlist.append(a) - nlist_loc.append(b) - nlist_type.append(c) - shift.append(d) - mapping.append(e) - nlist = torch.stack(nlist) - nlist_loc = torch.stack(nlist_loc) - nlist_type = torch.stack(nlist_type) - batch["nlist"] = nlist - batch["nlist_loc"] = nlist_loc - batch["nlist_type"] = nlist_type - natoms_extended = max([item.shape[0] for item in shift]) - batch["shift"] = torch.zeros( - (n_frames, natoms_extended, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION - ) - batch["mapping"] = torch.zeros((n_frames, natoms_extended), dtype=torch.long) - for i in range(len(shift)): - natoms_tmp = shift[i].shape[0] - batch["shift"][i, :natoms_tmp] = shift[i] - batch["mapping"][i, :natoms_tmp] = mapping[i] - return batch - def _shuffle_data(self): nframes = self._frames["coord"].shape[0] idx = np.arange(nframes) @@ -563,46 +426,21 @@ def single_preprocess(self, batch, sid): for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): batch[kk] = torch.tensor(batch[kk][sid], dtype=torch.long) - clean_coord = batch.pop("coord") - clean_type = batch.pop("type") - nloc = clean_type.shape[0] + batch["atype"] = batch["type"] rcut = self.rcut sec = self.sec - nlist, nlist_loc, nlist_type, shift, mapping = [], [], [], [], [] - if self.pbc: - box = batch["box"] - region = Region3D(box) - else: - box = None + if not self.pbc: batch["box"] = None - region = None if self.noise_settings is None: - batch["atype"] = clean_type - batch["coord"] = clean_coord - coord = clean_coord - atype = batch["atype"] + return batch + else: # TODO need to clean up this method! if self.pbc: - _coord = normalize_coord(coord, region, nloc) - + region = Region3D(batch["box"]) else: - _coord = coord.clone() - batch["coord"] = _coord - nlist, nlist_loc, nlist_type, shift, mapping = make_env_mat( - _coord, - atype, - region, - rcut, - sec, - pbc=self.pbc, - type_split=self.type_split, - ) - batch["nlist"] = nlist - batch["nlist_loc"] = nlist_loc - batch["nlist_type"] = nlist_type - batch["shift"] = shift - batch["mapping"] = mapping - return batch - else: + region = None + clean_coord = batch.pop("coord") + clean_type = batch.pop("type") + nloc = clean_type.shape[0] batch["clean_type"] = clean_type if self.pbc: _clean_coord = normalize_coord(clean_coord, region, nloc) @@ -678,7 +516,7 @@ def single_preprocess(self, batch, sid): else: _coord = noised_coord.clone() try: - nlist, nlist_loc, nlist_type, shift, mapping = make_env_mat( + _ = make_env_mat( _coord, masked_type, region, @@ -694,13 +532,8 @@ def single_preprocess(self, batch, sid): f"Add noise times beyond max tries {self.max_fail_num}!" ) continue - batch["atype"] = masked_type + batch["type"] = masked_type batch["coord"] = noised_coord - batch["nlist"] = nlist - batch["nlist_loc"] = nlist_loc - batch["nlist_type"] = nlist_type - batch["shift"] = shift - batch["mapping"] = mapping return batch def _get_item(self, index): @@ -783,104 +616,3 @@ def __getitem__(self, index): b_data = self._data_system._get_item(index) b_data["natoms"] = torch.tensor(self._natoms_vec) return b_data - - -# deprecated TODO -class DeepmdDataSet(Dataset): - def __init__( - self, - systems: List[str], - batch_size: int, - type_map: List[str], - rcut=None, - sel=None, - weight=None, - type_split=True, - ): - """Construct DeePMD-style dataset containing frames cross different systems. - - Args: - - systems: Paths to systems. - - batch_size: Max frame count in a batch. - - type_map: Atom types. - """ - self._batch_size = batch_size - self._type_map = type_map - if sel is not None: - if isinstance(sel, int): - sel = [sel] - sec = torch.cumsum(torch.tensor(sel), dim=0) - if isinstance(systems, str): - with h5py.File(systems) as file: - systems = [os.path.join(systems, item) for item in file.keys()] - self._data_systems = [ - DeepmdDataSystem( - ii, rcut, sec, type_map=self._type_map, type_split=type_split - ) - for ii in systems - ] - # check mix_type format - error_format_msg = ( - "if one of the system is of mixed_type format, " - "then all of the systems in this dataset should be of mixed_type format!" - ) - self.mixed_type = self._data_systems[0].mixed_type - for sys_item in self._data_systems[1:]: - assert sys_item.mixed_type == self.mixed_type, error_format_msg - - if weight is None: - - def weight(name, sys): - return sys.nframes - - self.probs = [ - weight(item, self._data_systems[i]) for i, item in enumerate(systems) - ] - self.probs = np.array(self.probs, dtype=float) - self.probs /= self.probs.sum() - self._ntypes = max([ii.get_ntypes() for ii in self._data_systems]) - self._natoms_vec = [ - ii.get_natoms_vec(self._ntypes) for ii in self._data_systems - ] - self.cache = [{} for _ in self._data_systems] - - @property - def nsystems(self): - return len(self._data_systems) - - def __len__(self): - return self.nsystems - - def __getitem__(self, index=None): - """Get a batch of frames from the selected system.""" - if index is None: - index = dp_random.choice(np.arange(self.nsystems), p=self.probs) - b_data = self._data_systems[index].get_batch(self._batch_size) - b_data["natoms"] = torch.tensor(self._natoms_vec[index]) - batch_size = b_data["coord"].shape[0] - b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) - return b_data - - # deprecated TODO - def get_training_batch(self, index=None): - """Get a batch of frames from the selected system.""" - if index is None: - index = dp_random.choice(np.arange(self.nsystems), p=self.probs) - b_data = self._data_systems[index].get_batch_for_train(self._batch_size) - b_data["natoms"] = torch.tensor(self._natoms_vec[index]) - batch_size = b_data["coord"].shape[0] - b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) - return b_data - - def get_batch(self, sys_idx=None): - """TF-compatible batch for testing.""" - pt_batch = self[sys_idx] - np_batch = {} - for key in ["coord", "box", "force", "energy", "virial", "atype", "natoms"]: - if key in pt_batch.keys(): - np_batch[key] = pt_batch[key].cpu().numpy() - batch_size = pt_batch["coord"].shape[0] - np_batch["coord"] = np_batch["coord"].reshape(batch_size, -1) - np_batch["natoms"] = np_batch["natoms"][0] - np_batch["force"] = np_batch["force"].reshape(batch_size, -1) - return np_batch, pt_batch diff --git a/deepmd/pt/utils/nlist.py b/deepmd/pt/utils/nlist.py index fdb2627f04..9d43ca8191 100644 --- a/deepmd/pt/utils/nlist.py +++ b/deepmd/pt/utils/nlist.py @@ -12,10 +12,42 @@ env, ) from deepmd.pt.utils.region import ( + normalize_coord, to_face_distance, ) +def process_input( + coord, + atype, + rcut: float, + sel: List[int], + distinguish_types: bool = False, + box: Optional[torch.Tensor] = None, +): + nframes, nloc = atype.shape[:2] + if box is not None: + coord_normalized = normalize_coord( + coord.view(nframes, nloc, 3), + box.reshape(nframes, 3, 3), + ) + else: + coord_normalized = coord.clone() + extended_coord, extended_atype, mapping = extend_coord_with_ghosts( + coord_normalized, atype, box, rcut + ) + nlist = build_neighbor_list( + extended_coord, + extended_atype, + nloc, + rcut, + sel, + distinguish_types=distinguish_types, + ) + extended_coord = extended_coord.view(nframes, -1, 3) + return extended_coord, extended_atype, mapping, nlist + + def build_neighbor_list( coord1: torch.Tensor, atype: torch.Tensor, diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 932ba9a409..edcdf742db 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -5,10 +5,6 @@ import numpy as np import torch -from deepmd.pt.utils import ( - env, -) - log = logging.getLogger(__name__) @@ -31,11 +27,6 @@ def make_stat_input(datasets, dataloaders, nbatches): "atype", "box", "natoms", - "mapping", - "nlist", - "nlist_loc", - "nlist_type", - "shift", ] if datasets[0].mixed_type: keys.append("real_natoms_vec") @@ -53,25 +44,6 @@ def make_stat_input(datasets, dataloaders, nbatches): if dd in keys: sys_stat[dd].append(stat_data[dd]) for key in keys: - if key == "mapping" or key == "shift": - extend = max(d.shape[1] for d in sys_stat[key]) - for jj in range(len(sys_stat[key])): - l = [] - item = sys_stat[key][jj] - for ii in range(item.shape[0]): - l.append(item[ii]) - n_frames = len(item) - if key == "shift": - shape = torch.zeros( - (n_frames, extend, 3), - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - ) - else: - shape = torch.zeros((n_frames, extend), dtype=torch.long) - for i in range(len(item)): - natoms_tmp = l[i].shape[0] - shape[i, :natoms_tmp] = l[i] - sys_stat[key][jj] = shape if not isinstance(sys_stat[key][0], list): if sys_stat[key][0] is None: sys_stat[key] = None diff --git a/source/tests/pt/model/test_descriptor.py b/source/tests/pt/model/test_descriptor.py index 2dd996349b..aa5b6017bf 100644 --- a/source/tests/pt/model/test_descriptor.py +++ b/source/tests/pt/model/test_descriptor.py @@ -21,13 +21,16 @@ env, ) from deepmd.pt.utils.dataset import ( - DeepmdDataSet, + DeepmdDataSetForLoader, ) from deepmd.pt.utils.env import ( DEVICE, GLOBAL_NP_FLOAT_PRECISION, GLOBAL_PT_FLOAT_PRECISION, ) +from deepmd.pt.utils.nlist import ( + process_input, +) from deepmd.tf.common import ( expand_sys_str, ) @@ -35,6 +38,10 @@ op_module, ) +from .test_embedding_net import ( + get_single_batch, +) + CUR_DIR = os.path.dirname(__file__) @@ -103,10 +110,14 @@ def setUp(self): self.systems = config["training"]["validation_data"]["systems"] if isinstance(self.systems, str): self.systems = expand_sys_str(self.systems) - ds = DeepmdDataSet( - self.systems, self.bsz, model_config["type_map"], self.rcut, self.sel + ds = DeepmdDataSetForLoader( + self.systems[0], + model_config["type_map"], + self.rcut, + self.sel, + type_split=True, ) - self.np_batch, self.pt_batch = ds.get_batch() + self.np_batch, self.pt_batch = get_single_batch(ds) self.sec = np.cumsum(self.sel) self.ntypes = len(self.sel) self.nnei = sum(self.sel) @@ -122,7 +133,7 @@ def test_consistency(self): dtype=GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE, ) - base_d, base_force, nlist = base_se_a( + base_d, base_force, base_nlist = base_se_a( rcut=self.rcut, rcut_smth=self.rcut_smth, sel=self.sel, @@ -132,14 +143,20 @@ def test_consistency(self): ) pt_coord = self.pt_batch["coord"].to(env.DEVICE) + atype = self.pt_batch["atype"].to(env.DEVICE) pt_coord.requires_grad_(True) - index = self.pt_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3).to(env.DEVICE) - extended_coord = torch.gather(pt_coord, dim=1, index=index) - extended_coord = extended_coord - self.pt_batch["shift"].to(env.DEVICE) - my_d, _, _ = prod_env_mat_se_a( - extended_coord.to(DEVICE), - self.pt_batch["nlist"].to(env.DEVICE), + extended_coord, extended_atype, mapping, nlist = process_input( + pt_coord, self.pt_batch["atype"].to(env.DEVICE), + self.rcut, + self.sel, + distinguish_types=True, + box=self.pt_batch["box"].to(env.DEVICE), + ) + my_d, _, _ = prod_env_mat_se_a( + extended_coord, + nlist, + atype, avg_zero.reshape([-1, self.nnei, 4]).to(DEVICE), std_ones.reshape([-1, self.nnei, 4]).to(DEVICE), self.rcut, @@ -151,16 +168,16 @@ def test_consistency(self): base_force = base_force.reshape(bsz, -1, 3) base_d = base_d.reshape(bsz, -1, self.nnei, 4) my_d = my_d.view(bsz, -1, self.nnei, 4).cpu().detach().numpy() - nlist = nlist.reshape(bsz, -1, self.nnei) + base_nlist = base_nlist.reshape(bsz, -1, self.nnei) - mapping = self.pt_batch["mapping"].cpu() - my_nlist = self.pt_batch["nlist"].view(bsz, -1).cpu() + mapping = mapping.cpu() + my_nlist = nlist.view(bsz, -1).cpu() mask = my_nlist == -1 my_nlist = my_nlist * ~mask my_nlist = torch.gather(mapping, dim=-1, index=my_nlist) my_nlist = my_nlist * ~mask - mask.long() my_nlist = my_nlist.cpu().view(bsz, -1, self.nnei).numpy() - self.assertTrue(np.allclose(nlist, my_nlist)) + self.assertTrue(np.allclose(base_nlist, my_nlist)) self.assertTrue(np.allclose(np.mean(base_d, axis=2), np.mean(my_d, axis=2))) self.assertTrue(np.allclose(np.std(base_d, axis=2), np.std(my_d, axis=2))) # descriptors may be different when there are multiple neighbors in the same distance diff --git a/source/tests/pt/model/test_descriptor_dpa1.py b/source/tests/pt/model/test_descriptor_dpa1.py index 21a43803c9..57adef0784 100644 --- a/source/tests/pt/model/test_descriptor_dpa1.py +++ b/source/tests/pt/model/test_descriptor_dpa1.py @@ -19,11 +19,7 @@ env, ) from deepmd.pt.utils.nlist import ( - build_neighbor_list, - extend_coord_with_ghosts, -) -from deepmd.pt.utils.region import ( - normalize_coord, + process_input, ) dtype = torch.float64 @@ -245,20 +241,9 @@ def test_descriptor_block(self): **dparams, ).to(env.DEVICE) des.load_state_dict(torch.load(self.file_model_param)) - rcut = dparams["rcut"] - nsel = dparams["sel"] coord = self.coord atype = self.atype box = self.cell - nf, nloc = coord.shape[:2] - coord_normalized = normalize_coord(coord, box.reshape(-1, 3, 3)) - extended_coord, extended_atype, mapping = extend_coord_with_ghosts( - coord_normalized, atype, box, rcut - ) - # single nlist - nlist = build_neighbor_list( - extended_coord, extended_atype, nloc, rcut, nsel, distinguish_types=False - ) # handel type_embedding type_embedding = TypeEmbedNet(ntypes, 8).to(env.DEVICE) type_embedding.load_state_dict(torch.load(self.file_type_embed)) @@ -266,6 +251,14 @@ def test_descriptor_block(self): ## to save model parameters # torch.save(des.state_dict(), 'model_weights.pth') # torch.save(type_embedding.state_dict(), 'model_weights.pth') + extended_coord, extended_atype, mapping, nlist = process_input( + coord, + atype, + des.get_rcut(), + des.get_sel(), + distinguish_types=des.distinguish_types(), + box=box, + ) descriptor, env_mat, diff, rot_mat, sw = des( nlist, extended_coord, @@ -307,18 +300,13 @@ def test_descriptor(self): coord = self.coord atype = self.atype box = self.cell - nf, nloc = coord.shape[:2] - coord_normalized = normalize_coord(coord, box.reshape(-1, 3, 3)) - extended_coord, extended_atype, mapping = extend_coord_with_ghosts( - coord_normalized, atype, box, des.get_rcut() - ) - nlist = build_neighbor_list( - extended_coord, - extended_atype, - nloc, + extended_coord, extended_atype, mapping, nlist = process_input( + coord, + atype, des.get_rcut(), - des.get_nsel(), - distinguish_types=False, + des.get_sel(), + distinguish_types=des.distinguish_types(), + box=box, ) descriptor, env_mat, diff, rot_mat, sw = des( extended_coord, diff --git a/source/tests/pt/model/test_descriptor_dpa2.py b/source/tests/pt/model/test_descriptor_dpa2.py index e614e64c2f..c80c92f3d2 100644 --- a/source/tests/pt/model/test_descriptor_dpa2.py +++ b/source/tests/pt/model/test_descriptor_dpa2.py @@ -19,11 +19,9 @@ env, ) from deepmd.pt.utils.nlist import ( - build_neighbor_list, - extend_coord_with_ghosts, -) -from deepmd.pt.utils.region import ( - normalize_coord, + build_multiple_neighbor_list, + get_multiple_nlist_key, + process_input, ) dtype = torch.float64 @@ -114,6 +112,7 @@ def setUp(self): self.file_model_param = Path(CUR_DIR) / "models" / "dpa2.pth" self.file_type_embed = Path(CUR_DIR) / "models" / "dpa2_tebd.pth" + # TODO This test for hybrid descriptor should be removed! def test_descriptor_hyb(self): # torch.manual_seed(0) model_hybrid_dpa2 = self.model_json @@ -129,34 +128,13 @@ def test_descriptor_hyb(self): # type_embd of repformer is removed model_dict.pop("descriptor_list.1.type_embd.embedding.weight") des.load_state_dict(model_dict) - all_rcut = [ii["rcut"] for ii in dlist] - all_nsel = [ii["sel"] for ii in dlist] + all_rcut = sorted([ii["rcut"] for ii in dlist]) + all_nsel = sorted([ii["sel"] for ii in dlist]) rcut_max = max(all_rcut) + sel_max = max(all_nsel) coord = self.coord atype = self.atype box = self.cell - nf, nloc = coord.shape[:2] - coord_normalized = normalize_coord(coord, box.reshape(-1, 3, 3)) - extended_coord, extended_atype, mapping = extend_coord_with_ghosts( - coord_normalized, atype, box, rcut_max - ) - ## single nlist - # nlist = build_neighbor_list( - # extended_coord, extended_atype, nloc, - # rcut_max, nsel, distinguish_types=False) - nlist_list = [] - for rcut, sel in zip(all_rcut, all_nsel): - nlist_list.append( - build_neighbor_list( - extended_coord, - extended_atype, - nloc, - rcut, - sel, - distinguish_types=False, - ) - ) - nlist = torch.cat(nlist_list, -1) # handel type_embedding type_embedding = TypeEmbedNet(ntypes, 8).to(env.DEVICE) type_embedding.load_state_dict(torch.load(self.file_type_embed)) @@ -164,6 +142,26 @@ def test_descriptor_hyb(self): ## to save model parameters # torch.save(des.state_dict(), 'model_weights.pth') # torch.save(type_embedding.state_dict(), 'model_weights.pth') + extended_coord, extended_atype, mapping, nlist_max = process_input( + coord, + atype, + rcut_max, + sel_max, + distinguish_types=des.distinguish_types(), + box=box, + ) + nlist_dict = build_multiple_neighbor_list( + extended_coord, + nlist_max, + all_rcut, + all_nsel, + ) + nlist_list = [] + for ii in des.descriptor_list: + nlist_list.append( + nlist_dict[get_multiple_nlist_key(ii.get_rcut(), ii.get_nsel())] + ) + nlist = torch.cat(nlist_list, -1) descriptor, env_mat, diff, rot_mat, sw = des( nlist, extended_coord, @@ -202,18 +200,13 @@ def test_descriptor(self): coord = self.coord atype = self.atype box = self.cell - nf, nloc = coord.shape[:2] - coord_normalized = normalize_coord(coord, box.reshape(-1, 3, 3)) - extended_coord, extended_atype, mapping = extend_coord_with_ghosts( - coord_normalized, atype, box, des.repinit.rcut - ) - nlist = build_neighbor_list( - extended_coord, - extended_atype, - nloc, - des.repinit.rcut, - des.repinit.sel, - distinguish_types=False, + extended_coord, extended_atype, mapping, nlist = process_input( + coord, + atype, + des.get_rcut(), + des.get_sel(), + distinguish_types=des.distinguish_types(), + box=box, ) descriptor, env_mat, diff, rot_mat, sw = des( extended_coord, diff --git a/source/tests/pt/model/test_dp_model.py b/source/tests/pt/model/test_dp_model.py index 6e009d3934..e9a6afc8a8 100644 --- a/source/tests/pt/model/test_dp_model.py +++ b/source/tests/pt/model/test_dp_model.py @@ -23,6 +23,7 @@ from deepmd.pt.utils.nlist import ( build_neighbor_list, extend_coord_with_ghosts, + process_input, ) from deepmd.pt.utils.utils import ( to_numpy_array, @@ -431,20 +432,13 @@ def test_self_consistency(self): to_numpy_array(ret0["atom_virial"]), to_numpy_array(ret1["atom_virial"]), ) - - coord_ext, atype_ext, mapping = extend_coord_with_ghosts( + coord_ext, atype_ext, mapping, nlist = process_input( to_torch_tensor(self.coord), to_torch_tensor(self.atype), - to_torch_tensor(self.cell), - self.rcut, - ) - nlist = build_neighbor_list( - coord_ext, - atype_ext, - self.nloc, self.rcut, self.sel, distinguish_types=md0.distinguish_types(), + box=to_torch_tensor(self.cell), ) args = [coord_ext, atype_ext, nlist] ret2 = md0.forward_lower(*args, do_atomic_virial=True) diff --git a/source/tests/pt/model/test_embedding_net.py b/source/tests/pt/model/test_embedding_net.py index 407f4949b5..1f457a4271 100644 --- a/source/tests/pt/model/test_embedding_net.py +++ b/source/tests/pt/model/test_embedding_net.py @@ -25,12 +25,15 @@ dp_random, ) from deepmd.pt.utils.dataset import ( - DeepmdDataSet, + DeepmdDataSetForLoader, ) from deepmd.pt.utils.env import ( DEVICE, GLOBAL_NP_FLOAT_PRECISION, ) +from deepmd.pt.utils.nlist import ( + process_input, +) from deepmd.tf.common import ( expand_sys_str, ) @@ -43,6 +46,25 @@ def gen_key(worb, depth, elemid): return (worb, depth, elemid) +def get_single_batch(dataset, index=None): + if index is None: + index = dp_random.choice(np.arange(len(dataset))) + pt_batch = dataset[index] + np_batch = {} + # TODO deprecated + for key in ["mapping", "shift", "nlist"]: + if key in pt_batch.keys(): + pt_batch[key] = pt_batch[key].unsqueeze(0) + for key in ["coord", "box", "force", "energy", "virial", "atype", "natoms"]: + if key in pt_batch.keys(): + pt_batch[key] = pt_batch[key].unsqueeze(0) + np_batch[key] = pt_batch[key].cpu().numpy() + np_batch["coord"] = np_batch["coord"].reshape(1, -1) + np_batch["natoms"] = np_batch["natoms"][0] + np_batch["force"] = np_batch["force"].reshape(1, -1) + return np_batch, pt_batch + + def base_se_a(descriptor, coord, atype, natoms, box): g = tf.Graph() with g.as_default(): @@ -105,12 +127,16 @@ def setUp(self): self.systems = config["training"]["validation_data"]["systems"] if isinstance(self.systems, str): self.systems = expand_sys_str(self.systems) - ds = DeepmdDataSet( - self.systems, self.bsz, model_config["type_map"], self.rcut, self.sel + ds = DeepmdDataSetForLoader( + self.systems[0], + model_config["type_map"], + self.rcut, + self.sel, + type_split=True, ) self.filter_neuron = model_config["descriptor"]["neuron"] self.axis_neuron = model_config["descriptor"]["axis_neuron"] - self.np_batch, self.torch_batch = ds.get_batch() + self.np_batch, self.torch_batch = get_single_batch(ds) def test_consistency(self): dp_d = DescrptSeA_tf( @@ -154,20 +180,18 @@ def test_consistency(self): pt_coord = self.torch_batch["coord"].to(env.DEVICE) pt_coord.requires_grad_(True) - index = ( - self.torch_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3).to(env.DEVICE) - ) - extended_coord = torch.gather(pt_coord, dim=1, index=index) - extended_coord = extended_coord - self.torch_batch["shift"].to(env.DEVICE) - extended_atype = torch.gather( + extended_coord, extended_atype, mapping, nlist = process_input( + pt_coord, self.torch_batch["atype"].to(env.DEVICE), - dim=1, - index=self.torch_batch["mapping"].to(env.DEVICE), + self.rcut, + self.sel, + distinguish_types=True, + box=self.torch_batch["box"].to(env.DEVICE), ) descriptor_out, _, _, _, _ = descriptor( extended_coord, extended_atype, - self.torch_batch["nlist"].to(env.DEVICE), + nlist, ) my_embedding = descriptor_out.cpu().detach().numpy() fake_energy = torch.sum(descriptor_out) diff --git a/source/tests/pt/model/test_model.py b/source/tests/pt/model/test_model.py index 522b30b2df..efe013a8a1 100644 --- a/source/tests/pt/model/test_model.py +++ b/source/tests/pt/model/test_model.py @@ -331,7 +331,9 @@ def test_consistency(self): # print(dst.mean(), dst.std()) dst.copy_(src) # Start forward computing - batch = my_ds.systems[0]._data_system.preprocess(batch) + batch = my_ds.systems[0]._data_system.single_preprocess(batch, 0) + for key in ["coord", "atype", "box", "energy", "force"]: + batch[key] = batch[key].unsqueeze(0) batch["coord"].requires_grad_(True) batch["natoms"] = torch.tensor( batch["natoms_vec"], device=batch["coord"].device diff --git a/source/tests/pt/test_loss.py b/source/tests/pt/test_loss.py index 14934c7be0..f0c75ef288 100644 --- a/source/tests/pt/test_loss.py +++ b/source/tests/pt/test_loss.py @@ -16,7 +16,7 @@ EnergyStdLoss, ) from deepmd.pt.utils.dataset import ( - DeepmdDataSet, + DeepmdDataSetForLoader, ) from deepmd.tf.common import ( expand_sys_str, @@ -25,6 +25,10 @@ EnerStdLoss, ) +from .model.test_embedding_net import ( + get_single_batch, +) + CUR_DIR = os.path.dirname(__file__) @@ -39,12 +43,13 @@ def get_batch(): rcut = model_config["descriptor"]["rcut"] # self.rcut_smth = model_config['descriptor']['rcut_smth'] sel = model_config["descriptor"]["sel"] - batch_size = config["training"]["training_data"]["batch_size"] systems = config["training"]["validation_data"]["systems"] if isinstance(systems, str): systems = expand_sys_str(systems) - dataset = DeepmdDataSet(systems, batch_size, model_config["type_map"], rcut, sel) - np_batch, pt_batch = dataset.get_batch() + dataset = DeepmdDataSetForLoader( + systems[0], model_config["type_map"], rcut, sel, type_split=True + ) + np_batch, pt_batch = get_single_batch(dataset) return np_batch, pt_batch diff --git a/source/tests/pt/test_stat.py b/source/tests/pt/test_stat.py index 240c354a69..bc95575a5a 100644 --- a/source/tests/pt/test_stat.py +++ b/source/tests/pt/test_stat.py @@ -165,10 +165,7 @@ def test_descriptor(self): "energy", "atype", "natoms", - "extended_coord", - "nlist", - "shift", - "mapping", + "box", ]: if key in sys.keys(): sys[key] = sys[key].to(env.DEVICE)