Merge branch 'devel' into 3742

deepmodeling · May 27, 2024 · 5f49f40 · 5f49f40
2 parents 5554dd2 + d754672
commit 5f49f40
Show file tree

Hide file tree

Showing 41 changed files with 199 additions and 335 deletions.
diff --git a/deepmd/common.py b/deepmd/common.py
@@ -40,7 +40,6 @@
 __all__ = [
     "select_idx_map",
     "make_default_mesh",
-    "j_must_have",
     "j_loader",
     "expand_sys_str",
     "get_np_precision",
@@ -127,15 +126,20 @@ def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray:
     return default_mesh
 
 
-# TODO: rename j_must_have to j_deprecated and only warn about deprecated keys
-# maybe rename this to j_deprecated and only warn about deprecated keys,
-# if the deprecated_key argument is left empty function puppose is only custom
-# error since dict[key] already raises KeyError when the key is missing
-def j_must_have(
+def j_deprecated(
     jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = []
 ) -> "_DICT_VAL":
     """Assert that supplied dictionary conaines specified key.
 
+    Parameters
+    ----------
+    jdata : Dict[str, _DICT_VAL]
+        dictionary in which `key` is looked up
+    key : str
+        name of the key expected in `jdata`
+    deprecated_key : List[str], optional
+        deprecated alternative names for `key`, by default []
+
     Returns
     -------
     _DICT_VAL
@@ -186,7 +190,6 @@ def j_loader(filename: Union[str, Path]) -> Dict[str, Any]:
         raise TypeError("config file must be json, or yaml/yml")
 
 
-# TODO port expand_sys_str completely to pathlib when all callers are ported
 def expand_sys_str(root_dir: Union[str, Path]) -> List[str]:
     """Recursively iterate over directories taking those that contain `type.raw` file.
 

diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py
@@ -845,10 +845,8 @@ def call(
         else:
             raise NotImplementedError
 
-        input_r = dmatrix.reshape(-1, nnei, 4)[:, :, 1:4] / np.maximum(
-            np.linalg.norm(
-                dmatrix.reshape(-1, nnei, 4)[:, :, 1:4], axis=-1, keepdims=True
-            ),
+        input_r = rr.reshape(-1, nnei, 4)[:, :, 1:4] / np.maximum(
+            np.linalg.norm(rr.reshape(-1, nnei, 4)[:, :, 1:4], axis=-1, keepdims=True),
             1e-12,
         )
         gg = self.dpa1_attention(

diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py
@@ -559,7 +559,7 @@ def forward(
                 raise NotImplementedError
 
             input_r = torch.nn.functional.normalize(
-                dmatrix.reshape(-1, self.nnei, 4)[:, :, 1:4], dim=-1
+                rr.reshape(-1, self.nnei, 4)[:, :, 1:4], dim=-1
             )
             gg = self.dpa1_attention(
                 gg, nlist_mask, input_r=input_r, sw=sw

diff --git a/deepmd/pt/utils/env_mat_stat.py b/deepmd/pt/utils/env_mat_stat.py
@@ -141,7 +141,6 @@ def iter(
                 zero_mean,
                 one_stddev,
                 self.descriptor.get_rcut(),
-                # TODO: export rcut_smth from DescriptorBlock
                 self.descriptor.get_rcut_smth(),
                 radial_only,
                 protection=self.descriptor.get_env_protection(),

diff --git a/deepmd/tf/common.py b/deepmd/tf/common.py
@@ -26,7 +26,6 @@
     expand_sys_str,
     get_np_precision,
     j_loader,
-    j_must_have,
     make_default_mesh,
     select_idx_map,
 )
@@ -47,7 +46,6 @@
     # from deepmd.common
     "select_idx_map",
     "make_default_mesh",
-    "j_must_have",
     "j_loader",
     "expand_sys_str",
     "get_np_precision",

diff --git a/deepmd/tf/entrypoints/train.py b/deepmd/tf/entrypoints/train.py
@@ -15,7 +15,6 @@
 
 from deepmd.tf.common import (
     j_loader,
-    j_must_have,
 )
 from deepmd.tf.env import (
     reset_default_tf_session_config,
@@ -211,7 +210,7 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = Fal
             modifier.build_fv_graph()
 
     # get training info
-    stop_batch = j_must_have(jdata["training"], "numb_steps")
+    stop_batch = jdata["training"]["numb_steps"]
     origin_type_map = jdata["model"].get("origin_type_map", None)
     if (
         origin_type_map is not None and not origin_type_map

diff --git a/deepmd/tf/train/trainer.py b/deepmd/tf/train/trainer.py
@@ -29,7 +29,6 @@
 )
 from deepmd.tf.common import (
     get_precision,
-    j_must_have,
 )
 from deepmd.tf.env import (
     GLOBAL_ENER_FLOAT_PRECISION,
@@ -91,7 +90,7 @@ def __init__(self, jdata, run_opt, is_compress=False):
 
     def _init_param(self, jdata):
         # model config
-        model_param = j_must_have(jdata, "model")
+        model_param = jdata["model"]
 
         # nvnmd
         self.nvnmd_param = jdata.get("nvnmd", {})
@@ -123,7 +122,7 @@ def get_lr_and_coef(lr_param):
             return lr, scale_lr_coef
 
         # learning rate
-        lr_param = j_must_have(jdata, "learning_rate")
+        lr_param = jdata["learning_rate"]
         self.lr, self.scale_lr_coef = get_lr_and_coef(lr_param)
         # loss
         # infer loss type by fitting_type

diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py
@@ -61,6 +61,11 @@ def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None:
             self.maximum_working_batch_size = initial_batch_size
             if self.is_gpu_available():
                 self.minimal_not_working_batch_size = 2**31
+                log.info(
+                    "If you encounter the error 'an illegal memory access was encountered', this may be due to a TensorFlow issue. "
+                    "To avoid this, set the environment variable DP_INFER_BATCH_SIZE to a smaller value than the last adjusted batch size. "
+                    "The environment variable DP_INFER_BATCH_SIZE controls the inference batch size (nframes * natoms). "
+                )
             else:
                 self.minimal_not_working_batch_size = (
                     self.maximum_working_batch_size + 1

diff --git a/deepmd/utils/compat.py b/deepmd/utils/compat.py
@@ -17,7 +17,7 @@
 import numpy as np
 
 from deepmd.common import (
-    j_must_have,
+    j_deprecated,
 )
 
 
@@ -127,8 +127,8 @@ def _smth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]:
     descriptor["sel"] = jdata["sel_a"]
     _jcopy(jdata, descriptor, ("rcut",))
     descriptor["rcut_smth"] = jdata.get("rcut_smth", descriptor["rcut"])
-    descriptor["neuron"] = j_must_have(jdata, "filter_neuron")
-    descriptor["axis_neuron"] = j_must_have(jdata, "axis_neuron", ["n_axis_neuron"])
+    descriptor["neuron"] = jdata["filter_neuron"]
+    descriptor["axis_neuron"] = j_deprecated(jdata, "axis_neuron", ["n_axis_neuron"])
     descriptor["resnet_dt"] = False
     if "resnet_dt" in jdata:
         descriptor["resnet_dt"] = jdata["filter_resnet_dt"]
@@ -154,7 +154,7 @@ def _fitting_net(jdata: Dict[str, Any]) -> Dict[str, Any]:
     seed = jdata.get("seed", None)
     if seed is not None:
         fitting_net["seed"] = seed
-    fitting_net["neuron"] = j_must_have(jdata, "fitting_neuron", ["n_neuron"])
+    fitting_net["neuron"] = j_deprecated(jdata, "fitting_neuron", ["n_neuron"])
     fitting_net["resnet_dt"] = True
     if "resnet_dt" in jdata:
         fitting_net["resnet_dt"] = jdata["resnet_dt"]
@@ -237,16 +237,16 @@ def _training(jdata: Dict[str, Any]) -> Dict[str, Any]:
     training["disp_file"] = "lcurve.out"
     if "disp_file" in jdata:
         training["disp_file"] = jdata["disp_file"]
-    training["disp_freq"] = j_must_have(jdata, "disp_freq")
-    training["numb_test"] = j_must_have(jdata, "numb_test")
-    training["save_freq"] = j_must_have(jdata, "save_freq")
-    training["save_ckpt"] = j_must_have(jdata, "save_ckpt")
-    training["disp_training"] = j_must_have(jdata, "disp_training")
-    training["time_training"] = j_must_have(jdata, "time_training")
+    training["disp_freq"] = jdata["disp_freq"]
+    training["numb_test"] = jdata["numb_test"]
+    training["save_freq"] = jdata["save_freq"]
+    training["save_ckpt"] = jdata["save_ckpt"]
+    training["disp_training"] = jdata["disp_training"]
+    training["time_training"] = jdata["time_training"]
     if "profiling" in jdata:
         training["profiling"] = jdata["profiling"]
         if training["profiling"]:
-            training["profiling_file"] = j_must_have(jdata, "profiling_file")
+            training["profiling_file"] = jdata["profiling_file"]
     return training
 
 
@@ -378,7 +378,7 @@ def is_deepmd_v0_input(jdata):
         return "model" not in jdata.keys()
 
     def is_deepmd_v1_input(jdata):
-        return "systems" in j_must_have(jdata, "training").keys()
+        return "systems" in jdata["training"].keys()
 
     if is_deepmd_v0_input(jdata):
         jdata = convert_input_v0_v1(jdata, warning, None)

diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py
@@ -18,7 +18,6 @@
 import deepmd.utils.random as dp_random
 from deepmd.common import (
     expand_sys_str,
-    j_must_have,
     make_default_mesh,
 )
 from deepmd.env import (
@@ -792,10 +791,10 @@ def get_data(
     DeepmdDataSystem
         The data system
     """
-    systems = j_must_have(jdata, "systems")
+    systems = jdata["systems"]
     systems = process_systems(systems)
 
-    batch_size = j_must_have(jdata, "batch_size")
+    batch_size = jdata["batch_size"]
     sys_probs = jdata.get("sys_probs", None)
     auto_prob = jdata.get("auto_prob", "prob_sys_size")
     optional_type_map = not multi_task_mode

diff --git a/source/tests/consistent/descriptor/test_dpa1.py b/source/tests/consistent/descriptor/test_dpa1.py
@@ -201,15 +201,13 @@ def skip_tf(self) -> bool:
             precision,
             use_econf_tebd,
         ) = self.param
-        # TODO (excluded_types != [] and attn_layer > 0) need fix
         return (
             CommonTest.skip_tf
             or (
                 env_protection != 0.0
                 or smooth_type_embedding
                 or not normalize
                 or temperature != 1.0
-                or (excluded_types != [] and attn_layer > 0)
                 or (type_one_side and tebd_input_mode == "strip")  # not consistent yet
             )
             or self.is_meaningless_zero_attention_layer_tests(

diff --git a/source/tests/tf/test_data_large_batch.py b/source/tests/tf/test_data_large_batch.py
@@ -5,9 +5,6 @@
 import numpy as np
 from packaging.version import parse as parse_version
 
-from deepmd.tf.common import (
-    j_must_have,
-)
 from deepmd.tf.descriptor import (
     DescrptSeAtten,
 )
@@ -50,11 +47,11 @@ def test_data_mixed_type(self):
         jfile = "water_se_atten_mixed_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, "systems")
+        systems = jdata["systems"]
         batch_size = 1
         test_size = 1
-        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
-        type_map = j_must_have(jdata["model"], "type_map")
+        rcut = jdata["model"]["descriptor"]["rcut"]
+        type_map = jdata["model"]["type_map"]
 
         data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map)
         data_requirement = {
@@ -248,11 +245,11 @@ def test_stripped_data_mixed_type(self):
         jfile = "water_se_atten_mixed_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, "systems")
+        systems = jdata["systems"]
         batch_size = 1
         test_size = 1
-        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
-        type_map = j_must_have(jdata["model"], "type_map")
+        rcut = jdata["model"]["descriptor"]["rcut"]
+        type_map = jdata["model"]["type_map"]
 
         data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map)
         data_requirement = {
@@ -446,11 +443,11 @@ def test_compressible_data_mixed_type(self):
         jfile = "water_se_atten_mixed_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, "systems")
+        systems = jdata["systems"]
         batch_size = 1
         test_size = 1
-        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
-        type_map = j_must_have(jdata["model"], "type_map")
+        rcut = jdata["model"]["descriptor"]["rcut"]
+        type_map = jdata["model"]["type_map"]
 
         data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map)
         data_requirement = {

diff --git a/source/tests/tf/test_data_modifier.py b/source/tests/tf/test_data_modifier.py
@@ -3,9 +3,6 @@
 
 import numpy as np
 
-from deepmd.tf.common import (
-    j_must_have,
-)
 from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
@@ -61,12 +58,12 @@ def _setUp(self):
         rcut = model.model.get_rcut()
 
         # init data system
-        systems = j_must_have(jdata["training"], "systems")
+        systems = jdata["training"]["systems"]
         # systems[0] = tests_path / systems[0]
         systems = [tests_path / ii for ii in systems]
         set_pfx = "set"
-        batch_size = j_must_have(jdata["training"], "batch_size")
-        test_size = j_must_have(jdata["training"], "numb_test")
+        batch_size = jdata["training"]["batch_size"]
+        test_size = jdata["training"]["numb_test"]
         data = DeepmdDataSystem(
             systems, batch_size, test_size, rcut, set_prefix=set_pfx
         )

diff --git a/source/tests/tf/test_data_modifier_shuffle.py b/source/tests/tf/test_data_modifier_shuffle.py
@@ -4,9 +4,6 @@
 
 import numpy as np
 
-from deepmd.tf.common import (
-    j_must_have,
-)
 from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
@@ -64,10 +61,10 @@ def _setUp(self):
         rcut = model.model.get_rcut()
 
         # init data system
-        systems = j_must_have(jdata["training"], "systems")
+        systems = jdata["training"]["systems"]
         set_pfx = "set"
-        batch_size = j_must_have(jdata["training"], "batch_size")
-        test_size = j_must_have(jdata["training"], "numb_test")
+        batch_size = jdata["training"]["batch_size"]
+        test_size = jdata["training"]["numb_test"]
         data = DeepmdDataSystem(
             systems, batch_size, test_size, rcut, set_prefix=set_pfx
         )

diff --git a/source/tests/tf/test_descrpt_hybrid.py b/source/tests/tf/test_descrpt_hybrid.py
@@ -4,9 +4,6 @@
 import numpy as np
 from packaging.version import parse as parse_version
 
-from deepmd.tf.common import (
-    j_must_have,
-)
 from deepmd.tf.descriptor import (
     DescrptHybrid,
 )
@@ -40,10 +37,8 @@ def test_descriptor_hybrid(self):
         jfile = "water_hybrid.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, "systems")
+        systems = jdata["systems"]
         set_pfx = "set"
-        batch_size = j_must_have(jdata, "batch_size")
-        test_size = j_must_have(jdata, "numb_test")
         batch_size = 2
         test_size = 1
         rcut = 6

diff --git a/source/tests/tf/test_descrpt_se_a_mask.py b/source/tests/tf/test_descrpt_se_a_mask.py
@@ -3,9 +3,6 @@
 
 import numpy as np
 
-from deepmd.tf.common import (
-    j_must_have,
-)
 from deepmd.tf.descriptor import (
     DescrptSeAMask,
 )
@@ -231,12 +228,12 @@ def test_descriptor_se_a_mask(self):
         jdata["training"]["validation_data"]["systems"] = [
             str(tests_path / "data_dp_mask")
         ]
-        systems = j_must_have(jdata["training"]["validation_data"], "systems")
+        systems = jdata["training"]["validation_data"]["systems"]
         set_pfx = "set"
         batch_size = 2
         test_size = 1
         rcut = 20.0  # For DataSystem interface compatibility, not used in this test.
-        sel = j_must_have(jdata["model"]["descriptor"], "sel")
+        sel = jdata["model"]["descriptor"]["sel"]
         ntypes = len(sel)
         total_atom_num = np.cumsum(sel)[-1]