chore: rename j_must_have to j_deprecated and only warn about deprecated keys (#3816)

Fix #3523.

## Summary by CodeRabbit

- **Refactor**
  - Simplified code by removing the `j_must_have` function and directly accessing dictionary keys in various test files.
  - Replaced `j_must_have` with direct dictionary access for improved code readability and maintenance.
- **Chores**
  - Updated test files to directly access dictionary values, enhancing code readability and maintainability.

---------

Signed-off-by: Jinzhe Zeng <[email protected]>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
deepmodeling · May 24, 2024 · 6aac9f8 · 6aac9f8
1 parent 7b16911
commit 6aac9f8
Show file tree

Hide file tree

Showing 36 changed files with 191 additions and 326 deletions.
diff --git a/deepmd/common.py b/deepmd/common.py
@@ -40,7 +40,6 @@
 __all__ = [
     "select_idx_map",
     "make_default_mesh",
-    "j_must_have",
     "j_loader",
     "expand_sys_str",
     "get_np_precision",
@@ -127,15 +126,20 @@ def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray:
     return default_mesh
 
 
-# TODO: rename j_must_have to j_deprecated and only warn about deprecated keys
-# maybe rename this to j_deprecated and only warn about deprecated keys,
-# if the deprecated_key argument is left empty function puppose is only custom
-# error since dict[key] already raises KeyError when the key is missing
-def j_must_have(
+def j_deprecated(
     jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = []
 ) -> "_DICT_VAL":
     """Assert that supplied dictionary conaines specified key.
 
+    Parameters
+    ----------
+    jdata : Dict[str, _DICT_VAL]
+        dictionary to check
+    key : str
+        key to check
+    deprecated_key : List[str], optional
+        list of deprecated keys, by default []
+
     Returns
     -------
     _DICT_VAL

diff --git a/deepmd/tf/common.py b/deepmd/tf/common.py
@@ -26,7 +26,6 @@
     expand_sys_str,
     get_np_precision,
     j_loader,
-    j_must_have,
     make_default_mesh,
     select_idx_map,
 )
@@ -47,7 +46,6 @@
     # from deepmd.common
     "select_idx_map",
     "make_default_mesh",
-    "j_must_have",
     "j_loader",
     "expand_sys_str",
     "get_np_precision",

diff --git a/deepmd/tf/entrypoints/train.py b/deepmd/tf/entrypoints/train.py
@@ -15,7 +15,6 @@
 
 from deepmd.tf.common import (
     j_loader,
-    j_must_have,
 )
 from deepmd.tf.env import (
     reset_default_tf_session_config,
@@ -211,7 +210,7 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = Fal
             modifier.build_fv_graph()
 
     # get training info
-    stop_batch = j_must_have(jdata["training"], "numb_steps")
+    stop_batch = jdata["training"]["numb_steps"]
     origin_type_map = jdata["model"].get("origin_type_map", None)
     if (
         origin_type_map is not None and not origin_type_map

diff --git a/deepmd/tf/train/trainer.py b/deepmd/tf/train/trainer.py
@@ -29,7 +29,6 @@
 )
 from deepmd.tf.common import (
     get_precision,
-    j_must_have,
 )
 from deepmd.tf.env import (
     GLOBAL_ENER_FLOAT_PRECISION,
@@ -91,7 +90,7 @@ def __init__(self, jdata, run_opt, is_compress=False):
 
     def _init_param(self, jdata):
         # model config
-        model_param = j_must_have(jdata, "model")
+        model_param = jdata["model"]
 
         # nvnmd
         self.nvnmd_param = jdata.get("nvnmd", {})
@@ -123,7 +122,7 @@ def get_lr_and_coef(lr_param):
             return lr, scale_lr_coef
 
         # learning rate
-        lr_param = j_must_have(jdata, "learning_rate")
+        lr_param = jdata["learning_rate"]
         self.lr, self.scale_lr_coef = get_lr_and_coef(lr_param)
         # loss
         # infer loss type by fitting_type

diff --git a/deepmd/utils/compat.py b/deepmd/utils/compat.py
@@ -17,7 +17,7 @@
 import numpy as np
 
 from deepmd.common import (
-    j_must_have,
+    j_deprecated,
 )
 
 
@@ -127,8 +127,8 @@ def _smth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]:
     descriptor["sel"] = jdata["sel_a"]
     _jcopy(jdata, descriptor, ("rcut",))
     descriptor["rcut_smth"] = jdata.get("rcut_smth", descriptor["rcut"])
-    descriptor["neuron"] = j_must_have(jdata, "filter_neuron")
-    descriptor["axis_neuron"] = j_must_have(jdata, "axis_neuron", ["n_axis_neuron"])
+    descriptor["neuron"] = jdata["filter_neuron"]
+    descriptor["axis_neuron"] = j_deprecated(jdata, "axis_neuron", ["n_axis_neuron"])
     descriptor["resnet_dt"] = False
     if "resnet_dt" in jdata:
         descriptor["resnet_dt"] = jdata["filter_resnet_dt"]
@@ -154,7 +154,7 @@ def _fitting_net(jdata: Dict[str, Any]) -> Dict[str, Any]:
     seed = jdata.get("seed", None)
     if seed is not None:
         fitting_net["seed"] = seed
-    fitting_net["neuron"] = j_must_have(jdata, "fitting_neuron", ["n_neuron"])
+    fitting_net["neuron"] = j_deprecated(jdata, "fitting_neuron", ["n_neuron"])
     fitting_net["resnet_dt"] = True
     if "resnet_dt" in jdata:
         fitting_net["resnet_dt"] = jdata["resnet_dt"]
@@ -237,16 +237,16 @@ def _training(jdata: Dict[str, Any]) -> Dict[str, Any]:
     training["disp_file"] = "lcurve.out"
     if "disp_file" in jdata:
         training["disp_file"] = jdata["disp_file"]
-    training["disp_freq"] = j_must_have(jdata, "disp_freq")
-    training["numb_test"] = j_must_have(jdata, "numb_test")
-    training["save_freq"] = j_must_have(jdata, "save_freq")
-    training["save_ckpt"] = j_must_have(jdata, "save_ckpt")
-    training["disp_training"] = j_must_have(jdata, "disp_training")
-    training["time_training"] = j_must_have(jdata, "time_training")
+    training["disp_freq"] = jdata["disp_freq"]
+    training["numb_test"] = jdata["numb_test"]
+    training["save_freq"] = jdata["save_freq"]
+    training["save_ckpt"] = jdata["save_ckpt"]
+    training["disp_training"] = jdata["disp_training"]
+    training["time_training"] = jdata["time_training"]
     if "profiling" in jdata:
         training["profiling"] = jdata["profiling"]
         if training["profiling"]:
-            training["profiling_file"] = j_must_have(jdata, "profiling_file")
+            training["profiling_file"] = jdata["profiling_file"]
     return training
 
 
@@ -378,7 +378,7 @@ def is_deepmd_v0_input(jdata):
         return "model" not in jdata.keys()
 
     def is_deepmd_v1_input(jdata):
-        return "systems" in j_must_have(jdata, "training").keys()
+        return "systems" in jdata["training"].keys()
 
     if is_deepmd_v0_input(jdata):
         jdata = convert_input_v0_v1(jdata, warning, None)

diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py
@@ -18,7 +18,6 @@
 import deepmd.utils.random as dp_random
 from deepmd.common import (
     expand_sys_str,
-    j_must_have,
     make_default_mesh,
 )
 from deepmd.env import (
@@ -792,10 +791,10 @@ def get_data(
     DeepmdDataSystem
         The data system
     """
-    systems = j_must_have(jdata, "systems")
+    systems = jdata["systems"]
     systems = process_systems(systems)
 
-    batch_size = j_must_have(jdata, "batch_size")
+    batch_size = jdata["batch_size"]
     sys_probs = jdata.get("sys_probs", None)
     auto_prob = jdata.get("auto_prob", "prob_sys_size")
     optional_type_map = not multi_task_mode

diff --git a/source/tests/tf/test_data_large_batch.py b/source/tests/tf/test_data_large_batch.py
@@ -5,9 +5,6 @@
 import numpy as np
 from packaging.version import parse as parse_version
 
-from deepmd.tf.common import (
-    j_must_have,
-)
 from deepmd.tf.descriptor import (
     DescrptSeAtten,
 )
@@ -50,11 +47,11 @@ def test_data_mixed_type(self):
         jfile = "water_se_atten_mixed_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, "systems")
+        systems = jdata["systems"]
         batch_size = 1
         test_size = 1
-        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
-        type_map = j_must_have(jdata["model"], "type_map")
+        rcut = jdata["model"]["descriptor"]["rcut"]
+        type_map = jdata["model"]["type_map"]
 
         data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map)
         data_requirement = {
@@ -248,11 +245,11 @@ def test_stripped_data_mixed_type(self):
         jfile = "water_se_atten_mixed_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, "systems")
+        systems = jdata["systems"]
         batch_size = 1
         test_size = 1
-        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
-        type_map = j_must_have(jdata["model"], "type_map")
+        rcut = jdata["model"]["descriptor"]["rcut"]
+        type_map = jdata["model"]["type_map"]
 
         data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map)
         data_requirement = {
@@ -446,11 +443,11 @@ def test_compressible_data_mixed_type(self):
         jfile = "water_se_atten_mixed_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, "systems")
+        systems = jdata["systems"]
         batch_size = 1
         test_size = 1
-        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
-        type_map = j_must_have(jdata["model"], "type_map")
+        rcut = jdata["model"]["descriptor"]["rcut"]
+        type_map = jdata["model"]["type_map"]
 
         data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map)
         data_requirement = {

diff --git a/source/tests/tf/test_data_modifier.py b/source/tests/tf/test_data_modifier.py
@@ -3,9 +3,6 @@
 
 import numpy as np
 
-from deepmd.tf.common import (
-    j_must_have,
-)
 from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
@@ -61,12 +58,12 @@ def _setUp(self):
         rcut = model.model.get_rcut()
 
         # init data system
-        systems = j_must_have(jdata["training"], "systems")
+        systems = jdata["training"]["systems"]
         # systems[0] = tests_path / systems[0]
         systems = [tests_path / ii for ii in systems]
         set_pfx = "set"
-        batch_size = j_must_have(jdata["training"], "batch_size")
-        test_size = j_must_have(jdata["training"], "numb_test")
+        batch_size = jdata["training"]["batch_size"]
+        test_size = jdata["training"]["numb_test"]
         data = DeepmdDataSystem(
             systems, batch_size, test_size, rcut, set_prefix=set_pfx
         )

diff --git a/source/tests/tf/test_data_modifier_shuffle.py b/source/tests/tf/test_data_modifier_shuffle.py
@@ -4,9 +4,6 @@
 
 import numpy as np
 
-from deepmd.tf.common import (
-    j_must_have,
-)
 from deepmd.tf.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     tf,
@@ -64,10 +61,10 @@ def _setUp(self):
         rcut = model.model.get_rcut()
 
         # init data system
-        systems = j_must_have(jdata["training"], "systems")
+        systems = jdata["training"]["systems"]
         set_pfx = "set"
-        batch_size = j_must_have(jdata["training"], "batch_size")
-        test_size = j_must_have(jdata["training"], "numb_test")
+        batch_size = jdata["training"]["batch_size"]
+        test_size = jdata["training"]["numb_test"]
         data = DeepmdDataSystem(
             systems, batch_size, test_size, rcut, set_prefix=set_pfx
         )

diff --git a/source/tests/tf/test_descrpt_hybrid.py b/source/tests/tf/test_descrpt_hybrid.py
@@ -4,9 +4,6 @@
 import numpy as np
 from packaging.version import parse as parse_version
 
-from deepmd.tf.common import (
-    j_must_have,
-)
 from deepmd.tf.descriptor import (
     DescrptHybrid,
 )
@@ -40,10 +37,8 @@ def test_descriptor_hybrid(self):
         jfile = "water_hybrid.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, "systems")
+        systems = jdata["systems"]
         set_pfx = "set"
-        batch_size = j_must_have(jdata, "batch_size")
-        test_size = j_must_have(jdata, "numb_test")
         batch_size = 2
         test_size = 1
         rcut = 6

diff --git a/source/tests/tf/test_descrpt_se_a_mask.py b/source/tests/tf/test_descrpt_se_a_mask.py
@@ -3,9 +3,6 @@
 
 import numpy as np
 
-from deepmd.tf.common import (
-    j_must_have,
-)
 from deepmd.tf.descriptor import (
     DescrptSeAMask,
 )
@@ -231,12 +228,12 @@ def test_descriptor_se_a_mask(self):
         jdata["training"]["validation_data"]["systems"] = [
             str(tests_path / "data_dp_mask")
         ]
-        systems = j_must_have(jdata["training"]["validation_data"], "systems")
+        systems = jdata["training"]["validation_data"]["systems"]
         set_pfx = "set"
         batch_size = 2
         test_size = 1
         rcut = 20.0  # For DataSystem interface compatibility, not used in this test.
-        sel = j_must_have(jdata["model"]["descriptor"], "sel")
+        sel = jdata["model"]["descriptor"]["sel"]
         ntypes = len(sel)
         total_atom_num = np.cumsum(sel)[-1]
 

diff --git a/source/tests/tf/test_descrpt_se_a_type.py b/source/tests/tf/test_descrpt_se_a_type.py
@@ -1,9 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import numpy as np
 
-from deepmd.tf.common import (
-    j_must_have,
-)
 from deepmd.tf.descriptor import (
     DescrptSeA,
 )
@@ -33,15 +30,12 @@ def test_descriptor_two_sides(self):
         jfile = "water_se_a_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, "systems")
+        systems = jdata["systems"]
         set_pfx = "set"
-        batch_size = j_must_have(jdata, "batch_size")
-        test_size = j_must_have(jdata, "numb_test")
         batch_size = 2
         test_size = 1
-        stop_batch = j_must_have(jdata, "stop_batch")
-        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
-        sel = j_must_have(jdata["model"]["descriptor"], "sel")
+        rcut = jdata["model"]["descriptor"]["rcut"]
+        sel = jdata["model"]["descriptor"]["sel"]
         ntypes = len(sel)
 
         data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
@@ -197,15 +191,14 @@ def test_descriptor_one_side(self):
         jfile = "water_se_a_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, "systems")
+        systems = jdata["systems"]
         set_pfx = "set"
-        batch_size = j_must_have(jdata, "batch_size")
-        test_size = j_must_have(jdata, "numb_test")
+        batch_size = jdata["batch_size"]
+        test_size = jdata["numb_test"]
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, "stop_batch")
-        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
-        sel = j_must_have(jdata["model"]["descriptor"], "sel")
+        rcut = jdata["model"]["descriptor"]["rcut"]
+        sel = jdata["model"]["descriptor"]["sel"]
         ntypes = len(sel)
 
         data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)