From 11771fa420b038d9ab5c2622a158bacef78149ac Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Thu, 16 Nov 2023 18:03:42 -0500
Subject: [PATCH 1/4] fix restarting from compressed training with type embedding

Signed-off-by: Jinzhe Zeng
---
 deepmd/model/dos.py           |  4 ++-
 deepmd/model/ener.py          |  4 ++-
 deepmd/model/model.py         | 55 +++++++++++++++++++++++++++++++++++
 deepmd/model/multi.py         |  4 ++-
 deepmd/model/pairwise_dprc.py |  4 ++-
 deepmd/model/tensor.py        |  4 ++-
 deepmd/utils/type_embed.py    |  8 +----
 7 files changed, 71 insertions(+), 12 deletions(-)

diff --git a/deepmd/model/dos.py b/deepmd/model/dos.py
index 697fad9a9e..22e291a0f0 100644
--- a/deepmd/model/dos.py
+++ b/deepmd/model/dos.py
@@ -155,10 +155,12 @@ def build(
 
         # type embedding if any
         if self.typeebd is not None:
-            type_embedding = self.typeebd.build(
+            type_embedding = self.build_type_embedding(
                 self.ntypes,
                 reuse=reuse,
                 suffix=suffix,
+                frz_model=frz_model,
+                ckpt_meta=ckpt_meta,
             )
             input_dict["type_embedding"] = type_embedding
             input_dict["atype"] = atype_
diff --git a/deepmd/model/ener.py b/deepmd/model/ener.py
index 1976c1ad51..0d8d66b305 100644
--- a/deepmd/model/ener.py
+++ b/deepmd/model/ener.py
@@ -203,10 +203,12 @@ def build(
 
         # type embedding if any
         if self.typeebd is not None and "type_embedding" not in input_dict:
-            type_embedding = self.typeebd.build(
+            type_embedding = self.build_type_embedding(
                 self.ntypes,
                 reuse=reuse,
                 suffix=suffix,
+                ckpt_meta=ckpt_meta,
+                frz_model=frz_model,
             )
             input_dict["type_embedding"] = type_embedding
         # spin if any
diff --git a/deepmd/model/model.py b/deepmd/model/model.py
index 3f24e42aec..4a242904de 100644
--- a/deepmd/model/model.py
+++ b/deepmd/model/model.py
@@ -331,6 +331,61 @@ def build_descrpt(
             self.descrpt.pass_tensors_from_frz_model(*imported_tensors[:-1])
         return dout
 
+    def build_type_embedding(
+        self,
+        ntypes: int,
+        frz_model: Optional[str] = None,
+        ckpt_meta: Optional[str] = None,
+        suffix: str = "",
+        reuse: Optional[Union[bool, Enum]] = None,
+    ) -> tf.Tensor:
+        """Build the type embedding part of the model.
+
+        Parameters
+        ----------
+        ntypes : int
+            The number of types
+        frz_model : str, optional
+            The path to the frozen model
+        ckpt_meta : str, optional
+            The path prefix of the checkpoint and meta files
+        suffix : str, optional
+            The suffix of the scope
+        reuse : bool or tf.AUTO_REUSE, optional
+            Whether to reuse the variables
+
+        Returns
+        -------
+        tf.Tensor
+            The type embedding tensor
+        """
+        assert self.typeebd is not None
+        if frz_model is None and ckpt_meta is None:
+            dout = self.typeebd.build(
+                ntypes,
+                reuse=reuse,
+                suffix=suffix,
+            )
+        else:
+            # nothing input
+            feed_dict = {}
+            return_elements = [
+                f"t_typeebd{suffix}:0",
+            ]
+            if frz_model is not None:
+                imported_tensors = self._import_graph_def_from_frz_model(
+                    frz_model, feed_dict, return_elements
+                )
+            elif ckpt_meta is not None:
+                imported_tensors = self._import_graph_def_from_ckpt_meta(
+                    ckpt_meta, feed_dict, return_elements
+                )
+            else:
+                raise RuntimeError("should not reach here")  # pragma: no cover
+            dout = imported_tensors[-1]
+        self.typeebd.type_embedding_from_graph = dout
+        return dout
+
     def _import_graph_def_from_frz_model(
         self, frz_model: str, feed_dict: dict, return_elements: List[str]
     ):
diff --git a/deepmd/model/multi.py b/deepmd/model/multi.py
index bfc67b9792..83b231c0e8 100644
--- a/deepmd/model/multi.py
+++ b/deepmd/model/multi.py
@@ -317,10 +317,12 @@ def build(
 
         # type embedding if any
         if self.typeebd is not None:
-            type_embedding = self.typeebd.build(
+            type_embedding = self.build_type_embedding(
                 self.ntypes,
                 reuse=reuse,
                 suffix=suffix,
+                frz_model=frz_model,
+                ckpt_meta=ckpt_meta,
             )
             input_dict["type_embedding"] = type_embedding
             input_dict["atype"] = atype_
diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py
index 6983a31cfd..f74571febb 100644
--- a/deepmd/model/pairwise_dprc.py
+++ b/deepmd/model/pairwise_dprc.py
@@ -173,10 +173,12 @@ def build(
         atype_qmmm = gather_placeholder(atype_qmmm, forward_qmmm_map, placeholder=-1)
         box_qm = box
 
-        type_embedding = self.typeebd.build(
+        type_embedding = self.build_type_embedding(
             self.ntypes,
             reuse=reuse,
             suffix=suffix,
+            frz_model=frz_model,
+            ckpt_meta=ckpt_meta,
         )
         input_dict_qm["type_embedding"] = type_embedding
         input_dict_qmmm["type_embedding"] = type_embedding
diff --git a/deepmd/model/tensor.py b/deepmd/model/tensor.py
index 9099b753a4..6a21e085f3 100644
--- a/deepmd/model/tensor.py
+++ b/deepmd/model/tensor.py
@@ -135,10 +135,12 @@ def build(
 
         # type embedding if any
         if self.typeebd is not None:
-            type_embedding = self.typeebd.build(
+            type_embedding = self.build_type_embedding(
                 self.ntypes,
                 reuse=reuse,
                 suffix=suffix,
+                ckpt_meta=ckpt_meta,
+                frz_model=frz_model,
             )
             input_dict["type_embedding"] = type_embedding
             input_dict["atype"] = atype_
diff --git a/deepmd/utils/type_embed.py b/deepmd/utils/type_embed.py
index aadbb3c6e0..c8ab01f7f5 100644
--- a/deepmd/utils/type_embed.py
+++ b/deepmd/utils/type_embed.py
@@ -16,7 +16,6 @@
     nvnmd_cfg,
 )
 from deepmd.utils.graph import (
-    get_tensor_by_name_from_graph,
     get_type_embedding_net_variables_from_graph_def,
 )
 from deepmd.utils.network import (
@@ -109,7 +108,6 @@ def __init__(
         self.trainable = trainable
         self.uniform_seed = uniform_seed
         self.type_embedding_net_variables = None
-        self.type_embedding_from_graph = None
         self.padding = padding
         self.model_type = None
 
@@ -135,8 +133,6 @@ def build(
         embedded_types
             The computational graph for embedded types
         """
-        if self.model_type is not None and self.model_type == "compressed_model":
-            return self.type_embedding_from_graph
         types = tf.convert_to_tensor(list(range(ntypes)), dtype=tf.int32)
         ebd_type = tf.cast(
             tf.one_hot(tf.cast(types, dtype=tf.int32), int(ntypes)),
@@ -166,7 +162,7 @@ def build(
         if self.padding:
             last_type = tf.cast(tf.zeros([1, self.neuron[-1]]), self.filter_precision)
             ebd_type = tf.concat([ebd_type, last_type], 0)  # (ntypes + 1) * neuron[-1]
-        self.ebd_type = tf.identity(ebd_type, name="t_typeebd")
+        self.ebd_type = tf.identity(ebd_type, name="t_typeebd" + suffix)
         return self.ebd_type
 
     def init_variables(
@@ -193,5 +189,3 @@ def init_variables(
         self.type_embedding_net_variables = (
             get_type_embedding_net_variables_from_graph_def(graph_def, suffix=suffix)
         )
-        type_embedding = get_tensor_by_name_from_graph(graph, "t_typeebd")
-        self.type_embedding_from_graph = tf.convert_to_tensor(type_embedding)

From d9dd12daa01686585f5859ef70cdae0e8f6d118d Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Thu, 16 Nov 2023 18:06:54 -0500
Subject: [PATCH 2/4] remove useless variable

Signed-off-by: Jinzhe Zeng
---
 deepmd/model/model.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/deepmd/model/model.py b/deepmd/model/model.py
index 4a242904de..dd439056b4 100644
--- a/deepmd/model/model.py
+++ b/deepmd/model/model.py
@@ -383,7 +383,6 @@ def build_type_embedding(
             else:
                 raise RuntimeError("should not reach here")  # pragma: no cover
             dout = imported_tensors[-1]
-        self.typeebd.type_embedding_from_graph = dout
         return dout
 
     def _import_graph_def_from_frz_model(

From 6692300637d2a995dec10baf1b9851b391ae226b Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Thu, 16 Nov 2023 13:24:17 -0500
Subject: [PATCH 3/4] add tests for compressed training and restart

Signed-off-by: Jinzhe Zeng
---
 source/tests/test_compressed_training.py | 60 ++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 source/tests/test_compressed_training.py

diff --git a/source/tests/test_compressed_training.py b/source/tests/test_compressed_training.py
new file mode 100644
index 0000000000..a9420a5bd0
--- /dev/null
+++ b/source/tests/test_compressed_training.py
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import unittest
+
+# from deepmd.entrypoints.compress import compress
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+from packaging.version import parse as parse_version
+
+from deepmd.env import (
+    tf,
+)
+
+
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("2"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
+class TestCompressedTrainingSeAtten(unittest.TestCase):
+    def setUp(self) -> None:
+        data_file = str(tests_path / os.path.join("model_compression", "data"))
+        self.input_file = str(tests_path / "input.json")
+        self.frozen_model = str(tests_path / "dp-compress-training-original.pb")
+        self.compressed_model = str(tests_path / "dp-compress-training-compressed.pb")
+        self.frozen_compress_training_model = str(
+            tests_path / "dp-compress-training-compress-training.pb"
+        )
+        self.ckpt_file = str(tests_path / "dp-compress-training.ckpt")
+        jdata = j_loader(
+            str(tests_path / os.path.join("model_compression", "input.json"))
+        )
+        jdata["model"]["descriptor"] = {}
+        jdata["model"]["descriptor"]["type"] = "se_atten_v2"
+        jdata["model"]["descriptor"]["sel"] = 20
+        jdata["model"]["descriptor"]["attn_layer"] = 0
+        jdata["training"]["training_data"]["systems"] = data_file
+        jdata["training"]["validation_data"]["systems"] = data_file
+        jdata["training"]["save_ckpt"] = self.ckpt_file
+        with open(self.input_file, "w") as fp:
+            json.dump(jdata, fp, indent=4)
+
+    def test_compressed_training(self):
+        run_dp(f"dp train {self.input_file}")
+        run_dp(f"dp freeze -o {self.frozen_model}")
+        run_dp(f"dp compress -i {self.frozen_model} -o {self.compressed_model}")
+        # compress training
+        run_dp(f"dp train {self.input_file} -f {self.compressed_model}")
+        # restart compress training
+        run_dp(f"dp train {self.input_file} -r {self.ckpt_file}")
+        # freeze compress training
+        run_dp(f"dp freeze -o {self.frozen_compress_training_model}")
+        # it should not be able to compress again
+        with self.assertRaises(RuntimeError):
+            run_dp(
+                f"dp compress -i {self.frozen_compress_training_model} -o {self.compressed_model}"
+            )

From 8a73399d2d62568ef0bac27f9a73be3efc6ed114 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Fri, 17 Nov 2023 22:00:14 -0500
Subject: [PATCH 4/4] assign `-c` for `dp freeze`

Signed-off-by: Jinzhe Zeng
---
 source/tests/test_compressed_training.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/source/tests/test_compressed_training.py b/source/tests/test_compressed_training.py
index a9420a5bd0..0a0bbeaadf 100644
--- a/source/tests/test_compressed_training.py
+++ b/source/tests/test_compressed_training.py
@@ -30,6 +30,7 @@ def setUp(self) -> None:
             tests_path / "dp-compress-training-compress-training.pb"
         )
         self.ckpt_file = str(tests_path / "dp-compress-training.ckpt")
+        self.checkpoint_dir = str(tests_path)
         jdata = j_loader(
            str(tests_path / os.path.join("model_compression", "input.json"))
         )
@@ -45,14 +46,16 @@
 
     def test_compressed_training(self):
         run_dp(f"dp train {self.input_file}")
-        run_dp(f"dp freeze -o {self.frozen_model}")
+        run_dp(f"dp freeze -c {self.checkpoint_dir} -o {self.frozen_model}")
         run_dp(f"dp compress -i {self.frozen_model} -o {self.compressed_model}")
         # compress training
         run_dp(f"dp train {self.input_file} -f {self.compressed_model}")
         # restart compress training
         run_dp(f"dp train {self.input_file} -r {self.ckpt_file}")
         # freeze compress training
-        run_dp(f"dp freeze -o {self.frozen_compress_training_model}")
+        run_dp(
+            f"dp freeze -c {self.checkpoint_dir} -o {self.frozen_compress_training_model}"
+        )
         # it should not be able to compress again
         with self.assertRaises(RuntimeError):
             run_dp(