Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix restarting from compressed training with type embedding (#2996)

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion deepmd/model/dos.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,12 @@

# type embedding if any
if self.typeebd is not None:
type_embedding = self.typeebd.build(
type_embedding = self.build_type_embedding(

Check warning on line 158 in deepmd/model/dos.py

View check run for this annotation

Codecov / codecov/patch

deepmd/model/dos.py#L158

Added line #L158 was not covered by tests
self.ntypes,
reuse=reuse,
suffix=suffix,
frz_model=frz_model,
ckpt_meta=ckpt_meta,
)
input_dict["type_embedding"] = type_embedding
input_dict["atype"] = atype_
Expand Down
4 changes: 3 additions & 1 deletion deepmd/model/ener.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,12 @@ def build(

# type embedding if any
if self.typeebd is not None and "type_embedding" not in input_dict:
type_embedding = self.typeebd.build(
type_embedding = self.build_type_embedding(
self.ntypes,
reuse=reuse,
suffix=suffix,
ckpt_meta=ckpt_meta,
frz_model=frz_model,
)
input_dict["type_embedding"] = type_embedding
# spin if any
Expand Down
54 changes: 54 additions & 0 deletions deepmd/model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,60 @@ def build_descrpt(
self.descrpt.pass_tensors_from_frz_model(*imported_tensors[:-1])
return dout

def build_type_embedding(
    self,
    ntypes: int,
    frz_model: Optional[str] = None,
    ckpt_meta: Optional[str] = None,
    suffix: str = "",
    reuse: Optional[Union[bool, Enum]] = None,
) -> tf.Tensor:
    """Build the type embedding part of the model.

    When neither a frozen model nor a checkpoint prefix is supplied, the
    embedding net is built from scratch; otherwise the pre-computed type
    embedding tensor is recovered from the saved graph.

    Parameters
    ----------
    ntypes : int
        The number of types
    frz_model : str, optional
        The path to the frozen model
    ckpt_meta : str, optional
        The path prefix of the checkpoint and meta files
    suffix : str, optional
        The suffix of the scope
    reuse : bool or tf.AUTO_REUSE, optional
        Whether to reuse the variables

    Returns
    -------
    tf.Tensor
        The type embedding tensor
    """
    assert self.typeebd is not None
    # Fresh graph: run the embedding network directly.
    if frz_model is None and ckpt_meta is None:
        return self.typeebd.build(
            ntypes,
            reuse=reuse,
            suffix=suffix,
        )
    # Restore the embedding tensor from a saved graph; there are no
    # placeholders to feed for this sub-graph.
    empty_feed = {}
    wanted = [f"t_typeebd{suffix}:0"]
    if frz_model is not None:
        fetched = self._import_graph_def_from_frz_model(
            frz_model, empty_feed, wanted
        )
    else:
        # ckpt_meta is guaranteed non-None on this branch.
        fetched = self._import_graph_def_from_ckpt_meta(
            ckpt_meta, empty_feed, wanted
        )
    return fetched[-1]

def _import_graph_def_from_frz_model(
self, frz_model: str, feed_dict: dict, return_elements: List[str]
):
Expand Down
4 changes: 3 additions & 1 deletion deepmd/model/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,10 +317,12 @@

# type embedding if any
if self.typeebd is not None:
type_embedding = self.typeebd.build(
type_embedding = self.build_type_embedding(

Check warning on line 320 in deepmd/model/multi.py

View check run for this annotation

Codecov / codecov/patch

deepmd/model/multi.py#L320

Added line #L320 was not covered by tests
self.ntypes,
reuse=reuse,
suffix=suffix,
frz_model=frz_model,
ckpt_meta=ckpt_meta,
)
input_dict["type_embedding"] = type_embedding
input_dict["atype"] = atype_
Expand Down
4 changes: 3 additions & 1 deletion deepmd/model/pairwise_dprc.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,12 @@ def build(
atype_qmmm = gather_placeholder(atype_qmmm, forward_qmmm_map, placeholder=-1)
box_qm = box

type_embedding = self.typeebd.build(
type_embedding = self.build_type_embedding(
self.ntypes,
reuse=reuse,
suffix=suffix,
frz_model=frz_model,
ckpt_meta=ckpt_meta,
)
input_dict_qm["type_embedding"] = type_embedding
input_dict_qmmm["type_embedding"] = type_embedding
Expand Down
4 changes: 3 additions & 1 deletion deepmd/model/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,12 @@ def build(

# type embedding if any
if self.typeebd is not None:
type_embedding = self.typeebd.build(
type_embedding = self.build_type_embedding(
self.ntypes,
reuse=reuse,
suffix=suffix,
ckpt_meta=ckpt_meta,
frz_model=frz_model,
)
input_dict["type_embedding"] = type_embedding
input_dict["atype"] = atype_
Expand Down
8 changes: 1 addition & 7 deletions deepmd/utils/type_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
nvnmd_cfg,
)
from deepmd.utils.graph import (
get_tensor_by_name_from_graph,
get_type_embedding_net_variables_from_graph_def,
)
from deepmd.utils.network import (
Expand Down Expand Up @@ -109,7 +108,6 @@ def __init__(
self.trainable = trainable
self.uniform_seed = uniform_seed
self.type_embedding_net_variables = None
self.type_embedding_from_graph = None
self.padding = padding
self.model_type = None

Expand All @@ -135,8 +133,6 @@ def build(
embedded_types
The computational graph for embedded types
"""
if self.model_type is not None and self.model_type == "compressed_model":
return self.type_embedding_from_graph
types = tf.convert_to_tensor(list(range(ntypes)), dtype=tf.int32)
ebd_type = tf.cast(
tf.one_hot(tf.cast(types, dtype=tf.int32), int(ntypes)),
Expand Down Expand Up @@ -166,7 +162,7 @@ def build(
if self.padding:
last_type = tf.cast(tf.zeros([1, self.neuron[-1]]), self.filter_precision)
ebd_type = tf.concat([ebd_type, last_type], 0) # (ntypes + 1) * neuron[-1]
self.ebd_type = tf.identity(ebd_type, name="t_typeebd")
self.ebd_type = tf.identity(ebd_type, name="t_typeebd" + suffix)
return self.ebd_type

def init_variables(
Expand All @@ -193,5 +189,3 @@ def init_variables(
self.type_embedding_net_variables = (
get_type_embedding_net_variables_from_graph_def(graph_def, suffix=suffix)
)
type_embedding = get_tensor_by_name_from_graph(graph, "t_typeebd")
self.type_embedding_from_graph = tf.convert_to_tensor(type_embedding)
63 changes: 63 additions & 0 deletions source/tests/test_compressed_training.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import json
import os
import unittest

# from deepmd.entrypoints.compress import compress
from common import (
j_loader,
run_dp,
tests_path,
)
from packaging.version import parse as parse_version

from deepmd.env import (
tf,
)


@unittest.skipIf(
    parse_version(tf.__version__) < parse_version("2"),
    f"The current tf version {tf.__version__} is too low to run the new testing model.",
)
class TestCompressedTrainingSeAtten(unittest.TestCase):
    """End-to-end check that training can restart from a compressed model.

    Exercises the full cycle: train -> freeze -> compress -> retrain from the
    compressed model -> restart from the checkpoint -> freeze again, and
    finally verifies that a compressed-training model cannot be compressed a
    second time.
    """

    def setUp(self) -> None:
        # Paths for the artifacts produced along the pipeline.
        data_file = str(tests_path / os.path.join("model_compression", "data"))
        self.input_file = str(tests_path / "input.json")
        self.frozen_model = str(tests_path / "dp-compress-training-original.pb")
        self.compressed_model = str(tests_path / "dp-compress-training-compressed.pb")
        self.frozen_compress_training_model = str(
            tests_path / "dp-compress-training-compress-training.pb"
        )
        self.ckpt_file = str(tests_path / "dp-compress-training.ckpt")
        self.checkpoint_dir = str(tests_path)
        # Start from the stock compression input and switch the descriptor
        # to a minimal se_atten_v2 (no attention layers, small sel).
        jdata = j_loader(
            str(tests_path / os.path.join("model_compression", "input.json"))
        )
        jdata["model"]["descriptor"] = {
            "type": "se_atten_v2",
            "sel": 20,
            "attn_layer": 0,
        }
        for section in ("training_data", "validation_data"):
            jdata["training"][section]["systems"] = data_file
        jdata["training"]["save_ckpt"] = self.ckpt_file
        with open(self.input_file, "w") as fp:
            json.dump(jdata, fp, indent=4)

    def test_compressed_training(self):
        # Initial training, freezing, and compression.
        run_dp(f"dp train {self.input_file}")
        run_dp(f"dp freeze -c {self.checkpoint_dir} -o {self.frozen_model}")
        run_dp(f"dp compress -i {self.frozen_model} -o {self.compressed_model}")
        # Continue training from the compressed model.
        run_dp(f"dp train {self.input_file} -f {self.compressed_model}")
        # Restart the compressed training from its checkpoint.
        run_dp(f"dp train {self.input_file} -r {self.ckpt_file}")
        # Freeze the compressed-training result.
        run_dp(
            f"dp freeze -c {self.checkpoint_dir} -o {self.frozen_compress_training_model}"
        )
        # Compressing an already-compressed model must be rejected.
        with self.assertRaises(RuntimeError):
            run_dp(
                f"dp compress -i {self.frozen_compress_training_model} -o {self.compressed_model}"
            )
Loading