From f32104675e188891d4c1ab137d6ca6401b709f72 Mon Sep 17 00:00:00 2001 From: nahso <47053538+nahso@users.noreply.github.com> Date: Fri, 24 Nov 2023 09:20:51 +0800 Subject: [PATCH] Add the missing initializations for extra embedding variables (#3005) --- deepmd/descriptor/se_a.py | 41 ++-- deepmd/descriptor/se_atten.py | 49 ++--- deepmd/utils/compress.py | 6 +- deepmd/utils/graph.py | 85 ++++++++ source/tests/test_init_frz_model_se_a_tebd.py | 196 ++++++++++++++++++ 5 files changed, 331 insertions(+), 46 deletions(-) create mode 100644 source/tests/test_init_frz_model_se_a_tebd.py diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py index 2de0b63245..6b0a7f7ab1 100644 --- a/deepmd/descriptor/se_a.py +++ b/deepmd/descriptor/se_a.py @@ -41,6 +41,8 @@ GraphWithoutTensorError, ) from deepmd.utils.graph import ( + get_extra_embedding_net_suffix, + get_extra_embedding_net_variables_from_graph_def, get_pattern_nodes_from_graph_def, get_tensor_by_name_from_graph, ) @@ -204,7 +206,7 @@ def __init__( self.type_one_side = type_one_side self.spin = spin self.stripped_type_embedding = stripped_type_embedding - self.extra_embeeding_net_variables = None + self.extra_embedding_net_variables = None self.layer_size = len(neuron) # extend sel_a for spin system @@ -470,11 +472,13 @@ def enable_compression( ) if self.stripped_type_embedding: + one_side_suffix = get_extra_embedding_net_suffix(type_one_side=True) + two_side_suffix = get_extra_embedding_net_suffix(type_one_side=False) ret_two_side = get_pattern_nodes_from_graph_def( - graph_def, f"filter_type_all{suffix}/.+_two_side_ebd" + graph_def, f"filter_type_all{suffix}/.+{two_side_suffix}" ) ret_one_side = get_pattern_nodes_from_graph_def( - graph_def, f"filter_type_all{suffix}/.+_one_side_ebd" + graph_def, f"filter_type_all{suffix}/.+{one_side_suffix}" ) if len(ret_two_side) == 0 and len(ret_one_side) == 0: raise RuntimeError( @@ -487,19 +491,19 @@ def enable_compression( elif len(ret_two_side) != 0: self.final_type_embedding = get_two_side_type_embedding(self, graph) self.matrix = get_extra_side_embedding_net_variable( - self, graph_def, "two_side", "matrix", suffix + self, graph_def, two_side_suffix, "matrix", suffix ) self.bias = get_extra_side_embedding_net_variable( - self, graph_def, "two_side", "bias", suffix + self, graph_def, two_side_suffix, "bias", suffix ) self.extra_embedding = make_data(self, self.final_type_embedding) else: self.final_type_embedding = get_type_embedding(self, graph) self.matrix = get_extra_side_embedding_net_variable( - self, graph_def, "one_side", "matrix", suffix + self, graph_def, one_side_suffix, "matrix", suffix ) self.bias = get_extra_side_embedding_net_variable( - self, graph_def, "one_side", "bias", suffix + self, graph_def, one_side_suffix, "bias", suffix ) self.extra_embedding = make_data(self, self.final_type_embedding) @@ -961,20 +965,21 @@ def _filter_lower( if not self.compress: if self.type_one_side: - one_side_type_embedding_suffix = "_one_side_ebd" net_output = embedding_net( type_embedding, self.filter_neuron, self.filter_precision, activation_fn=activation_fn, resnet_dt=self.filter_resnet_dt, - name_suffix=one_side_type_embedding_suffix, + name_suffix=get_extra_embedding_net_suffix( + self.type_one_side + ), stddev=stddev, bavg=bavg, seed=self.seed, trainable=trainable, uniform_seed=self.uniform_seed, - initial_variables=self.extra_embeeding_net_variables, + initial_variables=self.extra_embedding_net_variables, mixed_prec=self.mixed_prec, ) net_output = tf.nn.embedding_lookup( @@ -1004,20 +1009,21 @@ def _filter_lower( index_of_two_side = tf.reshape(idx, [-1]) self.extra_embedding_index = index_of_two_side - two_side_type_embedding_suffix = "_two_side_ebd" net_output = embedding_net( two_side_type_embedding, self.filter_neuron, self.filter_precision, activation_fn=activation_fn, resnet_dt=self.filter_resnet_dt, - name_suffix=two_side_type_embedding_suffix, + name_suffix=get_extra_embedding_net_suffix( + self.type_one_side + ), stddev=stddev, bavg=bavg, seed=self.seed, trainable=trainable, uniform_seed=self.uniform_seed, - initial_variables=self.extra_embeeding_net_variables, + initial_variables=self.extra_embedding_net_variables, mixed_prec=self.mixed_prec, ) net_output = tf.nn.embedding_lookup(net_output, idx) @@ -1327,6 +1333,15 @@ def init_variables( self.dstd = new_dstd if self.original_sel is None: self.original_sel = sel + if self.stripped_type_embedding: + self.extra_embedding_net_variables = ( + get_extra_embedding_net_variables_from_graph_def( + graph_def, + suffix, + get_extra_embedding_net_suffix(self.type_one_side), + self.layer_size, + ) + ) @property def explicit_ntypes(self) -> bool: diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py index 8e4c3c3ef6..1ceda23065 100644 --- a/deepmd/descriptor/se_atten.py +++ b/deepmd/descriptor/se_atten.py @@ -42,9 +42,10 @@ ) from deepmd.utils.graph import ( get_attention_layer_variables_from_graph_def, + get_extra_embedding_net_suffix, + get_extra_embedding_net_variables_from_graph_def, get_pattern_nodes_from_graph_def, get_tensor_by_name_from_graph, - get_tensor_by_type, ) from deepmd.utils.network import ( embedding_net, @@ -391,11 +392,12 @@ def enable_compression( raise RuntimeError("can not compress model when attention layer is not 0.") ret = get_pattern_nodes_from_graph_def( - graph_def, f"filter_type_all{suffix}/.+_two_side_ebd" + graph_def, + f"filter_type_all{suffix}/.+{get_extra_embedding_net_suffix(type_one_side=False)}", ) if len(ret) == 0: raise RuntimeError( - "can not find variables of embedding net `*_two_side_ebd` from graph_def, maybe it is not a compressible model." + f"can not find variables of embedding net `*{get_extra_embedding_net_suffix(type_one_side=False)}` from graph_def, maybe it is not a compressible model." ) self.compress = True @@ -420,11 +422,12 @@ def enable_compression( ) self.final_type_embedding = get_two_side_type_embedding(self, graph) + type_side_suffix = get_extra_embedding_net_suffix(type_one_side=False) self.matrix = get_extra_side_embedding_net_variable( - self, graph_def, "two_side", "matrix", suffix + self, graph_def, type_side_suffix, "matrix", suffix ) self.bias = get_extra_side_embedding_net_variable( - self, graph_def, "two_side", "bias", suffix + self, graph_def, type_side_suffix, "bias", suffix ) self.two_embd = make_data(self, self.final_type_embedding) @@ -1125,14 +1128,15 @@ def _filter_lower( two_side_type_embedding, [-1, two_side_type_embedding.shape[-1]], ) - two_side_type_embedding_suffix = "_two_side_ebd" embedding_of_two_side_type_embedding = embedding_net( two_side_type_embedding, self.filter_neuron, self.filter_precision, activation_fn=activation_fn, resnet_dt=self.filter_resnet_dt, - name_suffix=two_side_type_embedding_suffix, + name_suffix=get_extra_embedding_net_suffix( + type_one_side=False + ), stddev=stddev, bavg=bavg, seed=self.seed, @@ -1292,18 +1296,6 @@ def init_variables( """ super().init_variables(graph=graph, graph_def=graph_def, suffix=suffix) - if self.stripped_type_embedding: - self.two_side_embeeding_net_variables = {} - for i in range(1, self.layer_size + 1): - matrix_pattern = f"filter_type_all{suffix}/matrix_{i}_two_side_ebd" - self.two_side_embeeding_net_variables[ - matrix_pattern - ] = self._get_two_embed_variables(graph_def, matrix_pattern) - bias_pattern = f"filter_type_all{suffix}/bias_{i}_two_side_ebd" - self.two_side_embeeding_net_variables[ - bias_pattern - ] = self._get_two_embed_variables(graph_def, bias_pattern) - self.attention_layer_variables = get_attention_layer_variables_from_graph_def( graph_def, suffix=suffix ) @@ -1322,18 +1314,15 @@ def init_variables( f"attention_layer_{i}{suffix}/layer_normalization_{i}/gamma" ] - def _get_two_embed_variables(self, graph_def, pattern: str): - node = get_pattern_nodes_from_graph_def(graph_def, pattern)[pattern] - dtype = tf.as_dtype(node.dtype).as_numpy_dtype - tensor_shape = tf.TensorShape(node.tensor_shape).as_list() - if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor_value = np.frombuffer( - node.tensor_content, - dtype=tf.as_dtype(node.dtype).as_numpy_dtype, + if self.stripped_type_embedding: + self.two_side_embeeding_net_variables = ( + get_extra_embedding_net_variables_from_graph_def( + graph_def, + suffix, + get_extra_embedding_net_suffix(type_one_side=False), + self.layer_size, + ) ) - else: - tensor_value = get_tensor_by_type(node, dtype) - return np.reshape(tensor_value, tensor_shape) def build_type_exclude_mask( self, diff --git a/deepmd/utils/compress.py b/deepmd/utils/compress.py index c6e68dfe19..7a79dec520 100644 --- a/deepmd/utils/compress.py +++ b/deepmd/utils/compress.py @@ -43,15 +43,15 @@ def get_two_side_type_embedding(self, graph): def get_extra_side_embedding_net_variable( - self, graph_def, type_side, varialbe_name, suffix + self, graph_def, type_side_suffix, varialbe_name, suffix ): ret = {} for i in range(1, self.layer_size + 1): target = get_pattern_nodes_from_graph_def( graph_def, - f"filter_type_all{suffix}/{varialbe_name}_{i}_{type_side}_ebd", + f"filter_type_all{suffix}/{varialbe_name}_{i}{type_side_suffix}", ) - node = target[f"filter_type_all{suffix}/{varialbe_name}_{i}_{type_side}_ebd"] + node = target[f"filter_type_all{suffix}/{varialbe_name}_{i}{type_side_suffix}"] ret["layer_" + str(i)] = node return ret diff --git a/deepmd/utils/graph.py b/deepmd/utils/graph.py index 2a795a45a2..ad4ee0224a 100644 --- a/deepmd/utils/graph.py +++ b/deepmd/utils/graph.py @@ -237,6 +237,91 @@ def get_embedding_net_variables_from_graph_def( return embedding_net_variables +def get_extra_embedding_net_suffix(type_one_side: bool): + """Get the extra embedding net suffix according to the value of type_one_side. + + Parameters + ---------- + type_one_side + The value of type_one_side + + Returns + ------- + str + The extra embedding net suffix + """ + if type_one_side: + extra_suffix = "_one_side_ebd" + else: + extra_suffix = "_two_side_ebd" + return extra_suffix + + +def get_variables_from_graph_def_as_numpy_array(graph_def: tf.GraphDef, pattern: str): + """Get variables from the given tf.GraphDef object, with numpy array returns. + + Parameters + ---------- + graph_def + The input tf.GraphDef object + pattern : str + The name of variable + + Returns + ------- + np.ndarray + The numpy array of the variable + """ + node = get_pattern_nodes_from_graph_def(graph_def, pattern)[pattern] + dtype = tf.as_dtype(node.dtype).as_numpy_dtype + tensor_shape = tf.TensorShape(node.tensor_shape).as_list() + if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): + tensor_value = np.frombuffer( + node.tensor_content, + dtype=tf.as_dtype(node.dtype).as_numpy_dtype, + ) + else: + tensor_value = get_tensor_by_type(node, dtype) + return np.reshape(tensor_value, tensor_shape) + + +def get_extra_embedding_net_variables_from_graph_def( + graph_def: tf.GraphDef, suffix: str, extra_suffix: str, layer_size: int +): + """Get extra embedding net variables from the given tf.GraphDef object. + The "extra embedding net" means the embedding net with only type embeddings input, + which occurs in "se_atten_v2" and "se_a_ebd_v2" descriptor. + + Parameters + ---------- + graph_def + The input tf.GraphDef object + suffix : str + The "common" suffix in the descriptor + extra_suffix : str + This value depends on the value of "type_one_side". + It should always be "_one_side_ebd" or "_two_side_ebd" + layer_size : int + The layer size of the embedding net + + Returns + ------- + Dict + The extra embedding net variables within the given tf.GraphDef object + """ + extra_embedding_net_variables = {} + for i in range(1, layer_size + 1): + matrix_pattern = f"filter_type_all{suffix}/matrix_{i}{extra_suffix}" + extra_embedding_net_variables[ + matrix_pattern + ] = get_variables_from_graph_def_as_numpy_array(graph_def, matrix_pattern) + bias_pattern = f"filter_type_all{suffix}/bias_{i}{extra_suffix}" + extra_embedding_net_variables[ + bias_pattern + ] = get_variables_from_graph_def_as_numpy_array(graph_def, bias_pattern) + return extra_embedding_net_variables + + def get_embedding_net_variables(model_file: str, suffix: str = "") -> Dict: """Get the embedding net variables with the given frozen model(model_file). diff --git a/source/tests/test_init_frz_model_se_a_tebd.py b/source/tests/test_init_frz_model_se_a_tebd.py new file mode 100644 index 0000000000..e54cae9781 --- /dev/null +++ b/source/tests/test_init_frz_model_se_a_tebd.py @@ -0,0 +1,196 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import unittest + +import numpy as np +from common import ( + j_loader, + run_dp, + tests_path, +) + +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + tf, +) +from deepmd.train.run_options import ( + RunOptions, +) +from deepmd.train.trainer import ( + DPTrainer, +) +from deepmd.utils.argcheck import ( + normalize, +) +from deepmd.utils.compat import ( + update_deepmd_input, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) + +if GLOBAL_NP_FLOAT_PRECISION == np.float32: + default_places = 4 +else: + default_places = 10 + + +def _file_delete(file): + if os.path.isdir(file): + os.rmdir(file) + elif os.path.isfile(file): + os.remove(file) + + +def _init_models(): + data_file = str(tests_path / os.path.join("init_frz_model", "data")) + frozen_model = str(tests_path / "init_frz_se_a_tebd.pb") + ckpt = str(tests_path / "init_frz_se_a_tebd.ckpt") + run_opt_ckpt = RunOptions(init_model=ckpt, log_level=20) + run_opt_frz = RunOptions(init_frz_model=frozen_model, log_level=20) + INPUT = str(tests_path / "input.json") + jdata = j_loader(str(tests_path / os.path.join("init_frz_model", "input.json"))) + jdata["model"]["descriptor"]["type"] = "se_a_ebd_v2" + jdata["training"]["training_data"]["systems"] = data_file + jdata["training"]["validation_data"]["systems"] = data_file + jdata["training"]["save_ckpt"] = ckpt + with open(INPUT, "w") as fp: + json.dump(jdata, fp, indent=4) + ret = run_dp("dp train " + INPUT) + np.testing.assert_equal(ret, 0, "DP train failed!") + ret = run_dp("dp freeze -c " + str(tests_path) + " -o " + frozen_model) + np.testing.assert_equal(ret, 0, "DP freeze failed!") + + jdata = update_deepmd_input(jdata, warning=True, dump="input_v2_compat.json") + jdata = normalize(jdata) + model_ckpt = DPTrainer(jdata, run_opt=run_opt_ckpt) + model_frz = DPTrainer(jdata, run_opt=run_opt_frz) + rcut = model_ckpt.model.get_rcut() + type_map = model_ckpt.model.get_type_map() + data = DeepmdDataSystem( + systems=[data_file], + batch_size=1, + test_size=1, + rcut=rcut, + type_map=type_map, + trn_all_set=True, + ) + data_requirement = { + "energy": { + "ndof": 1, + "atomic": False, + "must": False, + "high_prec": True, + "type_sel": None, + "repeat": 1, + "default": 0.0, + }, + "force": { + "ndof": 3, + "atomic": True, + "must": False, + "high_prec": False, + "type_sel": None, + "repeat": 1, + "default": 0.0, + }, + "virial": { + "ndof": 9, + "atomic": False, + "must": False, + "high_prec": False, + "type_sel": None, + "repeat": 1, + "default": 0.0, + }, + "atom_ener": { + "ndof": 1, + "atomic": True, + "must": False, + "high_prec": False, + "type_sel": None, + "repeat": 1, + "default": 0.0, + }, + "atom_pref": { + "ndof": 1, + "atomic": True, + "must": False, + "high_prec": False, + "type_sel": None, + "repeat": 3, + "default": 0.0, + }, + } + data.add_dict(data_requirement) + stop_batch = jdata["training"]["numb_steps"] + + return INPUT, ckpt, frozen_model, model_ckpt, model_frz, data, stop_batch + + +( + INPUT, + CKPT, + FROZEN_MODEL, + CKPT_TRAINER, + FRZ_TRAINER, + VALID_DATA, + STOP_BATCH, +) = _init_models() + + +class TestInitFrzModelA(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.dp_ckpt = CKPT_TRAINER + cls.dp_frz = FRZ_TRAINER + cls.valid_data = VALID_DATA + cls.stop_batch = STOP_BATCH + + @classmethod + def tearDownClass(cls): + _file_delete(INPUT) + _file_delete(FROZEN_MODEL) + _file_delete("out.json") + _file_delete(str(tests_path / "checkpoint")) + _file_delete(CKPT + ".meta") + _file_delete(CKPT + ".index") + _file_delete(CKPT + ".data-00000-of-00001") + _file_delete(CKPT + "-0.meta") + _file_delete(CKPT + "-0.index") + _file_delete(CKPT + "-0.data-00000-of-00001") + _file_delete(CKPT + "-1.meta") + _file_delete(CKPT + "-1.index") + _file_delete(CKPT + "-1.data-00000-of-00001") + _file_delete("input_v2_compat.json") + _file_delete("lcurve.out") + + def test_single_frame(self): + valid_batch = self.valid_data.get_batch() + natoms = valid_batch["natoms_vec"] + tf.reset_default_graph() + self.dp_ckpt.build(self.valid_data, self.stop_batch) + self.dp_ckpt._init_session() + feed_dict_ckpt = self.dp_ckpt.get_feed_dict(valid_batch, is_training=False) + ckpt_rmse_ckpt = self.dp_ckpt.loss.eval( + self.dp_ckpt.sess, feed_dict_ckpt, natoms + ) + tf.reset_default_graph() + + self.dp_frz.build(self.valid_data, self.stop_batch) + self.dp_frz._init_session() + feed_dict_frz = self.dp_frz.get_feed_dict(valid_batch, is_training=False) + ckpt_rmse_frz = self.dp_frz.loss.eval(self.dp_frz.sess, feed_dict_frz, natoms) + tf.reset_default_graph() + + # check values + np.testing.assert_almost_equal( + ckpt_rmse_ckpt["rmse_e"], ckpt_rmse_frz["rmse_e"], default_places + ) + np.testing.assert_almost_equal( + ckpt_rmse_ckpt["rmse_f"], ckpt_rmse_frz["rmse_f"], default_places + ) + np.testing.assert_almost_equal( + ckpt_rmse_ckpt["rmse_v"], ckpt_rmse_frz["rmse_v"], default_places + )