From 268591cf8e03a8d23256efe711e5b3c3a4b71ce6 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 8 Mar 2024 03:24:49 -0500 Subject: [PATCH 1/4] pt: make get_data non-blocking (#3422) `to(DEVICE)` is cpu-blocking but `to(DEVICE, non-blocking=True)` is not blocking. This improves performance by at least 0.1s/100 steps. Before, `get_data` is blocking: ![1709698811097](https://github.com/deepmodeling/deepmd-kit/assets/9496702/b86b3928-41e7-46d3-8692-ca96b3a6475a) ![1709698811150](https://github.com/deepmodeling/deepmd-kit/assets/9496702/c4365203-3f3d-4de8-aae6-d8587f0e95a0) After, `get_data` is not blocking: ![1709698811122](https://github.com/deepmodeling/deepmd-kit/assets/9496702/d991c8f0-35c8-4b5d-822e-77af961e9b6e) ![1709698811169](https://github.com/deepmodeling/deepmd-kit/assets/9496702/a56160c2-78c7-4a44-aa96-1df0b520a60a) The subsequent blocking is `phys2inter` (via `torch.linalg.inv`). Signed-off-by: Jinzhe Zeng --- deepmd/pt/train/training.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 6938db9b3c..93afc38575 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -973,9 +973,11 @@ def get_data(self, is_train=True, task_key="Default"): continue elif not isinstance(batch_data[key], list): if batch_data[key] is not None: - batch_data[key] = batch_data[key].to(DEVICE) + batch_data[key] = batch_data[key].to(DEVICE, non_blocking=True) else: - batch_data[key] = [item.to(DEVICE) for item in batch_data[key]] + batch_data[key] = [ + item.to(DEVICE, non_blocking=True) for item in batch_data[key] + ] # we may need a better way to classify which are inputs and which are labels # now wrapper only supports the following inputs: input_keys = [ From fefc0e6cf64e3dcd6d78e9ce8707f4fc8c2a3b17 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 8 Mar 2024 03:25:01 -0500 Subject: [PATCH 2/4] pt: fix print_on_training when there is no validation data (#3423) #3405 changed results from `None` to `{}` but `print_on_training` wasn't revised. --- deepmd/pt/train/training.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 93afc38575..62bc5a4c97 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -1037,7 +1037,7 @@ def print_on_training(self, fout, step_id, cur_lr, train_results, valid_results) print_str = "" print_str += "%7d" % step_id if not self.multi_task: - if valid_results is not None: + if valid_results: prop_fmt = " %11.2e %11.2e" for k in train_keys: print_str += prop_fmt % (valid_results[k], train_results[k]) @@ -1047,7 +1047,7 @@ def print_on_training(self, fout, step_id, cur_lr, train_results, valid_results) print_str += prop_fmt % (train_results[k]) else: for model_key in self.model_keys: - if valid_results[model_key] is not None: + if valid_results[model_key]: prop_fmt = " %11.2e %11.2e" for k in sorted(valid_results[model_key].keys()): print_str += prop_fmt % ( From dabbd35cfcc0c75eff7e567c69f6c8b228e14cbe Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 8 Mar 2024 03:25:17 -0500 Subject: [PATCH 3/4] Consistent activation functions between backends (#3431) 1. add relu, gelu, gelu_tf, relu6, softplus, sigmoid, and linear to dpmodel; 2. add gelu_tf, relu6, soft6, softplus, and sigmoid to pt; 3. change gelu in pt from non-approximate to approximate. If one still wants to use the non-approximate version, we may consider to add a new key; 4. add linear to tf; 5. none in tf now returns `lambda x: x` instead of `None` to be type consistent; 6. support uppercase in all backends; 7. add consistent tests. Signed-off-by: Jinzhe Zeng --- deepmd/common.py | 10 +++- deepmd/dpmodel/utils/network.py | 59 +++++++++++++++++--- deepmd/pt/utils/utils.py | 20 +++++-- deepmd/tf/common.py | 13 ++--- source/tests/consistent/test_activation.py | 63 ++++++++++++++++++++++ 5 files changed, 146 insertions(+), 19 deletions(-) create mode 100644 source/tests/consistent/test_activation.py diff --git a/deepmd/common.py b/deepmd/common.py index 29d32111a8..c776975591 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -52,7 +52,15 @@ _DICT_VAL = TypeVar("_DICT_VAL") _PRECISION = Literal["default", "float16", "float32", "float64"] _ACTIVATION = Literal[ - "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf" + "relu", + "relu6", + "softplus", + "sigmoid", + "tanh", + "gelu", + "gelu_tf", + "none", + "linear", ] __all__.extend( [ diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index feb3355e77..6206367b1b 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -10,6 +10,7 @@ datetime, ) from typing import ( + Callable, ClassVar, Dict, List, @@ -309,14 +310,7 @@ def call(self, x: np.ndarray) -> np.ndarray: """ if self.w is None or self.activation_function is None: raise ValueError("w, b, and activation_function must be set") - if self.activation_function == "tanh": - fn = np.tanh - elif self.activation_function.lower() == "none": - - def fn(x): - return x - else: - raise NotImplementedError(self.activation_function) + fn = get_activation_fn(self.activation_function) y = ( np.matmul(x, self.w) + self.b if self.b is not None @@ -332,6 +326,55 @@ def fn(x): return y +def get_activation_fn(activation_function: str) -> Callable[[np.ndarray], np.ndarray]: + activation_function = activation_function.lower() + if activation_function == "tanh": + return np.tanh + elif activation_function == "relu": + + def fn(x): + # https://stackoverflow.com/a/47936476/9567349 + return x * (x > 0) + + return fn + elif activation_function in ("gelu", "gelu_tf"): + + def fn(x): + # generated by GitHub Copilot + return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3))) + + return fn + elif activation_function == "relu6": + + def fn(x): + # generated by GitHub Copilot + return np.minimum(np.maximum(x, 0), 6) + + return fn + elif activation_function == "softplus": + + def fn(x): + # generated by GitHub Copilot + return np.log(1 + np.exp(x)) + + return fn + elif activation_function == "sigmoid": + + def fn(x): + # generated by GitHub Copilot + return 1 / (1 + np.exp(-x)) + + return fn + elif activation_function.lower() in ("none", "linear"): + + def fn(x): + return x + + return fn + else: + raise NotImplementedError(activation_function) + + def make_multilayer_network(T_NetworkLayer, ModuleBase): class NN(ModuleBase): """Native representation of a neural network. diff --git a/deepmd/pt/utils/utils.py b/deepmd/pt/utils/utils.py index f5a4cd84b6..10dcadadac 100644 --- a/deepmd/pt/utils/utils.py +++ b/deepmd/pt/utils/utils.py @@ -21,10 +21,16 @@ def get_activation_fn(activation: str) -> Callable: """Returns the activation function corresponding to `activation`.""" if activation.lower() == "relu": return F.relu - elif activation.lower() == "gelu": - return F.gelu + elif activation.lower() == "gelu" or activation.lower() == "gelu_tf": + return lambda x: F.gelu(x, approximate="tanh") elif activation.lower() == "tanh": return torch.tanh + elif activation.lower() == "relu6": + return F.relu6 + elif activation.lower() == "softplus": + return F.softplus + elif activation.lower() == "sigmoid": + return torch.sigmoid elif activation.lower() == "linear" or activation.lower() == "none": return lambda x: x else: @@ -42,10 +48,16 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: if self.activation.lower() == "relu": return F.relu(x) - elif self.activation.lower() == "gelu": - return F.gelu(x) + elif self.activation.lower() == "gelu" or self.activation.lower() == "gelu_tf": + return F.gelu(x, approximate="tanh") elif self.activation.lower() == "tanh": return torch.tanh(x) + elif self.activation.lower() == "relu6": + return F.relu6(x) + elif self.activation.lower() == "softplus": + return F.softplus(x) + elif self.activation.lower() == "sigmoid": + return torch.sigmoid(x) elif self.activation.lower() == "linear" or self.activation.lower() == "none": return x else: diff --git a/deepmd/tf/common.py b/deepmd/tf/common.py index b1872e72ed..0d59990a29 100644 --- a/deepmd/tf/common.py +++ b/deepmd/tf/common.py @@ -135,14 +135,14 @@ def gelu_wrapper(x): "tanh": tf.nn.tanh, "gelu": gelu, "gelu_tf": gelu_tf, - "None": None, - "none": None, + "linear": lambda x: x, + "none": lambda x: x, } def get_activation_func( activation_fn: Union["_ACTIVATION", None], -) -> Union[Callable[[tf.Tensor], tf.Tensor], None]: +) -> Callable[[tf.Tensor], tf.Tensor]: """Get activation function callable based on string name. Parameters @@ -161,10 +161,11 @@ def get_activation_func( if unknown activation function is specified """ if activation_fn is None: - return None - if activation_fn not in ACTIVATION_FN_DICT: + activation_fn = "none" + assert activation_fn is not None + if activation_fn.lower() not in ACTIVATION_FN_DICT: raise RuntimeError(f"{activation_fn} is not a valid activation function") - return ACTIVATION_FN_DICT[activation_fn] + return ACTIVATION_FN_DICT[activation_fn.lower()] def get_precision(precision: "_PRECISION") -> Any: diff --git a/source/tests/consistent/test_activation.py b/source/tests/consistent/test_activation.py new file mode 100644 index 0000000000..bb06df9082 --- /dev/null +++ b/source/tests/consistent/test_activation.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import unittest + +import numpy as np + +from deepmd.dpmodel.utils.network import get_activation_fn as get_activation_fn_dp + +from .common import ( + INSTALLED_PT, + INSTALLED_TF, + parameterized, +) + +if INSTALLED_PT: + from deepmd.pt.utils.utils import get_activation_fn as get_activation_fn_pt + from deepmd.pt.utils.utils import ( + to_numpy_array, + to_torch_tensor, + ) +if INSTALLED_TF: + from deepmd.tf.common import get_activation_func as get_activation_fn_tf + from deepmd.tf.env import ( + tf, + ) + + +@parameterized( + ( + "Relu", + "Relu6", + "Softplus", + "Sigmoid", + "Tanh", + "Gelu", + "Gelu_tf", + "Linear", + "None", + ), +) +class TestActivationFunctionConsistent(unittest.TestCase): + def setUp(self): + (self.activation,) = self.param + self.random_input = np.random.default_rng().normal(scale=10, size=(10, 10)) + self.ref = get_activation_fn_dp(self.activation)(self.random_input) + + @unittest.skipUnless(INSTALLED_TF, "TensorFlow is not installed") + def test_tf_consistent_with_ref(self): + if INSTALLED_TF: + place_holder = tf.placeholder(tf.float64, self.random_input.shape) + t_test = get_activation_fn_tf(self.activation)(place_holder) + with tf.Session() as sess: + test = sess.run(t_test, feed_dict={place_holder: self.random_input}) + np.testing.assert_allclose(self.ref, test, atol=1e-10) + + @unittest.skipUnless(INSTALLED_PT, "PyTorch is not installed") + def test_pt_consistent_with_ref(self): + if INSTALLED_PT: + test = to_numpy_array( + get_activation_fn_pt(self.activation)( + to_torch_tensor(self.random_input) + ) + ) + np.testing.assert_allclose(self.ref, test, atol=1e-10) From 66edd1f991254bd0c5dbbd3ff04774ac7c34c76d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 8 Mar 2024 03:26:51 -0500 Subject: [PATCH 4/4] fix errors when `dp` is executed without any subcommands (#3437) Signed-off-by: Jinzhe Zeng --- deepmd/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deepmd/main.py b/deepmd/main.py index 870a04a088..09457419e8 100644 --- a/deepmd/main.py +++ b/deepmd/main.py @@ -798,7 +798,8 @@ def main(): ): deepmd_main = BACKENDS[args.backend]().entry_point_hook elif args.command is None: - pass + # help message has been printed in parse_args + return else: raise RuntimeError(f"unknown command {args.command}")