diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml
index 2853a420c9..799a55e9ff 100644
--- a/.github/workflows/test_cc.yml
+++ b/.github/workflows/test_cc.yml
@@ -47,11 +47,9 @@ jobs:
         CMAKE_GENERATOR: Ninja
         CXXFLAGS: ${{ matrix.check_memleak && '-fsanitize=leak' || '' }}
     # test lammps
-    # ASE issue: https://gitlab.com/ase/ase/-/merge_requests/2843
-    # TODO: remove ase version when ase has new release
     - run: |
         export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
-        python -m uv pip install --system -e .[cpu,test,lmp] mpi4py "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz"
+        python -m uv pip install --system -e .[cpu,test,lmp] mpi4py
       env:
         DP_BUILD_TESTING: 1
       if: ${{ !matrix.check_memleak }}
diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index 3cb75ecf1a..d7ff72bafa 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -50,7 +50,7 @@ jobs:
     - run: python -m uv pip install --system "tensorflow>=2.15.0rc0" "torch>=2.2.0"
     - run: |
         export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
-        python -m uv pip install --system -v -e .[gpu,test,lmp,cu12,torch] mpi4py "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz"
+        python -m uv pip install --system -v -e .[gpu,test,lmp,cu12,torch] mpi4py
       env:
         DP_VARIANT: cuda
         DP_ENABLE_NATIVE_OPTIMIZATION: 1
diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py
index e612a9e4f0..2682d06520 100644
--- a/deepmd/dpmodel/descriptor/dpa1.py
+++ b/deepmd/dpmodel/descriptor/dpa1.py
@@ -1,6 +1,26 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Any,
+    Callable,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
 import numpy as np
 
+from deepmd.dpmodel import (
+    DEFAULT_PRECISION,
+    PRECISION_DICT,
+    NativeOP,
+)
+from deepmd.dpmodel.utils import (
+    EmbeddingNet,
+    EnvMat,
+    NetworkCollection,
+    PairExcludeMask,
+)
 from deepmd.dpmodel.utils.network import (
     LayerNorm,
     NativeLayer,
@@ -28,32 +48,6 @@
     check_version_compatibility,
 )
 
-try:
-    from deepmd._version import version as __version__
-except ImportError:
-    __version__ = "unknown"
-
-from typing import (
-    Any,
-    Callable,
-    List,
-    Optional,
-    Tuple,
-    Union,
-)
-
-from deepmd.dpmodel import (
-    DEFAULT_PRECISION,
-    PRECISION_DICT,
-    NativeOP,
-)
-from deepmd.dpmodel.utils import (
-    EmbeddingNet,
-    EnvMat,
-    NetworkCollection,
-    PairExcludeMask,
-)
-
 from .base_descriptor import (
     BaseDescriptor,
 )
diff --git a/deepmd/dpmodel/descriptor/dpa2.py b/deepmd/dpmodel/descriptor/dpa2.py
index a0a245c6c4..73cc06dfe1 100644
--- a/deepmd/dpmodel/descriptor/dpa2.py
+++ b/deepmd/dpmodel/descriptor/dpa2.py
@@ -1,6 +1,20 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
+
 import numpy as np
 
+from deepmd.dpmodel import (
+    NativeOP,
+)
+from deepmd.dpmodel.utils import (
+    EnvMat,
+    NetworkCollection,
+)
 from deepmd.dpmodel.utils.network import (
     Identity,
     NativeLayer,
@@ -25,30 +39,10 @@
     check_version_compatibility,
 )
 
-try:
-    from deepmd._version import version as __version__
-except ImportError:
-    __version__ = "unknown"
-
-from typing import (
-    List,
-    Optional,
-    Tuple,
-    Union,
-)
-
-from deepmd.dpmodel import (
-    NativeOP,
-)
-from deepmd.dpmodel.utils import (
-    EnvMat,
-    NetworkCollection,
-)
 from deepmd.utils.finetune import (
     get_index_between_two_maps,
     map_pair_exclude_types,
 )
-
 from .base_descriptor import (
     BaseDescriptor,
 )
diff --git a/deepmd/dpmodel/descriptor/repformers.py b/deepmd/dpmodel/descriptor/repformers.py
index f0275b44b1..c9ac48efec 100644
--- a/deepmd/dpmodel/descriptor/repformers.py
+++ b/deepmd/dpmodel/descriptor/repformers.py
@@ -1,22 +1,4 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import numpy as np
-
-from deepmd.dpmodel.utils.network import (
-    LayerNorm,
-    NativeLayer,
-)
-from deepmd.utils.path import (
-    DPPath,
-)
-from deepmd.utils.version import (
-    check_version_compatibility,
-)
-
-try:
-    from deepmd._version import version as __version__
-except ImportError:
-    __version__ = "unknown"
-
 from typing import (
     Callable,
     List,
@@ -25,6 +7,8 @@
     Union,
 )
 
+import numpy as np
+
 from deepmd.dpmodel import (
     PRECISION_DICT,
     NativeOP,
@@ -34,8 +18,16 @@
     PairExcludeMask,
 )
 from deepmd.dpmodel.utils.network import (
+    LayerNorm,
+    NativeLayer,
     get_activation_fn,
 )
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
 
 from .descriptor import (
     DescriptorBlock,
diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py
index c51d297849..55beceb4d6 100644
--- a/deepmd/dpmodel/descriptor/se_e2_a.py
+++ b/deepmd/dpmodel/descriptor/se_e2_a.py
@@ -1,30 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import itertools
-
-import numpy as np
-
-from deepmd.dpmodel.utils.update_sel import (
-    UpdateSel,
-)
-from deepmd.env import (
-    GLOBAL_NP_FLOAT_PRECISION,
-)
-from deepmd.utils.data_system import (
-    DeepmdDataSystem,
-)
-from deepmd.utils.path import (
-    DPPath,
-)
-from deepmd.utils.version import (
-    check_version_compatibility,
-)
-
-try:
-    from deepmd._version import version as __version__
-except ImportError:
-    __version__ = "unknown"
-
 import copy
+import itertools
 from typing import (
     Any,
     List,
@@ -32,6 +8,8 @@
     Tuple,
 )
 
+import numpy as np
+
 from deepmd.dpmodel import (
     DEFAULT_PRECISION,
     PRECISION_DICT,
@@ -43,6 +21,21 @@
     NetworkCollection,
     PairExcludeMask,
 )
+from deepmd.dpmodel.utils.update_sel import (
+    UpdateSel,
+)
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
 
 from .base_descriptor import (
     BaseDescriptor,
diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py
index 238421be41..47e9d6d31a 100644
--- a/deepmd/dpmodel/descriptor/se_r.py
+++ b/deepmd/dpmodel/descriptor/se_r.py
@@ -1,24 +1,4 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
-import numpy as np
-
-from deepmd.dpmodel.utils.update_sel import (
-    UpdateSel,
-)
-from deepmd.utils.data_system import (
-    DeepmdDataSystem,
-)
-from deepmd.utils.path import (
-    DPPath,
-)
-from deepmd.utils.version import (
-    check_version_compatibility,
-)
-
-try:
-    from deepmd._version import version as __version__
-except ImportError:
-    __version__ = "unknown"
-
 import copy
 from typing import (
     Any,
@@ -27,6 +7,8 @@
     Tuple,
 )
 
+import numpy as np
+
 from deepmd.dpmodel import (
     DEFAULT_PRECISION,
     PRECISION_DICT,
@@ -38,9 +20,21 @@
     NetworkCollection,
     PairExcludeMask,
 )
+from deepmd.dpmodel.utils.update_sel import (
+    UpdateSel,
+)
 from deepmd.env import (
     GLOBAL_NP_FLOAT_PRECISION,
 )
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+from deepmd.utils.path import (
+    DPPath,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
 
 from .base_descriptor import (
     BaseDescriptor,
diff --git a/deepmd/dpmodel/descriptor/se_t.py b/deepmd/dpmodel/descriptor/se_t.py
index 2dd1ddcc32..671f92ecd5 100644
--- a/deepmd/dpmodel/descriptor/se_t.py
+++ b/deepmd/dpmodel/descriptor/se_t.py
@@ -1,8 +1,25 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import copy
 import itertools
+from typing import (
+    List,
+    Optional,
+    Tuple,
+)
 
 import numpy as np
 
+from deepmd.dpmodel import (
+    DEFAULT_PRECISION,
+    PRECISION_DICT,
+    NativeOP,
+)
+from deepmd.dpmodel.utils import (
+    EmbeddingNet,
+    EnvMat,
+    NetworkCollection,
+    PairExcludeMask,
+)
 from deepmd.dpmodel.utils.update_sel import (
     UpdateSel,
 )
@@ -19,30 +36,6 @@
     check_version_compatibility,
 )
 
-try:
-    from deepmd._version import version as __version__
-except ImportError:
-    __version__ = "unknown"
-
-import copy
-from typing import (
-    List,
-    Optional,
-    Tuple,
-)
-
-from deepmd.dpmodel import (
-    DEFAULT_PRECISION,
-    PRECISION_DICT,
-    NativeOP,
-)
-from deepmd.dpmodel.utils import (
-    EmbeddingNet,
-    EnvMat,
-    NetworkCollection,
-    PairExcludeMask,
-)
-
 from .base_descriptor import (
     BaseDescriptor,
 )
diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py
index 2a68b793d4..7a17cc459d 100644
--- a/deepmd/dpmodel/utils/network.py
+++ b/deepmd/dpmodel/utils/network.py
@@ -17,20 +17,14 @@
 
 import numpy as np
 
-from deepmd.utils.version import (
-    check_version_compatibility,
-)
-
-try:
-    from deepmd._version import version as __version__
-except ImportError:
-    __version__ = "unknown"
-
 from deepmd.dpmodel import (
     DEFAULT_PRECISION,
     PRECISION_DICT,
     NativeOP,
 )
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
 
 
 class Identity(NativeOP):
diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py
index 26619da839..86f515e3a0 100644
--- a/deepmd/pt/entrypoints/main.py
+++ b/deepmd/pt/entrypoints/main.py
@@ -288,7 +288,7 @@ def train(FLAGS):
 
 def freeze(FLAGS):
     model = torch.jit.script(inference.Tester(FLAGS.model, head=FLAGS.head).model)
-    if '"type": "dpa2"' in model.model_def_script:
+    if '"type": "dpa2"' in model.get_model_def_script():
         extra_files = {"type": "dpa2"}
     else:
         extra_files = {"type": "else"}
diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py
index e5ea339fb7..dada7ed1b8 100644
--- a/deepmd/pt/model/network/mlp.py
+++ b/deepmd/pt/model/network/mlp.py
@@ -41,11 +41,6 @@
     to_torch_tensor,
 )
 
-try:
-    from deepmd._version import version as __version__
-except ImportError:
-    __version__ = "unknown"
-
 
 def empty_t(shape, precision):
     return torch.empty(shape, dtype=precision, device=device)
diff --git a/deepmd/tf/descriptor/se_r.py b/deepmd/tf/descriptor/se_r.py
index 1d3be03c89..476e6515eb 100644
--- a/deepmd/tf/descriptor/se_r.py
+++ b/deepmd/tf/descriptor/se_r.py
@@ -769,9 +769,6 @@ def serialize(self, suffix: str = "") -> dict:
             raise NotImplementedError("spin is unsupported")
         assert self.davg is not None
         assert self.dstd is not None
-        # TODO: tf: handle type embedding in DescrptSeR.serialize
-        # not sure how to handle type embedding - type embedding is not a model parameter,
-        # but instead a part of the input data. Maybe the interface should be refactored...
         return {
             "@class": "Descriptor",
             "type": "se_r",
diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md
index 5bebdf580d..acd1a500a7 100644
--- a/doc/model/train-se-atten.md
+++ b/doc/model/train-se-atten.md
@@ -21,7 +21,11 @@ Attention-based descriptor $\mathcal{D}^i \in \mathbb{R}^{M \times M_{<}}$, whic
 ```
 
 where $\hat{\mathcal{G}}^i$ represents the embedding matrix $\mathcal{G}^i$ after additional self-attention mechanism and $\mathcal{R}^i$ is defined by the full case in the [`se_e2_a`](./train-se-e2-a.md).
-Note that we obtain $\mathcal{G}^i$ using the type embedding method by default in this descriptor.
+Note that we obtain $\mathcal{G}^i$ using the type embedding method by default in this descriptor. By default, we concatenate $s(r_{ij})$ with the type embeddings of the central and neighboring atoms, $\mathcal{A}^i$ and $\mathcal{A}^j$ (or with that of the neighboring atom $\mathcal{A}^j$ only), and use the result as the input to the embedding network $\mathcal{N}_{e,2}$:
+
+```math
+   (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(\{s(r_{ij}), \mathcal{A}^i, \mathcal{A}^j\})  \quad \mathrm{or}\quad(\mathcal{G}^i)_j = \mathcal{N}_{e,2}(\{s(r_{ij}), \mathcal{A}^j\})
+```
 
 To perform the self-attention mechanism, the queries $\mathcal{Q}^{i,l} \in \mathbb{R}^{N_c\times d_k}$, keys $\mathcal{K}^{i,l} \in \mathbb{R}^{N_c\times d_k}$, and values $\mathcal{V}^{i,l} \in \mathbb{R}^{N_c\times d_v}$ are first obtained:
 
@@ -122,6 +126,16 @@ We highly recommend using the version 2.0 of the attention-based descriptor `"se
       "set_davg_zero": false
 ```
 
+When using the PyTorch backend, you must continue to use the descriptor `"se_atten"` and set `tebd_input_mode` to `"strip"` and `smooth_type_embedding` to `"true"`, which achieves the effect of `"se_atten_v2"`. The `tebd_input_mode` option accepts either `"concat"` or `"strip"`. When using the TensorFlow backend, you need to use the descriptor `"se_atten_v2"` and do not need to set `tebd_input_mode` or `smooth_type_embedding`, because in the TensorFlow backend the default value of `tebd_input_mode` is `"strip"` and the default value of `smooth_type_embedding` is `"true"`. When `tebd_input_mode` is set to `"strip"`, the embedding matrix $\mathcal{G}^i$ is constructed as:
+
+```math
+   (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(s(r_{ij})) + \mathcal{N}_{e,2}(s(r_{ij})) \odot (\mathcal{N}_{e,2}(\{\mathcal{A}^i, \mathcal{A}^j\}) \odot s(r_{ij})) \quad \mathrm{or}
+```
+
+```math
+    (\mathcal{G}^i)_j = \mathcal{N}_{e,2}(s(r_{ij})) + \mathcal{N}_{e,2}(s(r_{ij})) \odot (\mathcal{N}_{e,2}(\{\mathcal{A}^j\}) \odot s(r_{ij}))
+```
+
 Practical evidence demonstrates that `"se_atten_v2"` offers better and more stable performance compared to `"se_atten"`.
 
 Notice: Model compression for the `se_atten_v2` descriptor is exclusively designed for models with the training parameter {ref}`attn_layer <model/descriptor[se_atten_v2]/attn_layer>` set to 0.
diff --git a/pyproject.toml b/pyproject.toml
index 2f04857daf..75aab35936 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,7 +69,9 @@ repository = "https://github.com/deepmodeling/deepmd-kit"
 [tool.deepmd_build_backend.optional-dependencies]
 test = [
     "dpdata>=0.2.7",
-    "ase",
+    # ASE issue: https://gitlab.com/ase/ase/-/merge_requests/2843
+    # fixed in 3.23.0
+    "ase>=3.23.0",
     "pytest",
     "pytest-cov",
     "pytest-sugar",