From b688534854d98a3eb7388361a70910b403841d49 Mon Sep 17 00:00:00 2001 From: "njzjz-bot (driven by OpenClaw (model: custom-chat-jinzhezeng-group/gpt-5.4))[bot]" <48687836+njzjz-bot@users.noreply.github.com> Date: Fri, 3 Apr 2026 14:33:46 +0000 Subject: [PATCH 1/6] docs(argcheck): fix typos and wording in doc strings Problem - argcheck.py contains multiple doc string typos and awkward phrases - some repeated descriptions are grammatically inconsistent and harder to read Change - fix spelling and grammar issues in descriptor and fitting doc strings - normalize repeated wording for trainable flags, neighbor selection, and embedding descriptions - keep the change documentation-only with no behavior changes Authored by OpenClaw (model: custom-chat-jinzhezeng-group/gpt-5.4) --- deepmd/utils/argcheck.py | 114 +++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 5fe1d4f3f1..0c22759b2f 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -44,7 +44,7 @@ doc_only_pt_expt_supported = "(Supported Backend: PyTorch Exportable) " doc_only_pd_supported = "(Supported Backend: Paddle) " # descriptors -doc_loc_frame = "Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame." +doc_loc_frame = "Defines a local frame at each atom, and computes the descriptor as local coordinates under this frame." doc_se_e2_a = "Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor." doc_se_e2_r = "Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor." doc_se_e3 = "Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor." @@ -52,12 +52,12 @@ doc_se_atten = "Used by the smooth edition of Deep Potential. 
The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor." doc_se_atten_v2 = "Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism with new modifications will be used by this descriptor." doc_se_a_mask = "Used by the smooth edition of Deep Potential. It can accept a variable number of atoms in a frame (Non-PBC system). *aparam* are required as an indicator matrix for the real/virtual sign of input atoms." -doc_hybrid = "Concatenate of a list of descriptors as a new descriptor." +doc_hybrid = "Concatenates a list of descriptors into a new descriptor." # fitting doc_ener = "Fit an energy model (potential energy surface)." -doc_dos = "Fit a density of states model. The total density of states / site-projected density of states labels should be provided by `dos.npy` or `atom_dos.npy` in each data system. The file has number of frames lines and number of energy grid columns (times number of atoms in `atom_dos.npy`). See `loss` parameter." -doc_dipole = "Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times of number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter." -doc_polar = "Fit an atomic polarizability model. Global polarizazbility labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file with has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter." +doc_dos = "Fit a density of states model. 
The total density of states / site-projected density of states labels should be provided by `dos.npy` or `atom_dos.npy` in each data system. The file has a number of frame lines and a number of energy-grid columns (times the number of atoms in `atom_dos.npy`). See `loss` parameter." +doc_dipole = "Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times the number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter." +doc_polar = "Fit an atomic polarizability model. Global polarizability labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file either has number of frames lines and 9 times the number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter." # modifier doc_dipole_charge = "Use WFCC to model the electronic structure of the system. Correct the long-range interaction." @@ -100,12 +100,12 @@ def deprecate_something(data: dict | None) -> bool: def type_embedding_args() -> list[Argument]: - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_seed = "Random seed for parameter initialization" doc_activation_function = f'The activation function in the embedding net. 
Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_trainable = "If the parameters in the embedding net are trainable" + doc_trainable = "Whether the parameters in the embedding net are trainable" doc_use_econf_tebd = "Whether to use electronic configuration type embedding." doc_use_tebd_bias = "Whether to use bias in the type embedding layer." @@ -252,9 +252,9 @@ def get_all_argument(self, exclude_hybrid: bool = False) -> list[Argument]: @descrpt_args_plugin.register("loc_frame", doc=doc_only_tf_supported + doc_loc_frame) def descrpt_local_frame_args() -> list[Argument]: doc_sel_a = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor." - doc_sel_r = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius." + doc_sel_r = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only the relative distances of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius." doc_rcut = "The cut-off radius. 
The default value is 6.0" - doc_axis_rule = "A list of integers. The length should be 6 times of the number of types. \n\n\ + doc_axis_rule = "A list of integers. The length should be 6 times the number of types. \n\n\ - axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\ - axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom.\n\n\ - axis_rule[i*6+2]: index of the axis atom defining the first axis. Note that the neighbors with the same class and type are sorted according to their relative distance.\n\n\ @@ -272,18 +272,18 @@ def descrpt_local_frame_args() -> list[Argument]: @descrpt_args_plugin.register("se_e2_a", alias=["se_a"], doc=doc_se_e2_a) def descrpt_se_a_args() -> list[Argument]: - doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ + doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. 
In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." 
- doc_trainable = "If the parameters in the embedding net is trainable" + doc_trainable = "Whether the parameters in the embedding net are trainable" doc_seed = "Random seed for parameter initialization" doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection." @@ -342,16 +342,16 @@ def descrpt_se_a_args() -> list[Argument]: "se_e3", alias=["se_at", "se_a_3be", "se_t"], doc=doc_se_e3 ) def descrpt_se_t_args() -> list[Argument]: - doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ + doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. 
In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_trainable = "If the parameters in the embedding net are trainable" + doc_trainable = "Whether the parameters in the embedding net are trainable" doc_seed = "Random seed for parameter initialization" doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used" doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." 
@@ -413,17 +413,17 @@ def descrpt_se_a_tpe_args() -> list[Argument]: @descrpt_args_plugin.register("se_e2_r", alias=["se_r"], doc=doc_se_e2_r) def descrpt_se_r_args() -> list[Argument]: - doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ + doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_neuron = "Number of neurons in each hidden layer of the embedding net. 
When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_trainable = "If the parameters in the embedding net are trainable" + doc_trainable = "Whether the parameters in the embedding net are trainable" doc_seed = "Random seed for parameter initialization" doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used" @@ -489,26 +489,26 @@ def descrpt_hybrid_args() -> list[Argument]: def descrpt_se_atten_common_args() -> list[Argument]: - doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. 
Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ + doc_sel = 'This parameter sets the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." 
+ doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_type_one_side = r"If 'False', type embeddings of both neighbor and central atoms are considered. If 'True', only type embeddings of neighbor atoms are considered. Default is 'False'." doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_trainable = "If the parameters in the embedding net is trainable" + doc_trainable = "Whether the parameters in the embedding net are trainable" doc_seed = "Random seed for parameter initialization" doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection." doc_attn = "The length of hidden vectors in attention layers" doc_attn_layer = "The number of attention layers. Note that model compression of `se_atten` works for any attn_layer value (for pytorch backend only, for other backends, attn_layer=0 is still needed to compress) when tebd_input_mode=='strip'. 
When attn_layer!=0, only type embedding is compressed, geometric parts are not compressed." doc_attn_dotr = "Whether to do dot product with the normalized relative coordinates" - doc_attn_mask = "Whether to do mask on the diagonal in the attention matrix" + doc_attn_mask = "Whether to mask the diagonal in the attention matrix" return [ Argument( @@ -577,7 +577,7 @@ def descrpt_se_atten_args() -> list[Argument]: doc_scaling_factor = ( "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K). " "If `temperature` is None, the scaling of attention weights is (N_hidden_dim * scaling_factor)**0.5. " - "Else, the scaling of attention weights is setting to `temperature`." + "Else, the scaling of attention weights is set to `temperature`." ) doc_normalize = ( "Whether to normalize the hidden vectors during attention calculation." @@ -592,7 +592,7 @@ def descrpt_se_atten_args() -> list[Argument]: "The output is `out_ij = embedding(input_ij)` for the pair-wise representation of atom i with neighbor j." "- 'strip': Use a separated embedding network for the type embedding and combine the output with the radial embedding network output. " f"When `type_one_side` is False, the input is `input_t = concat([tebd_j, tebd_i])`. {doc_only_pt_supported} When `type_one_side` is True, the input is `input_t = tebd_j`. " - "The output is `out_ij = embeding_t(input_t) * embeding_s(r_ij) + embeding_s(r_ij)` for the pair-wise representation of atom i with neighbor j." + "The output is `out_ij = embedding_t(input_t) * embedding_s(r_ij) + embedding_s(r_ij)` for the pair-wise representation of atom i with neighbor j." ) doc_stripped_type_embedding = ( "(Deprecated, kept only for compatibility.) Whether to strip the type embedding into a separate embedding network. 
" @@ -686,17 +686,17 @@ def descrpt_se_atten_args() -> list[Argument]: @descrpt_args_plugin.register("se_e3_tebd", doc=doc_only_pt_supported) def descrpt_se_e3_tebd_args() -> list[Argument]: - doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ + doc_sel = 'This parameter sets the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". 
Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." - doc_trainable = "If the parameters in the embedding net is trainable" + doc_trainable = "Whether the parameters in the embedding net are trainable" doc_seed = "Random seed for parameter initialization" doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection." 
@@ -714,7 +714,7 @@ def descrpt_se_e3_tebd_args() -> list[Argument]: "The output is `out_jk = embedding(input_jk)` for the three-body representation of atom i with neighbors j and k." "- 'strip': Use a separated embedding network for the type embedding and combine the output with the angular embedding network output. " "The input is `input_t = concat([tebd_j, tebd_k])`." - "The output is `out_jk = embeding_t(input_t) * embeding_s(angle_jk) + embeding_s(angle_jk)` for the three-body representation of atom i with neighbors j and k." + "The output is `out_jk = embedding_t(input_t) * embedding_s(angle_jk) + embedding_s(angle_jk)` for the three-body representation of atom i with neighbors j and k." ) return [ @@ -812,7 +812,7 @@ def descrpt_se_atten_v2_args() -> list[Argument]: doc_scaling_factor = ( "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K). " "If `temperature` is None, the scaling of attention weights is (N_hidden_dim * scaling_factor)**0.5. " - "Else, the scaling of attention weights is setting to `temperature`." + "Else, the scaling of attention weights is set to `temperature`." ) doc_normalize = ( "Whether to normalize the hidden vectors during attention calculation." @@ -898,7 +898,7 @@ def descrpt_dpa2_args() -> list[Argument]: ) doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection." - doc_trainable = "If the parameters in the embedding net is trainable." + doc_trainable = "Whether the parameters in the embedding net are trainable." doc_seed = "Random seed for parameter initialization." 
doc_add_tebd_to_repinit_out = "Add type embedding to the output representation from repinit before inputting it into repformer." doc_use_econf_tebd = "Whether to use electronic configuration type embedding." @@ -966,9 +966,9 @@ def dpa2_repinit_args() -> list[Argument]: doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`." doc_nsel = 'Maximally possible number of selected neighbors. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiplies the maximum by the "factor". Finally the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_neuron = ( - "Number of neurons in each hidden layers of the embedding net." + "Number of neurons in each hidden layer of the embedding net." "When two layers are of the same size or one layer is twice as large as the previous layer, " "a skip connection is built." ) @@ -981,7 +981,7 @@ def dpa2_repinit_args() -> list[Argument]: "The output is `out_ij = embedding(input_ij)` for the pair-wise representation of atom i with neighbor j." "- 'strip': Use a separated embedding network for the type embedding and combine the output with the radial embedding network output. " f"When `type_one_side` is False, the input is `input_t = concat([tebd_j, tebd_i])`. 
{doc_only_pt_supported} When `type_one_side` is True, the input is `input_t = tebd_j`. " - "The output is `out_ij = embeding_t(input_t) * embeding_s(r_ij) + embeding_s(r_ij)` for the pair-wise representation of atom i with neighbor j." + "The output is `out_ij = embedding_t(input_t) * embedding_s(r_ij) + embedding_s(r_ij)` for the pair-wise representation of atom i with neighbor j." ) doc_set_davg_zero = ( "Set the normalization average to zero. " @@ -994,13 +994,13 @@ def dpa2_repinit_args() -> list[Argument]: "Whether to concatenate three-body representation in the output descriptor." ) doc_three_body_neuron = ( - "Number of neurons in each hidden layers of the three-body embedding net." + "Number of neurons in each hidden layer of the three-body embedding net." "When two layers are of the same size or one layer is twice as large as the previous layer, " "a skip connection is built." ) doc_three_body_sel = 'Maximally possible number of selected neighbors in the three-body representation. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_three_body_rcut = "The cut-off radius in the three-body representation." 
doc_three_body_rcut_smth = "Where to start smoothing in the three-body representation. For example the 1/r term is smoothed from `three_body_rcut` to `three_body_rcut_smth`." @@ -1110,7 +1110,7 @@ def dpa2_repformer_args() -> list[Argument]: doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`." doc_nsel = 'Maximally possible number of selected neighbors. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiplies the maximum by the "factor". Finally the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_nlayers = "The number of repformer layers." doc_g1_dim = "The dimension of invariant single-atom representation." doc_g2_dim = "The dimension of invariant pair-atom representation." @@ -1376,7 +1376,7 @@ def descrpt_dpa3_args() -> list[Argument]: doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. 
For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection." - doc_trainable = "If the parameters in the embedding net is trainable." + doc_trainable = "Whether the parameters in the embedding net are trainable." doc_seed = "Random seed for parameter initialization." doc_use_econf_tebd = "Whether to use electronic configuration type embedding." doc_use_tebd_bias = "Whether to use bias in the type embedding layer." @@ -1461,12 +1461,12 @@ def dpa3_repflow_args() -> list[Argument]: doc_e_rcut_smth = "Where to start smoothing for edge. For example the 1/r term is smoothed from `rcut` to `rcut_smth`." doc_e_sel = 'Maximally possible number of selected edge neighbors. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiplies the maximum by the "factor". Finally the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_a_rcut = "The angle cut-off radius." doc_a_rcut_smth = "Where to start smoothing for angle. For example the 1/r term is smoothed from `rcut` to `rcut_smth`." doc_a_sel = 'Maximally possible number of selected angle neighbors. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. 
We recommend it to be less than 200. \n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_a_compress_rate = ( "The compression rate for angular messages. The default value is 0, indicating no compression. " " If a non-zero integer c is provided, the node and edge dimensions will be compressed " @@ -1694,16 +1694,16 @@ def descrpt_se_a_ebd_v2_args() -> list[Argument]: def descrpt_se_a_mask_args() -> list[Argument]: doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. 
Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' - doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." 
- doc_trainable = "If the parameters in the embedding net is trainable" + doc_trainable = "Whether the parameters in the embedding net are trainable" doc_seed = "Random seed for parameter initialization" return [ @@ -1763,13 +1763,13 @@ def fitting_ener() -> list[Argument]: doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net." doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches." - doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." + doc_neuron = "The number of neurons in each hidden layer of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_trainable = f"Whether the parameters in the fitting net are trainable. This option can be\n\n\ - bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\ -- list of bool{doc_only_tf_supported}: Specifies if each layer is trainable. 
Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1." +- list of bool{doc_only_tf_supported}: Specifies if each layer is trainable. Since the fitting net is composed of hidden layers followed by an output layer, the length of this list should be equal to len(`neuron`)+1." doc_rcond = "The condition number used to determine the initial energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details." doc_seed = "Random seed for parameter initialization of the fitting net" doc_atom_ener = "Specify the atomic energy in vacuum for each type" @@ -1855,13 +1855,13 @@ def fitting_dos() -> list[Argument]: doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net." doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches." - doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." + doc_neuron = "The number of neurons in each hidden layer of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' 
doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\ - bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\ -- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1." +- list of bool: Specifies if each layer is trainable. Since the fitting net is composed of hidden layers followed by an output layer, the length of this list should be equal to len(`neuron`)+1." doc_rcond = "The condition number used to determine the initial energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details." doc_seed = "Random seed for parameter initialization of the fitting net" doc_numb_dos = ( @@ -1918,7 +1918,7 @@ def fitting_property() -> list[Argument]: doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net." doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches." - doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built" + doc_neuron = "The number of neurons in each hidden layer of the fitting net. 
When two hidden layers are of the same size, a skip connection is built" doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." @@ -1931,7 +1931,7 @@ def fitting_property() -> list[Argument]: doc_property_name = "The names of fitting property, which should be consistent with the property name in the dataset." doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\ - bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\ -- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1." +- list of bool: Specifies if each layer is trainable. Since the fitting net is composed of hidden layers followed by an output layer, the length of this list should be equal to len(`neuron`)+1." return [ Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), @@ -1998,7 +1998,7 @@ def fitting_polar() -> list[Argument]: doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net." 
doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches." - doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." + doc_neuron = "The number of neurons in each hidden layer of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." @@ -2084,7 +2084,7 @@ def fitting_dipole() -> list[Argument]: doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." doc_default_fparam = "The default frame parameter. If set, when `fparam.npy` files are not included in the data system, this value will be used as the default value for the frame parameter in the fitting net." doc_dim_case_embd = "The dimension of the case embedding embedding. When training or fine-tuning a multitask model with case embedding embeddings, this number should be set to the number of model branches." - doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." + doc_neuron = "The number of neurons in each hidden layer of the fitting net. When two hidden layers are of the same size, a skip connection is built." 
doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." @@ -2452,10 +2452,10 @@ def frozen_model_args() -> Argument: def pairtab_model_args() -> Argument: doc_tab_file = "Path to the tabulation file." doc_rcut = "The cut-off radius." - doc_sel = 'This parameter set the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ + doc_sel = 'This parameter sets the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. 
This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' ca = Argument( "pairtab", dict, From 1e4e7140871ce3fc6bafe8f563a289091989839d Mon Sep 17 00:00:00 2001 From: "njzjz-bot (driven by OpenClaw (model: custom-chat-jinzhezeng-group/gpt-5.4))[bot]" <48687836+njzjz-bot@users.noreply.github.com> Date: Fri, 3 Apr 2026 14:41:33 +0000 Subject: [PATCH 2/6] docs(argcheck): clarify dpa terminology in doc strings Problem - DPA-related doc strings still contain jargon-heavy names such as repinit, repformer, repflow, g1, g2, h2, and tebd. - These terms are familiar to maintainers but harder for new readers to interpret from the generated documentation alone. 
Change - clarify DPA2 and DPA3 doc strings with short human-readable explanations - keep internal names such as g1, g2, h2, repinit, repformer, repflow, and tebd for code-to-doc traceability - improve wording for type-embedding and symmetrization-related options Authored by OpenClaw (model: custom-chat-jinzhezeng-group/gpt-5.4) --- deepmd/utils/argcheck.py | 124 +++++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 62 deletions(-) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 0c22759b2f..589081fbc2 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -106,8 +106,8 @@ def type_embedding_args() -> list[Argument]: doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." doc_trainable = "Whether the parameters in the embedding net are trainable" - doc_use_econf_tebd = "Whether to use electronic configuration type embedding." - doc_use_tebd_bias = "Whether to use bias in the type embedding layer." + doc_use_econf_tebd = "Whether to use an electronic-configuration-based type embedding." + doc_use_tebd_bias = "Whether to use a bias term in the type-embedding layer." return [ Argument("neuron", list[int], optional=True, default=[8], doc=doc_neuron), @@ -278,7 +278,7 @@ def descrpt_se_a_args() -> list[Argument]: doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" doc_neuron = "Number of neurons in each hidden layer of the embedding net. 
When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." - doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." + doc_axis_neuron = "Size of the submatrix of `G` (the embedding matrix) used to build the descriptor." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." @@ -496,7 +496,7 @@ def descrpt_se_atten_common_args() -> list[Argument]: doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." - doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." + doc_axis_neuron = "Size of the submatrix of `G` (the embedding matrix) used to build the descriptor." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' 
doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_type_one_side = r"If 'False', type embeddings of both neighbor and central atoms are considered. If 'True', only type embeddings of neighbor atoms are considered. Default is 'False'." @@ -570,9 +570,9 @@ def descrpt_se_atten_args() -> list[Argument]: "Whether to use trainable shift and scale weights in layer normalization." ) doc_ln_eps = "The epsilon value for layer normalization. The default value for TensorFlow is set to 1e-3 to keep consistent with keras while set to 1e-5 in PyTorch and DP implementation." - doc_tebd_dim = "The dimension of atom type embedding." + doc_tebd_dim = "Dimension of the atom-type embedding (`tebd`)." doc_use_econf_tebd = r"Whether to use electronic configuration type embedding. For TensorFlow backend, please set `use_econf_tebd` in `type_embedding` block instead." - doc_use_tebd_bias = "Whether to use bias in the type embedding layer." + doc_use_tebd_bias = "Whether to use a bias term in the type-embedding layer." doc_temperature = "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K)." doc_scaling_factor = ( "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K). " @@ -583,14 +583,14 @@ def descrpt_se_atten_args() -> list[Argument]: "Whether to normalize the hidden vectors during attention calculation." ) doc_concat_output_tebd = ( - "Whether to concat type embedding at the output of the descriptor." + "Whether to concatenate the type embedding to the descriptor output." ) doc_tebd_input_mode = ( - "The input mode of the type embedding. Supported modes are ['concat', 'strip']." - "- 'concat': Concatenate the type embedding with the smoothed radial information as the union input for the embedding network. " + "How the atom-type embedding (`tebd`) is fed into the descriptor. Supported modes are ['concat', 'strip']." 
+ "- 'concat': Concatenate the type embedding with the smoothed radial information as the combined input to the embedding network. " "When `type_one_side` is False, the input is `input_ij = concat([r_ij, tebd_j, tebd_i])`. When `type_one_side` is True, the input is `input_ij = concat([r_ij, tebd_j])`. " "The output is `out_ij = embedding(input_ij)` for the pair-wise representation of atom i with neighbor j." - "- 'strip': Use a separated embedding network for the type embedding and combine the output with the radial embedding network output. " + "- 'strip': Use a separate embedding network for the type embedding and combine its output with the radial embedding-network output. " f"When `type_one_side` is False, the input is `input_t = concat([tebd_j, tebd_i])`. {doc_only_pt_supported} When `type_one_side` is True, the input is `input_t = tebd_j`. " "The output is `out_ij = embedding_t(input_t) * embedding_s(r_ij) + embedding_s(r_ij)` for the pair-wise representation of atom i with neighbor j." ) @@ -702,13 +702,13 @@ def descrpt_se_e3_tebd_args() -> list[Argument]: doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection." doc_smooth = "Whether to use smooth process in calculation when using stripped type embedding. Whether to dot smooth factor (both neighbors j and k) on the network output (out_jk) of type embedding to keep the network smooth, instead of setting `set_davg_zero` to be True." doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used" - doc_tebd_dim = "The dimension of atom type embedding." + doc_tebd_dim = "Dimension of the atom-type embedding (`tebd`)." doc_use_econf_tebd = r"Whether to use electronic configuration type embedding." 
doc_concat_output_tebd = ( - "Whether to concat type embedding at the output of the descriptor." + "Whether to concatenate the type embedding to the descriptor output." ) doc_tebd_input_mode = ( - "The input mode of the type embedding. Supported modes are ['concat', 'strip']." + "How the atom-type embedding (`tebd`) is fed into the descriptor. Supported modes are ['concat', 'strip']." "- 'concat': Concatenate the type embedding with the smoothed angular information as the union input for the embedding network. " "The input is `input_jk = concat([angle_jk, tebd_j, tebd_k])`. " "The output is `out_jk = embedding(input_jk)` for the three-body representation of atom i with neighbors j and k." @@ -805,9 +805,9 @@ def descrpt_se_atten_v2_args() -> list[Argument]: "Whether to use trainable shift and scale weights in layer normalization." ) doc_ln_eps = "The epsilon value for layer normalization. The default value for TensorFlow is set to 1e-3 to keep consistent with keras while set to 1e-5 in PyTorch and DP implementation." - doc_tebd_dim = "The dimension of atom type embedding." + doc_tebd_dim = "Dimension of the atom-type embedding (`tebd`)." doc_use_econf_tebd = r"Whether to use electronic configuration type embedding. For TensorFlow backend, please set `use_econf_tebd` in `type_embedding` block instead." - doc_use_tebd_bias = "Whether to use bias in the type embedding layer." + doc_use_tebd_bias = "Whether to use a bias term in the type-embedding layer." doc_temperature = "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K)." doc_scaling_factor = ( "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K). " @@ -818,7 +818,7 @@ def descrpt_se_atten_v2_args() -> list[Argument]: "Whether to normalize the hidden vectors during attention calculation." ) doc_concat_output_tebd = ( - "Whether to concat type embedding at the output of the descriptor." 
+ "Whether to concatenate the type embedding to the descriptor output." ) return [ @@ -885,12 +885,12 @@ def descrpt_se_atten_v2_args() -> list[Argument]: @descrpt_args_plugin.register("dpa2", doc=doc_only_pt_supported) def descrpt_dpa2_args() -> list[Argument]: # repinit args - doc_repinit = "The arguments used to initialize the repinit block." + doc_repinit = "Arguments for the `repinit` block, which builds the initial atom-wise representations before `repformer`." # repformer args - doc_repformer = "The arguments used to initialize the repformer block." + doc_repformer = "Arguments for the `repformer` block, which refines the representations produced by `repinit`." # descriptor args doc_concat_output_tebd = ( - "Whether to concat type embedding at the output of the descriptor." + "Whether to concatenate the type embedding to the descriptor output." ) doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." doc_smooth = ( @@ -900,9 +900,9 @@ def descrpt_dpa2_args() -> list[Argument]: doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection." doc_trainable = "Whether the parameters in the embedding net are trainable." doc_seed = "Random seed for parameter initialization." - doc_add_tebd_to_repinit_out = "Add type embedding to the output representation from repinit before inputting it into repformer." - doc_use_econf_tebd = "Whether to use electronic configuration type embedding." - doc_use_tebd_bias = "Whether to use bias in the type embedding layer." + doc_add_tebd_to_repinit_out = "Whether to add the type embedding to the output of `repinit` before passing it to `repformer`." 
+ doc_use_econf_tebd = "Whether to use an electronic-configuration-based type embedding." + doc_use_tebd_bias = "Whether to use a bias term in the type-embedding layer." return [ # repinit args Argument("repinit", dict, dpa2_repinit_args(), doc=doc_repinit), @@ -972,14 +972,14 @@ def dpa2_repinit_args() -> list[Argument]: "When two layers are of the same size or one layer is twice as large as the previous layer, " "a skip connection is built." ) - doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." - doc_tebd_dim = "The dimension of atom type embedding." + doc_axis_neuron = "Size of the submatrix of `G` (the embedding matrix) used to build the descriptor." + doc_tebd_dim = "Dimension of the atom-type embedding (`tebd`)." doc_tebd_input_mode = ( - "The input mode of the type embedding. Supported modes are ['concat', 'strip']." - "- 'concat': Concatenate the type embedding with the smoothed radial information as the union input for the embedding network. " + "How the atom-type embedding (`tebd`) is fed into the descriptor. Supported modes are ['concat', 'strip']." + "- 'concat': Concatenate the type embedding with the smoothed radial information as the combined input to the embedding network. " "When `type_one_side` is False, the input is `input_ij = concat([r_ij, tebd_j, tebd_i])`. When `type_one_side` is True, the input is `input_ij = concat([r_ij, tebd_j])`. " "The output is `out_ij = embedding(input_ij)` for the pair-wise representation of atom i with neighbor j." - "- 'strip': Use a separated embedding network for the type embedding and combine the output with the radial embedding network output. " + "- 'strip': Use a separate embedding network for the type embedding and combine its output with the radial embedding-network output. " f"When `type_one_side` is False, the input is `input_t = concat([tebd_j, tebd_i])`. {doc_only_pt_supported} When `type_one_side` is True, the input is `input_t = tebd_j`. 
" "The output is `out_ij = embedding_t(input_t) * embedding_s(r_ij) + embedding_s(r_ij)` for the pair-wise representation of atom i with neighbor j." ) @@ -991,7 +991,7 @@ def dpa2_repinit_args() -> list[Argument]: doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection.' doc_use_three_body = ( - "Whether to concatenate three-body representation in the output descriptor." + "Whether to concatenate an additional three-body representation to the `repinit` output descriptor." ) doc_three_body_neuron = ( "Number of neurons in each hidden layer of the three-body embedding net." @@ -1111,35 +1111,35 @@ def dpa2_repformer_args() -> list[Argument]: doc_nsel = 'Maximally possible number of selected neighbors. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' - doc_nlayers = "The number of repformer layers." - doc_g1_dim = "The dimension of invariant single-atom representation." - doc_g2_dim = "The dimension of invariant pair-atom representation." - doc_axis_neuron = "The number of dimension of submatrix in the symmetrization ops." - doc_direct_dist = "Whether or not use direct distance as input for the embedding net to get g2 instead of smoothed 1/r." 
- doc_update_g1_has_conv = "Update the g1 rep with convolution term." - doc_update_g1_has_drrd = "Update the g1 rep with the drrd term." - doc_update_g1_has_grrg = "Update the g1 rep with the grrg term." - doc_update_g1_has_attn = "Update the g1 rep with the localized self-attention." - doc_update_g2_has_g1g1 = "Update the g2 rep with the g1xg1 term." - doc_update_g2_has_attn = "Update the g2 rep with the gated self-attention." - doc_use_sqrt_nnei = "Whether to use the square root of the number of neighbors for symmetrization_op normalization instead of using the number of neighbors directly." - doc_g1_out_conv = "Whether to put the convolutional update of g1 separately outside the concatenated MLP update." - doc_g1_out_mlp = "Whether to put the self MLP update of g1 separately outside the concatenated MLP update." - doc_update_h2 = "Update the h2 rep." + doc_nlayers = "Number of `repformer` layers." + doc_g1_dim = "Dimension of the `g1` representation, i.e., the rotationally invariant single-atom representation." + doc_g2_dim = "Dimension of the `g2` representation, i.e., the rotationally invariant pair-atom representation." + doc_axis_neuron = "Size of the submatrix used in the symmetrization operations." + doc_direct_dist = "Whether to use the direct distance as input to the embedding net when building `g2`, instead of the smoothed `1/r`." + doc_update_g1_has_conv = "Whether to include the convolution term when updating `g1`." + doc_update_g1_has_drrd = "Whether to include the `drrd` term when updating `g1`." + doc_update_g1_has_grrg = "Whether to include the `grrg` term when updating `g1`." + doc_update_g1_has_attn = "Whether to include localized self-attention when updating `g1`." + doc_update_g2_has_g1g1 = "Whether to include the `g1 × g1` term when updating `g2`." + doc_update_g2_has_attn = "Whether to include gated self-attention when updating `g2`." 
+ doc_use_sqrt_nnei = "Whether to normalize `symmetrization_op` by the square root of the number of neighbors instead of by the number of neighbors itself." + doc_g1_out_conv = "Whether to keep the convolutional update of `g1` as a separate branch outside the concatenated MLP update." + doc_g1_out_mlp = "Whether to keep the self-MLP update of `g1` as a separate branch outside the concatenated MLP update." + doc_update_h2 = "Whether to update the `h2` representation, i.e., the rotationally equivariant pair representation." doc_attn1_hidden = ( - "The hidden dimension of localized self-attention to update the g1 rep." + "Hidden dimension of the localized self-attention used to update `g1`." ) doc_attn1_nhead = ( - "The number of heads in localized self-attention to update the g1 rep." + "Number of heads in the localized self-attention used to update `g1`." ) doc_attn2_hidden = ( - "The hidden dimension of gated self-attention to update the g2 rep." + "Hidden dimension of the gated self-attention used to update `g2`." ) doc_attn2_nhead = ( - "The number of heads in gated self-attention to update the g2 rep." + "Number of heads in the gated self-attention used to update `g2`." ) doc_attn2_has_gate = ( - "Whether to use gate in the gated self-attention to update the g2 rep." + "Whether to use gating in the gated self-attention used to update `g2`." ) doc_activation_function = f"The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}." doc_update_style = ( @@ -1362,10 +1362,10 @@ def dpa2_repformer_args() -> list[Argument]: @descrpt_args_plugin.register("dpa3", doc=doc_only_pt_supported) def descrpt_dpa3_args() -> list[Argument]: # repflow args - doc_repflow = "The arguments used to initialize the repflow block." + doc_repflow = "Arguments for the `repflow` block, which updates node, edge, and angle representations in DPA3." 
# descriptor args doc_concat_output_tebd = ( - "Whether to concat type embedding at the output of the descriptor." + "Whether to concatenate the type embedding to the descriptor output." ) doc_add_chg_spin_ebd = ( "Whether to add charge and spin embedding to the descriptor. " @@ -1378,8 +1378,8 @@ def descrpt_dpa3_args() -> list[Argument]: doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection." doc_trainable = "Whether the parameters in the embedding net are trainable." doc_seed = "Random seed for parameter initialization." - doc_use_econf_tebd = "Whether to use electronic configuration type embedding." - doc_use_tebd_bias = "Whether to use bias in the type embedding layer." + doc_use_econf_tebd = "Whether to use an electronic-configuration-based type embedding." + doc_use_tebd_bias = "Whether to use a bias term in the type-embedding layer." doc_use_loc_mapping = ( "Whether to use local atom index mapping in training or non-parallel inference. " "When True, local indexing and mapping are applied to neighbor lists and embeddings during descriptor computation." @@ -1453,10 +1453,10 @@ def descrpt_dpa3_args() -> list[Argument]: # repflow for dpa3 def dpa3_repflow_args() -> list[Argument]: # repflow args - doc_n_dim = "The dimension of node representation." - doc_e_dim = "The dimension of edge representation." - doc_a_dim = "The dimension of angle representation." - doc_nlayers = "The number of repflow layers." + doc_n_dim = "Dimension of the node (atom-wise) representation." + doc_e_dim = "Dimension of the edge (pair-wise) representation." + doc_a_dim = "Dimension of the angle (three-body/angular) representation." + doc_nlayers = "Number of `repflow` layers." doc_e_rcut = "The edge cut-off radius." 
doc_e_rcut_smth = "Where to start smoothing for edge. For example the 1/r term is smoothed from `rcut` to `rcut_smth`." doc_e_sel = 'Maximally possible number of selected edge neighbors. It can be:\n\n\ @@ -1482,10 +1482,10 @@ def dpa3_repflow_args() -> list[Argument]: "The default value is False." ) doc_n_multi_edge_message = ( - "The head number of multiple edge messages to update node feature. " - "Default is 1, indicating one head edge message." + "Number of heads in the multi-edge-message update of node features. " + "Default is 1, i.e., a single edge-message head." ) - doc_axis_neuron = "The number of dimension of submatrix in the symmetrization ops." + doc_axis_neuron = "Size of the submatrix used in the symmetrization operations." doc_fix_stat_std = ( "If non-zero (default is 0.3), use this constant as the normalization standard deviation " "instead of computing it from data statistics." @@ -1496,7 +1496,7 @@ def dpa3_repflow_args() -> list[Argument]: "Transition to fix_stat_std parameter immediately." ) doc_update_angle = ( - "Where to update the angle rep. If not, only node and edge rep will be used." + "Whether to update the angle representation. If False, only the node and edge representations are updated." ) doc_update_style = ( "Style to update a representation. " @@ -1517,7 +1517,7 @@ def dpa3_repflow_args() -> list[Argument]: ) doc_optim_update = ( "Whether to enable the optimized update method. " - "Uses a more efficient process when enabled. Defaults to True" + "Uses a more efficient implementation when enabled. Default is True." ) doc_smooth_edge_update = ( "Whether to make edge update smooth. " @@ -1697,7 +1697,7 @@ def descrpt_se_a_mask_args() -> list[Argument]: - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". 
Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." - doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." + doc_axis_neuron = "Size of the submatrix of `G` (the embedding matrix) used to build the descriptor." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." From f21bdc6094562923307bc9e40f47ded046fe1750 Mon Sep 17 00:00:00 2001 From: "njzjz-bot (driven by OpenClaw (model: custom-chat-jinzhezeng-group/gpt-5.4))[bot]" <48687836+njzjz-bot@users.noreply.github.com> Date: Fri, 3 Apr 2026 14:45:24 +0000 Subject: [PATCH 3/6] docs(argcheck): avoid ambiguous unicode in g1g1 term Problem - ruff flags the multiplication sign in the clarified g1g1 doc string as an ambiguous unicode character. 
Change - replace the unicode multiplication sign with plain ASCII in the doc string Authored by OpenClaw (model: custom-chat-jinzhezeng-group/gpt-5.4) --- deepmd/utils/argcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 589081fbc2..6b02b5d854 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1120,7 +1120,7 @@ def dpa2_repformer_args() -> list[Argument]: doc_update_g1_has_drrd = "Whether to include the `drrd` term when updating `g1`." doc_update_g1_has_grrg = "Whether to include the `grrg` term when updating `g1`." doc_update_g1_has_attn = "Whether to include localized self-attention when updating `g1`." - doc_update_g2_has_g1g1 = "Whether to include the `g1 × g1` term when updating `g2`." + doc_update_g2_has_g1g1 = "Whether to include the `g1 x g1` term when updating `g2`." doc_update_g2_has_attn = "Whether to include gated self-attention when updating `g2`." doc_use_sqrt_nnei = "Whether to normalize `symmetrization_op` by the square root of the number of neighbors instead of by the number of neighbors itself." doc_g1_out_conv = "Whether to keep the convolutional update of `g1` as a separate branch outside the concatenated MLP update." 
From 137138b1a8ee9b1959446efdae5b9273267b3cb8 Mon Sep 17 00:00:00 2001 From: "njzjz-bot (driven by OpenClaw (model: custom-chat-jinzhezeng-group/gpt-5.4))[bot]" <48687836+njzjz-bot@users.noreply.github.com> Date: Fri, 3 Apr 2026 14:52:06 +0000 Subject: [PATCH 4/6] style(argcheck): apply prek auto-formatting Problem - trim trailing whitespace.................................................Passed fix end of files.........................................................Passed check yaml...............................................................Passed check json...............................................................Passed check for added large files..............................................Passed check for merge conflicts................................................Passed check for broken symlinks................................................Passed check toml...............................................................Passed isort....................................................................Passed ruff (legacy alias)......................................................Passed ruff format..............................................................Passed Velin....................................................................Passed mdformat.................................................................Passed clang-format.............................................................Passed prettier.................................................................Passed shfmt....................................................................Passed cmake-format.............................................................Passed bibtex-tidy..............................................................Passed Insert license in comments...............................................Passed Insert license in comments...............................................Passed Insert license in comments...............................................Passed 
Insert license in comments...............................................Passed Disallow improper capitalization.........................................Passed pylint...................................................................Passed reformatted via . Change - apply the formatter output without changing behavior Authored by OpenClaw (model: custom-chat-jinzhezeng-group/gpt-5.4) --- deepmd/utils/argcheck.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 6b02b5d854..b817f577c7 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -106,7 +106,9 @@ def type_embedding_args() -> list[Argument]: doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.' doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." doc_trainable = "Whether the parameters in the embedding net are trainable" - doc_use_econf_tebd = "Whether to use an electronic-configuration-based type embedding." + doc_use_econf_tebd = ( + "Whether to use an electronic-configuration-based type embedding." + ) doc_use_tebd_bias = "Whether to use a bias term in the type-embedding layer." return [ @@ -901,7 +903,9 @@ def descrpt_dpa2_args() -> list[Argument]: doc_trainable = "Whether the parameters in the embedding net are trainable." doc_seed = "Random seed for parameter initialization." doc_add_tebd_to_repinit_out = "Whether to add the type embedding to the output of `repinit` before passing it to `repformer`." - doc_use_econf_tebd = "Whether to use an electronic-configuration-based type embedding." 
+ doc_use_econf_tebd = ( + "Whether to use an electronic-configuration-based type embedding." + ) doc_use_tebd_bias = "Whether to use a bias term in the type-embedding layer." return [ # repinit args @@ -990,9 +994,7 @@ def dpa2_repinit_args() -> list[Argument]: doc_activation_function = f"The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}." doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection.' - doc_use_three_body = ( - "Whether to concatenate an additional three-body representation to the `repinit` output descriptor." - ) + doc_use_three_body = "Whether to concatenate an additional three-body representation to the `repinit` output descriptor." doc_three_body_neuron = ( "Number of neurons in each hidden layer of the three-body embedding net." "When two layers are of the same size or one layer is twice as large as the previous layer, " @@ -1116,12 +1118,18 @@ def dpa2_repformer_args() -> list[Argument]: doc_g2_dim = "Dimension of the `g2` representation, i.e., the rotationally invariant pair-atom representation." doc_axis_neuron = "Size of the submatrix used in the symmetrization operations." doc_direct_dist = "Whether to use the direct distance as input to the embedding net when building `g2`, instead of the smoothed `1/r`." - doc_update_g1_has_conv = "Whether to include the convolution term when updating `g1`." + doc_update_g1_has_conv = ( + "Whether to include the convolution term when updating `g1`." + ) doc_update_g1_has_drrd = "Whether to include the `drrd` term when updating `g1`." 
doc_update_g1_has_grrg = "Whether to include the `grrg` term when updating `g1`." - doc_update_g1_has_attn = "Whether to include localized self-attention when updating `g1`." + doc_update_g1_has_attn = ( + "Whether to include localized self-attention when updating `g1`." + ) doc_update_g2_has_g1g1 = "Whether to include the `g1 x g1` term when updating `g2`." - doc_update_g2_has_attn = "Whether to include gated self-attention when updating `g2`." + doc_update_g2_has_attn = ( + "Whether to include gated self-attention when updating `g2`." + ) doc_use_sqrt_nnei = "Whether to normalize `symmetrization_op` by the square root of the number of neighbors instead of by the number of neighbors itself." doc_g1_out_conv = "Whether to keep the convolutional update of `g1` as a separate branch outside the concatenated MLP update." doc_g1_out_mlp = "Whether to keep the self-MLP update of `g1` as a separate branch outside the concatenated MLP update." @@ -1135,9 +1143,7 @@ def dpa2_repformer_args() -> list[Argument]: doc_attn2_hidden = ( "Hidden dimension of the gated self-attention used to update `g2`." ) - doc_attn2_nhead = ( - "Number of heads in the gated self-attention used to update `g2`." - ) + doc_attn2_nhead = "Number of heads in the gated self-attention used to update `g2`." doc_attn2_has_gate = ( "Whether to use gating in the gated self-attention used to update `g2`." ) @@ -1378,7 +1384,9 @@ def descrpt_dpa3_args() -> list[Argument]: doc_env_protection = "Protection parameter to prevent division by zero errors during environment matrix calculations. For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection." doc_trainable = "Whether the parameters in the embedding net are trainable." doc_seed = "Random seed for parameter initialization." - doc_use_econf_tebd = "Whether to use an electronic-configuration-based type embedding." 
+ doc_use_econf_tebd = ( + "Whether to use an electronic-configuration-based type embedding." + ) doc_use_tebd_bias = "Whether to use a bias term in the type-embedding layer." doc_use_loc_mapping = ( "Whether to use local atom index mapping in training or non-parallel inference. " @@ -1495,9 +1503,7 @@ def dpa3_repflow_args() -> list[Argument]: "If set to True, it forces fix_stat_std=0.3 for backward compatibility. " "Transition to fix_stat_std parameter immediately." ) - doc_update_angle = ( - "Whether to update the angle representation. If False, only the node and edge representations are updated." - ) + doc_update_angle = "Whether to update the angle representation. If False, only the node and edge representations are updated." doc_update_style = ( "Style to update a representation. " "Supported options are: " From 7dbf27312962cef79f390a1db8c96ffbb7099576 Mon Sep 17 00:00:00 2001 From: "njzjz-bot (driven by OpenClaw (model: custom-chat-jinzhezeng-group/gpt-5.4))[bot]" <48687836+njzjz-bot@users.noreply.github.com> Date: Fri, 3 Apr 2026 14:59:40 +0000 Subject: [PATCH 5/6] docs(argcheck): address PR review comments Problem - PR review comments pointed out unclear shape descriptions, awkward wording in auto selection, and missing line breaks in bullet lists. Change - use explicit shape descriptions for dipole and polarizability labels - replace "wrapped up to 4 divisible" with "rounded up to a multiple of 4" throughout - add explicit line breaks to bullet lists and improve related wording Authored by OpenClaw (model: custom-chat-jinzhezeng-group/gpt-5.4) --- deepmd/utils/argcheck.py | 46 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index b817f577c7..acc62f1f41 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -56,8 +56,8 @@ # fitting doc_ener = "Fit an energy model (potential energy surface)." doc_dos = "Fit a density of states model. 
The total density of states / site-projected density of states labels should be provided by `dos.npy` or `atom_dos.npy` in each data system. The file has a number of frame lines and a number of energy-grid columns (times the number of atoms in `atom_dos.npy`). See `loss` parameter." -doc_dipole = "Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times the number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter." -doc_polar = "Fit an atomic polarizability model. Global polarizability labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file either has number of frames lines and 9 times the number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter." +doc_dipole = "Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file should have shape `(n_frames, 3*n_selected)` for atomic dipole labels, or shape `(n_frames, 3)` for global dipole labels. See `loss` parameter." +doc_polar = "Fit an atomic polarizability model. Global polarizability labels or atomic polarizability labels for all selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file should have shape `(n_frames, 9*n_selected)` for atomic polarizability labels, or shape `(n_frames, 9)` for global polarizability labels. See `loss` parameter." # modifier doc_dipole_charge = "Use WFCC to model the electronic structure of the system. Correct the long-range interaction." 
@@ -276,7 +276,7 @@ def descrpt_local_frame_args() -> list[Argument]: def descrpt_se_a_args() -> list[Argument]: doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally, the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." @@ -346,7 +346,7 @@ def descrpt_se_a_args() -> list[Argument]: def descrpt_se_t_args() -> list[Argument]: doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. 
`sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally, the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." @@ -417,7 +417,7 @@ def descrpt_se_a_tpe_args() -> list[Argument]: def descrpt_se_r_args() -> list[Argument]: doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". 
"factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally, the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." @@ -494,7 +494,7 @@ def descrpt_se_atten_common_args() -> list[Argument]: doc_sel = 'This parameter sets the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. 
This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally, the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." @@ -588,10 +588,10 @@ def descrpt_se_atten_args() -> list[Argument]: "Whether to concatenate the type embedding to the descriptor output." ) doc_tebd_input_mode = ( - "How the atom-type embedding (`tebd`) is fed into the descriptor. Supported modes are ['concat', 'strip']." + "How the atom-type embedding (`tebd`) is fed into the descriptor. Supported modes are ['concat', 'strip'].\n\n" "- 'concat': Concatenate the type embedding with the smoothed radial information as the combined input to the embedding network. " "When `type_one_side` is False, the input is `input_ij = concat([r_ij, tebd_j, tebd_i])`. When `type_one_side` is True, the input is `input_ij = concat([r_ij, tebd_j])`. " - "The output is `out_ij = embedding(input_ij)` for the pair-wise representation of atom i with neighbor j." 
+ "The output is `out_ij = embedding(input_ij)` for the pair-wise representation of atom i with neighbor j.\n" "- 'strip': Use a separate embedding network for the type embedding and combine its output with the radial embedding-network output. " f"When `type_one_side` is False, the input is `input_t = concat([tebd_j, tebd_i])`. {doc_only_pt_supported} When `type_one_side` is True, the input is `input_t = tebd_j`. " "The output is `out_ij = embedding_t(input_t) * embedding_s(r_ij) + embedding_s(r_ij)` for the pair-wise representation of atom i with neighbor j." @@ -691,7 +691,7 @@ def descrpt_se_e3_tebd_args() -> list[Argument]: doc_sel = 'This parameter sets the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. 
In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally, the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_rcut = "The cut-off radius." doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." @@ -710,12 +710,12 @@ def descrpt_se_e3_tebd_args() -> list[Argument]: "Whether to concatenate the type embedding to the descriptor output." ) doc_tebd_input_mode = ( - "How the atom-type embedding (`tebd`) is fed into the descriptor. Supported modes are ['concat', 'strip']." - "- 'concat': Concatenate the type embedding with the smoothed angular information as the union input for the embedding network. " + "How the atom-type embedding (`tebd`) is fed into the descriptor. Supported modes are ['concat', 'strip'].\n\n" + "- 'concat': Concatenate the type embedding with the smoothed angular information as the combined input to the embedding network. " "The input is `input_jk = concat([angle_jk, tebd_j, tebd_k])`. " - "The output is `out_jk = embedding(input_jk)` for the three-body representation of atom i with neighbors j and k." - "- 'strip': Use a separated embedding network for the type embedding and combine the output with the angular embedding network output. " - "The input is `input_t = concat([tebd_j, tebd_k])`." + "The output is `out_jk = embedding(input_jk)` for the three-body representation of atom i with neighbors j and k.\n" + "- 'strip': Use a separate embedding network for the type embedding and combine its output with the angular embedding-network output. " + "The input is `input_t = concat([tebd_j, tebd_k])`. 
" "The output is `out_jk = embedding_t(input_t) * embedding_s(angle_jk) + embedding_s(angle_jk)` for the three-body representation of atom i with neighbors j and k." ) @@ -970,7 +970,7 @@ def dpa2_repinit_args() -> list[Argument]: doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`." doc_nsel = 'Maximally possible number of selected neighbors. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally, the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_neuron = ( "Number of neurons in each hidden layer of the embedding net." "When two layers are of the same size or one layer is twice as large as the previous layer, " @@ -979,10 +979,10 @@ def dpa2_repinit_args() -> list[Argument]: doc_axis_neuron = "Size of the submatrix of `G` (the embedding matrix) used to build the descriptor." doc_tebd_dim = "Dimension of the atom-type embedding (`tebd`)." doc_tebd_input_mode = ( - "How the atom-type embedding (`tebd`) is fed into the descriptor. Supported modes are ['concat', 'strip']." + "How the atom-type embedding (`tebd`) is fed into the descriptor. 
Supported modes are ['concat', 'strip'].\n\n" "- 'concat': Concatenate the type embedding with the smoothed radial information as the combined input to the embedding network. " "When `type_one_side` is False, the input is `input_ij = concat([r_ij, tebd_j, tebd_i])`. When `type_one_side` is True, the input is `input_ij = concat([r_ij, tebd_j])`. " - "The output is `out_ij = embedding(input_ij)` for the pair-wise representation of atom i with neighbor j." + "The output is `out_ij = embedding(input_ij)` for the pair-wise representation of atom i with neighbor j.\n" "- 'strip': Use a separate embedding network for the type embedding and combine its output with the radial embedding-network output. " f"When `type_one_side` is False, the input is `input_t = concat([tebd_j, tebd_i])`. {doc_only_pt_supported} When `type_one_side` is True, the input is `input_t = tebd_j`. " "The output is `out_ij = embedding_t(input_t) * embedding_s(r_ij) + embedding_s(r_ij)` for the pair-wise representation of atom i with neighbor j." @@ -1002,7 +1002,7 @@ def dpa2_repinit_args() -> list[Argument]: ) doc_three_body_sel = 'Maximally possible number of selected neighbors in the three-body representation. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". 
Finally, the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_three_body_rcut = "The cut-off radius in the three-body representation." doc_three_body_rcut_smth = "Where to start smoothing in the three-body representation. For example the 1/r term is smoothed from `three_body_rcut` to `three_body_rcut_smth`." @@ -1112,7 +1112,7 @@ def dpa2_repformer_args() -> list[Argument]: doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`." doc_nsel = 'Maximally possible number of selected neighbors. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally, the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_nlayers = "Number of `repformer` layers." doc_g1_dim = "Dimension of the `g1` representation, i.e., the rotationally invariant single-atom representation." doc_g2_dim = "Dimension of the `g2` representation, i.e., the rotationally invariant pair-atom representation." @@ -1469,12 +1469,12 @@ def dpa3_repflow_args() -> list[Argument]: doc_e_rcut_smth = "Where to start smoothing for edge. For example the 1/r term is smoothed from `rcut` to `rcut_smth`." 
doc_e_sel = 'Maximally possible number of selected edge neighbors. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally, the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_a_rcut = "The angle cut-off radius." doc_a_rcut_smth = "Where to start smoothing for angle. For example the 1/r term is smoothed from `rcut` to `rcut_smth`." doc_a_sel = 'Maximally possible number of selected angle neighbors. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". 
Finally, the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_a_compress_rate = ( "The compression rate for angular messages. The default value is 0, indicating no compression. " " If a non-zero integer c is provided, the node and edge dimensions will be compressed " @@ -1700,7 +1700,7 @@ def descrpt_se_a_ebd_v2_args() -> list[Argument]: def descrpt_se_a_mask_args() -> list[Argument]: doc_sel = 'This parameter sets the number of selected neighbors for each type of atom. It can be:\n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally, the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' doc_neuron = "Number of neurons in each hidden layer of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_axis_neuron = "Size of the submatrix of `G` (the embedding matrix) used to build the descriptor." 
@@ -2461,7 +2461,7 @@ def pairtab_model_args() -> Argument: doc_sel = 'This parameter sets the number of selected neighbors. Note that this parameter is a little different from that in other descriptors. Instead of separating each type of atoms, only the summation matters. And this number is highly related with the efficiency, thus one should not make it too large. Usually 200 or less is enough, far away from the GPU limitation 4096. It can be:\n\n\ - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - `list[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' + - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally, the number is rounded up to a multiple of 4. The option "auto" is equivalent to "auto:1.1".' 
ca = Argument( "pairtab", dict, From 69b2b4a0a52b91ea2686c36f17e013d9d631e234 Mon Sep 17 00:00:00 2001 From: "njzjz-bot (driven by OpenClaw (model: custom-chat-jinzhezeng-group/gpt-5.4))[bot]" <48687836+njzjz-bot@users.noreply.github.com> Date: Fri, 3 Apr 2026 15:04:35 +0000 Subject: [PATCH 6/6] docs(argcheck): clarify dos shape wording Problem - One remaining PR review comment pointed out that it used the awkward phrase "frame lines". Change - rephrase the DOS shape description to use "frames (rows)" and clearer column wording Authored by OpenClaw (model: custom-chat-jinzhezeng-group/gpt-5.4) --- deepmd/utils/argcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index acc62f1f41..4ac49cf4d8 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -55,7 +55,7 @@ doc_hybrid = "Concatenates a list of descriptors into a new descriptor." # fitting doc_ener = "Fit an energy model (potential energy surface)." -doc_dos = "Fit a density of states model. The total density of states / site-projected density of states labels should be provided by `dos.npy` or `atom_dos.npy` in each data system. The file has a number of frame lines and a number of energy-grid columns (times the number of atoms in `atom_dos.npy`). See `loss` parameter." +doc_dos = "Fit a density of states model. The total density of states / site-projected density of states labels should be provided by `dos.npy` or `atom_dos.npy` in each data system. The file has a number of frames (rows) and a number of energy-grid columns (multiplied by the number of atoms in `atom_dos.npy`). See `loss` parameter." doc_dipole = "Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file should have shape `(n_frames, 3*n_selected)` for atomic dipole labels, or shape `(n_frames, 3)` for global dipole labels. 
See `loss` parameter." doc_polar = "Fit an atomic polarizability model. Global polarizability labels or atomic polarizability labels for all selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file should have shape `(n_frames, 9*n_selected)` for atomic polarizability labels, or shape `(n_frames, 9)` for global polarizability labels. See `loss` parameter." # modifier