Skip to content

Commit 8a5b2e9

Browse files
committed
add examples
1 parent 203bb34 commit 8a5b2e9

File tree

5 files changed

+295
-50
lines changed

5 files changed

+295
-50
lines changed

deepmd/pd/train/training.py

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -400,20 +400,20 @@ def get_lr(lr_params: dict[str, Any]) -> BaseLR:
400400

401401
per_task_total = []
402402
if not self.multi_task:
403-
sampler_weights = to_numpy_array(
404-
self.training_dataloader.batch_sampler.sampler.weights
405-
)
406-
total_numb_batch = compute_total_numb_batch(
407-
training_data.index,
408-
sampler_weights,
409-
)
410403
if self.num_steps is None:
411404
if self.num_epoch is None:
412405
raise ValueError(
413406
"Either training.numb_steps or training.num_epoch must be set."
414407
)
415408
if self.num_epoch <= 0:
416409
raise ValueError("training.num_epoch must be positive.")
410+
sampler_weights = to_numpy_array(
411+
self.training_dataloader.batch_sampler.sampler.weights
412+
)
413+
total_numb_batch = compute_total_numb_batch(
414+
training_data.index,
415+
sampler_weights,
416+
)
417417
if total_numb_batch <= 0:
418418
raise ValueError(
419419
"Total number of training batches must be positive."
@@ -426,17 +426,24 @@ def get_lr(lr_params: dict[str, Any]) -> BaseLR:
426426
total_numb_batch,
427427
)
428428
else:
429-
for model_key in self.model_keys:
430-
sampler_weights = to_numpy_array(
431-
self.training_dataloader[model_key].batch_sampler.sampler.weights
432-
)
433-
per_task_total.append(
434-
compute_total_numb_batch(
435-
training_data[model_key].index,
436-
sampler_weights,
437-
)
438-
)
439429
if self.num_epoch_dict:
430+
if self.num_steps is not None:
431+
raise ValueError(
432+
"training.numb_steps and training.num_epoch_dict "
433+
"are mutually exclusive."
434+
)
435+
for model_key in self.model_keys:
436+
sampler_weights = to_numpy_array(
437+
self.training_dataloader[
438+
model_key
439+
].batch_sampler.sampler.weights
440+
)
441+
per_task_total.append(
442+
compute_total_numb_batch(
443+
training_data[model_key].index,
444+
sampler_weights,
445+
)
446+
)
440447
(
441448
self.model_prob,
442449
self.num_steps,
@@ -652,15 +659,6 @@ def single_model_finetune(
652659
frz_model = paddle.jit.load(init_frz_model)
653660
self.model.set_state_dict(frz_model.state_dict())
654661

655-
# Get model prob for multi-task
656-
if self.multi_task and self.model_prob is None:
657-
self.model_prob = resolve_model_prob(
658-
self.model_keys,
659-
training_params.get("model_prob"),
660-
training_data,
661-
rank=self.rank,
662-
)
663-
664662
# Multi-task share params
665663
if shared_links is not None:
666664
self.wrapper.share_params(

deepmd/pt/train/training.py

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -473,18 +473,20 @@ def get_lr(lr_params: dict[str, Any]) -> BaseLR:
473473
# Resolve training steps
474474
per_task_total = []
475475
if not self.multi_task:
476-
sampler_weights = to_numpy_array(self.training_dataloader.sampler.weights)
477-
total_numb_batch = compute_total_numb_batch(
478-
training_data.index,
479-
sampler_weights,
480-
)
481476
if self.num_steps is None:
482477
if self.num_epoch is None:
483478
raise ValueError(
484479
"Either training.numb_steps or training.num_epoch must be set."
485480
)
486481
if self.num_epoch <= 0:
487482
raise ValueError("training.num_epoch must be positive.")
483+
sampler_weights = to_numpy_array(
484+
self.training_dataloader.sampler.weights
485+
)
486+
total_numb_batch = compute_total_numb_batch(
487+
training_data.index,
488+
sampler_weights,
489+
)
488490
if total_numb_batch <= 0:
489491
raise ValueError(
490492
"Total number of training batches must be positive."
@@ -497,17 +499,22 @@ def get_lr(lr_params: dict[str, Any]) -> BaseLR:
497499
total_numb_batch,
498500
)
499501
else:
500-
for model_key in self.model_keys:
501-
sampler_weights = to_numpy_array(
502-
self.training_dataloader[model_key].sampler.weights
503-
)
504-
per_task_total.append(
505-
compute_total_numb_batch(
506-
training_data[model_key].index,
507-
sampler_weights,
508-
)
509-
)
510502
if self.num_epoch_dict:
503+
if self.num_steps is not None:
504+
raise ValueError(
505+
"training.numb_steps and training.num_epoch_dict "
506+
"are mutually exclusive."
507+
)
508+
for model_key in self.model_keys:
509+
sampler_weights = to_numpy_array(
510+
self.training_dataloader[model_key].sampler.weights
511+
)
512+
per_task_total.append(
513+
compute_total_numb_batch(
514+
training_data[model_key].index,
515+
sampler_weights,
516+
)
517+
)
511518
(
512519
self.model_prob,
513520
self.num_steps,
@@ -759,15 +766,6 @@ def single_model_finetune(
759766
f"Checkpoint loaded non-strictly. Missing keys: {missing}, Unexpected keys: {unexpected}"
760767
)
761768

762-
# Get model prob for multi-task
763-
if self.multi_task and self.model_prob is None:
764-
self.model_prob = resolve_model_prob(
765-
self.model_keys,
766-
training_params.get("model_prob"),
767-
training_data,
768-
rank=self.rank,
769-
)
770-
771769
# Multi-task share params
772770
if shared_links is not None:
773771
_data_stat_protect = np.array(
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
{
2+
"model": {
3+
"type_map": [
4+
"O",
5+
"H"
6+
],
7+
"descriptor": {
8+
"type": "se_e2_a",
9+
"sel": [
10+
46,
11+
92
12+
],
13+
"rcut_smth": 0.50,
14+
"rcut": 6.00,
15+
"neuron": [
16+
25,
17+
50,
18+
100
19+
],
20+
"resnet_dt": false,
21+
"axis_neuron": 16,
22+
"type_one_side": true,
23+
"seed": 1,
24+
"_comment": " that's all"
25+
},
26+
"fitting_net": {
27+
"neuron": [
28+
240,
29+
240,
30+
240
31+
],
32+
"resnet_dt": true,
33+
"seed": 1,
34+
"_comment": " that's all"
35+
},
36+
"data_stat_nbatch": 20,
37+
"_comment": " that's all"
38+
},
39+
"learning_rate": {
40+
"type": "exp",
41+
"decay_steps": 5000,
42+
"start_lr": 0.001,
43+
"stop_lr": 3.51e-8,
44+
"_comment": "that's all"
45+
},
46+
"loss": {
47+
"type": "ener",
48+
"start_pref_e": 0.02,
49+
"limit_pref_e": 1,
50+
"start_pref_f": 1000,
51+
"limit_pref_f": 1,
52+
"_comment": " that's all"
53+
},
54+
"training": {
55+
"stat_file": "./se_e2_a.hdf5",
56+
"training_data": {
57+
"systems": [
58+
"../data/data_0",
59+
"../data/data_1",
60+
"../data/data_2"
61+
],
62+
"batch_size": 1,
63+
"_comment": "that's all"
64+
},
65+
"validation_data": {
66+
"systems": [
67+
"../data/data_3"
68+
],
69+
"batch_size": 1,
70+
"numb_btch": 3,
71+
"_comment": "that's all"
72+
},
73+
"num_epoch": 100,
74+
"seed": 10,
75+
"disp_file": "lcurve.out",
76+
"disp_freq": 100,
77+
"save_freq": 10000,
78+
"_comment": "that's all"
79+
},
80+
"_comment": "that's all"
81+
}
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
{
2+
"_comment": "that's all",
3+
"model": {
4+
"shared_dict": {
5+
"type_map_all": [
6+
"O",
7+
"H"
8+
],
9+
"dpa2_descriptor": {
10+
"type": "dpa2",
11+
"repinit": {
12+
"tebd_dim": 8,
13+
"rcut": 6.0,
14+
"rcut_smth": 0.5,
15+
"nsel": 120,
16+
"neuron": [
17+
25,
18+
50,
19+
100
20+
],
21+
"axis_neuron": 12,
22+
"activation_function": "tanh",
23+
"three_body_sel": 48,
24+
"three_body_rcut": 4.0,
25+
"three_body_rcut_smth": 3.5,
26+
"use_three_body": true
27+
},
28+
"repformer": {
29+
"rcut": 4.0,
30+
"rcut_smth": 3.5,
31+
"nsel": 48,
32+
"nlayers": 6,
33+
"g1_dim": 128,
34+
"g2_dim": 32,
35+
"attn2_hidden": 32,
36+
"attn2_nhead": 4,
37+
"attn1_hidden": 128,
38+
"attn1_nhead": 4,
39+
"axis_neuron": 4,
40+
"update_h2": false,
41+
"update_g1_has_conv": true,
42+
"update_g1_has_grrg": true,
43+
"update_g1_has_drrd": true,
44+
"update_g1_has_attn": false,
45+
"update_g2_has_g1g1": false,
46+
"update_g2_has_attn": true,
47+
"update_style": "res_residual",
48+
"update_residual": 0.01,
49+
"update_residual_init": "norm",
50+
"attn2_has_gate": true,
51+
"use_sqrt_nnei": true,
52+
"g1_out_conv": true,
53+
"g1_out_mlp": true
54+
},
55+
"precision": "float64",
56+
"add_tebd_to_repinit_out": false,
57+
"seed": 1,
58+
"_comment": " that's all"
59+
},
60+
"_comment": "that's all"
61+
},
62+
"model_dict": {
63+
"water_1": {
64+
"type_map": "type_map_all",
65+
"descriptor": "dpa2_descriptor",
66+
"fitting_net": {
67+
"neuron": [
68+
240,
69+
240,
70+
240
71+
],
72+
"resnet_dt": true,
73+
"seed": 1,
74+
"_comment": " that's all"
75+
}
76+
},
77+
"water_2": {
78+
"type_map": "type_map_all",
79+
"descriptor": "dpa2_descriptor",
80+
"fitting_net": {
81+
"neuron": [
82+
240,
83+
240,
84+
240
85+
],
86+
"resnet_dt": true,
87+
"seed": 1,
88+
"_comment": " that's all"
89+
}
90+
}
91+
}
92+
},
93+
"learning_rate": {
94+
"type": "exp",
95+
"decay_steps": 5000,
96+
"start_lr": 0.0002,
97+
"decay_rate": 0.98,
98+
"stop_lr": 3.51e-08,
99+
"_comment": "that's all"
100+
},
101+
"loss_dict": {
102+
"water_1": {
103+
"type": "ener",
104+
"start_pref_e": 0.02,
105+
"limit_pref_e": 1,
106+
"start_pref_f": 1000,
107+
"limit_pref_f": 1,
108+
"start_pref_v": 0,
109+
"limit_pref_v": 0
110+
},
111+
"water_2": {
112+
"type": "ener",
113+
"start_pref_e": 0.02,
114+
"limit_pref_e": 1,
115+
"start_pref_f": 1000,
116+
"limit_pref_f": 1,
117+
"start_pref_v": 0,
118+
"limit_pref_v": 0
119+
}
120+
},
121+
"training": {
122+
"num_epoch_dict": {
123+
"water_1": 10,
124+
"water_2": 20
125+
},
126+
"data_dict": {
127+
"water_1": {
128+
"training_data": {
129+
"systems": [
130+
"../../water/data/data_0/",
131+
"../../water/data/data_1/",
132+
"../../water/data/data_2/"
133+
],
134+
"batch_size": 1,
135+
"_comment": "that's all"
136+
},
137+
"validation_data": {
138+
"systems": [
139+
"../../water/data/data_3/"
140+
],
141+
"batch_size": 1,
142+
"_comment": "that's all"
143+
}
144+
},
145+
"water_2": {
146+
"training_data": {
147+
"systems": [
148+
"../../water/data/data_0/",
149+
"../../water/data/data_1/",
150+
"../../water/data/data_2/"
151+
],
152+
"batch_size": 1,
153+
"_comment": "that's all"
154+
}
155+
}
156+
},
157+
"seed": 10,
158+
"disp_file": "lcurve.out",
159+
"disp_freq": 100,
160+
"save_freq": 100,
161+
"_comment": "that's all"
162+
}
163+
}

0 commit comments

Comments (0)