Skip to content

Commit 8a5b2e9

Browse files
committed
add examples
1 parent 203bb34 commit 8a5b2e9

File tree

5 files changed

+295
-50
lines changed

5 files changed

+295
-50
lines changed

deepmd/pd/train/training.py

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -400,20 +400,20 @@ def get_lr(lr_params: dict[str, Any]) -> BaseLR:
400400

401401
per_task_total = []
402402
if not self.multi_task:
403-
sampler_weights = to_numpy_array(
404-
self.training_dataloader.batch_sampler.sampler.weights
405-
)
406-
total_numb_batch = compute_total_numb_batch(
407-
training_data.index,
408-
sampler_weights,
409-
)
410403
if self.num_steps is None:
411404
if self.num_epoch is None:
412405
raise ValueError(
413406
"Either training.numb_steps or training.num_epoch must be set."
414407
)
415408
if self.num_epoch <= 0:
416409
raise ValueError("training.num_epoch must be positive.")
410+
sampler_weights = to_numpy_array(
411+
self.training_dataloader.batch_sampler.sampler.weights
412+
)
413+
total_numb_batch = compute_total_numb_batch(
414+
training_data.index,
415+
sampler_weights,
416+
)
417417
if total_numb_batch <= 0:
418418
raise ValueError(
419419
"Total number of training batches must be positive."
@@ -426,17 +426,24 @@ def get_lr(lr_params: dict[str, Any]) -> BaseLR:
426426
total_numb_batch,
427427
)
428428
else:
429-
for model_key in self.model_keys:
430-
sampler_weights = to_numpy_array(
431-
self.training_dataloader[model_key].batch_sampler.sampler.weights
432-
)
433-
per_task_total.append(
434-
compute_total_numb_batch(
435-
training_data[model_key].index,
436-
sampler_weights,
437-
)
438-
)
439429
if self.num_epoch_dict:
430+
if self.num_steps is not None:
431+
raise ValueError(
432+
"training.numb_steps and training.num_epoch_dict "
433+
"are mutually exclusive."
434+
)
435+
for model_key in self.model_keys:
436+
sampler_weights = to_numpy_array(
437+
self.training_dataloader[
438+
model_key
439+
].batch_sampler.sampler.weights
440+
)
441+
per_task_total.append(
442+
compute_total_numb_batch(
443+
training_data[model_key].index,
444+
sampler_weights,
445+
)
446+
)
440447
(
441448
self.model_prob,
442449
self.num_steps,
@@ -652,15 +659,6 @@ def single_model_finetune(
652659
frz_model = paddle.jit.load(init_frz_model)
653660
self.model.set_state_dict(frz_model.state_dict())
654661

655-
# Get model prob for multi-task
656-
if self.multi_task and self.model_prob is None:
657-
self.model_prob = resolve_model_prob(
658-
self.model_keys,
659-
training_params.get("model_prob"),
660-
training_data,
661-
rank=self.rank,
662-
)
663-
664662
# Multi-task share params
665663
if shared_links is not None:
666664
self.wrapper.share_params(

deepmd/pt/train/training.py

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -473,18 +473,20 @@ def get_lr(lr_params: dict[str, Any]) -> BaseLR:
473473
# Resolve training steps
474474
per_task_total = []
475475
if not self.multi_task:
476-
sampler_weights = to_numpy_array(self.training_dataloader.sampler.weights)
477-
total_numb_batch = compute_total_numb_batch(
478-
training_data.index,
479-
sampler_weights,
480-
)
481476
if self.num_steps is None:
482477
if self.num_epoch is None:
483478
raise ValueError(
484479
"Either training.numb_steps or training.num_epoch must be set."
485480
)
486481
if self.num_epoch <= 0:
487482
raise ValueError("training.num_epoch must be positive.")
483+
sampler_weights = to_numpy_array(
484+
self.training_dataloader.sampler.weights
485+
)
486+
total_numb_batch = compute_total_numb_batch(
487+
training_data.index,
488+
sampler_weights,
489+
)
488490
if total_numb_batch <= 0:
489491
raise ValueError(
490492
"Total number of training batches must be positive."
@@ -497,17 +499,22 @@ def get_lr(lr_params: dict[str, Any]) -> BaseLR:
497499
total_numb_batch,
498500
)
499501
else:
500-
for model_key in self.model_keys:
501-
sampler_weights = to_numpy_array(
502-
self.training_dataloader[model_key].sampler.weights
503-
)
504-
per_task_total.append(
505-
compute_total_numb_batch(
506-
training_data[model_key].index,
507-
sampler_weights,
508-
)
509-
)
510502
if self.num_epoch_dict:
503+
if self.num_steps is not None:
504+
raise ValueError(
505+
"training.numb_steps and training.num_epoch_dict "
506+
"are mutually exclusive."
507+
)
508+
for model_key in self.model_keys:
509+
sampler_weights = to_numpy_array(
510+
self.training_dataloader[model_key].sampler.weights
511+
)
512+
per_task_total.append(
513+
compute_total_numb_batch(
514+
training_data[model_key].index,
515+
sampler_weights,
516+
)
517+
)
511518
(
512519
self.model_prob,
513520
self.num_steps,
@@ -759,15 +766,6 @@ def single_model_finetune(
759766
f"Checkpoint loaded non-strictly. Missing keys: {missing}, Unexpected keys: {unexpected}"
760767
)
761768

762-
# Get model prob for multi-task
763-
if self.multi_task and self.model_prob is None:
764-
self.model_prob = resolve_model_prob(
765-
self.model_keys,
766-
training_params.get("model_prob"),
767-
training_data,
768-
rank=self.rank,
769-
)
770-
771769
# Multi-task share params
772770
if shared_links is not None:
773771
_data_stat_protect = np.array(
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
{
2+
"model": {
3+
"type_map": [
4+
"O",
5+
"H"
6+
],
7+
"descriptor": {
8+
"type": "se_e2_a",
9+
"sel": [
10+
46,
11+
92
12+
],
13+
"rcut_smth": 0.50,
14+
"rcut": 6.00,
15+
"neuron": [
16+
25,
17+
50,
18+
100
19+
],
20+
"resnet_dt": false,
21+
"axis_neuron": 16,
22+
"type_one_side": true,
23+
"seed": 1,
24+
"_comment": " that's all"
25+
},
26+
"fitting_net": {
27+
"neuron": [
28+
240,
29+
240,
30+
240
31+
],
32+
"resnet_dt": true,
33+
"seed": 1,
34+
"_comment": " that's all"
35+
},
36+
"data_stat_nbatch": 20,
37+
"_comment": " that's all"
38+
},
39+
"learning_rate": {
40+
"type": "exp",
41+
"decay_steps": 5000,
42+
"start_lr": 0.001,
43+
"stop_lr": 3.51e-8,
44+
"_comment": "that's all"
45+
},
46+
"loss": {
47+
"type": "ener",
48+
"start_pref_e": 0.02,
49+
"limit_pref_e": 1,
50+
"start_pref_f": 1000,
51+
"limit_pref_f": 1,
52+
"_comment": " that's all"
53+
},
54+
"training": {
55+
"stat_file": "./se_e2_a.hdf5",
56+
"training_data": {
57+
"systems": [
58+
"../data/data_0",
59+
"../data/data_1",
60+
"../data/data_2"
61+
],
62+
"batch_size": 1,
63+
"_comment": "that's all"
64+
},
65+
"validation_data": {
66+
"systems": [
67+
"../data/data_3"
68+
],
69+
"batch_size": 1,
70+
"numb_btch": 3,
71+
"_comment": "that's all"
72+
},
73+
"num_epoch": 100,
74+
"seed": 10,
75+
"disp_file": "lcurve.out",
76+
"disp_freq": 100,
77+
"save_freq": 10000,
78+
"_comment": "that's all"
79+
},
80+
"_comment": "that's all"
81+
}
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
{
2+
"_comment": "that's all",
3+
"model": {
4+
"shared_dict": {
5+
"type_map_all": [
6+
"O",
7+
"H"
8+
],
9+
"dpa2_descriptor": {
10+
"type": "dpa2",
11+
"repinit": {
12+
"tebd_dim": 8,
13+
"rcut": 6.0,
14+
"rcut_smth": 0.5,
15+
"nsel": 120,
16+
"neuron": [
17+
25,
18+
50,
19+
100
20+
],
21+
"axis_neuron": 12,
22+
"activation_function": "tanh",
23+
"three_body_sel": 48,
24+
"three_body_rcut": 4.0,
25+
"three_body_rcut_smth": 3.5,
26+
"use_three_body": true
27+
},
28+
"repformer": {
29+
"rcut": 4.0,
30+
"rcut_smth": 3.5,
31+
"nsel": 48,
32+
"nlayers": 6,
33+
"g1_dim": 128,
34+
"g2_dim": 32,
35+
"attn2_hidden": 32,
36+
"attn2_nhead": 4,
37+
"attn1_hidden": 128,
38+
"attn1_nhead": 4,
39+
"axis_neuron": 4,
40+
"update_h2": false,
41+
"update_g1_has_conv": true,
42+
"update_g1_has_grrg": true,
43+
"update_g1_has_drrd": true,
44+
"update_g1_has_attn": false,
45+
"update_g2_has_g1g1": false,
46+
"update_g2_has_attn": true,
47+
"update_style": "res_residual",
48+
"update_residual": 0.01,
49+
"update_residual_init": "norm",
50+
"attn2_has_gate": true,
51+
"use_sqrt_nnei": true,
52+
"g1_out_conv": true,
53+
"g1_out_mlp": true
54+
},
55+
"precision": "float64",
56+
"add_tebd_to_repinit_out": false,
57+
"seed": 1,
58+
"_comment": " that's all"
59+
},
60+
"_comment": "that's all"
61+
},
62+
"model_dict": {
63+
"water_1": {
64+
"type_map": "type_map_all",
65+
"descriptor": "dpa2_descriptor",
66+
"fitting_net": {
67+
"neuron": [
68+
240,
69+
240,
70+
240
71+
],
72+
"resnet_dt": true,
73+
"seed": 1,
74+
"_comment": " that's all"
75+
}
76+
},
77+
"water_2": {
78+
"type_map": "type_map_all",
79+
"descriptor": "dpa2_descriptor",
80+
"fitting_net": {
81+
"neuron": [
82+
240,
83+
240,
84+
240
85+
],
86+
"resnet_dt": true,
87+
"seed": 1,
88+
"_comment": " that's all"
89+
}
90+
}
91+
}
92+
},
93+
"learning_rate": {
94+
"type": "exp",
95+
"decay_steps": 5000,
96+
"start_lr": 0.0002,
97+
"decay_rate": 0.98,
98+
"stop_lr": 3.51e-08,
99+
"_comment": "that's all"
100+
},
101+
"loss_dict": {
102+
"water_1": {
103+
"type": "ener",
104+
"start_pref_e": 0.02,
105+
"limit_pref_e": 1,
106+
"start_pref_f": 1000,
107+
"limit_pref_f": 1,
108+
"start_pref_v": 0,
109+
"limit_pref_v": 0
110+
},
111+
"water_2": {
112+
"type": "ener",
113+
"start_pref_e": 0.02,
114+
"limit_pref_e": 1,
115+
"start_pref_f": 1000,
116+
"limit_pref_f": 1,
117+
"start_pref_v": 0,
118+
"limit_pref_v": 0
119+
}
120+
},
121+
"training": {
122+
"num_epoch_dict": {
123+
"water_1": 10,
124+
"water_2": 20
125+
},
126+
"data_dict": {
127+
"water_1": {
128+
"training_data": {
129+
"systems": [
130+
"../../water/data/data_0/",
131+
"../../water/data/data_1/",
132+
"../../water/data/data_2/"
133+
],
134+
"batch_size": 1,
135+
"_comment": "that's all"
136+
},
137+
"validation_data": {
138+
"systems": [
139+
"../../water/data/data_3/"
140+
],
141+
"batch_size": 1,
142+
"_comment": "that's all"
143+
}
144+
},
145+
"water_2": {
146+
"training_data": {
147+
"systems": [
148+
"../../water/data/data_0/",
149+
"../../water/data/data_1/",
150+
"../../water/data/data_2/"
151+
],
152+
"batch_size": 1,
153+
"_comment": "that's all"
154+
}
155+
}
156+
},
157+
"seed": 10,
158+
"disp_file": "lcurve.out",
159+
"disp_freq": 100,
160+
"save_freq": 100,
161+
"_comment": "that's all"
162+
}
163+
}

0 commit comments

Comments (0)