Skip to content

Commit 3effbdb

Browse files
refine code
1 parent c510815 commit 3effbdb

7 files changed

Lines changed: 10 additions & 108 deletions

File tree

deepmd/pd/train/training.py

Lines changed: 1 addition & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -760,38 +760,6 @@ def step(_step_id, task_key="Default") -> None:
760760
pref_lr = _lr.start_lr
761761
else:
762762
pref_lr = cur_lr
763-
self.wrapper.load_state_dict(paddle.load("./wrapper_dict.pd"))
764-
print("model loaded")
765-
inp = np.load("./input_dict.npz", allow_pickle=True)
766-
for k, v in inp.items():
767-
if isinstance(v, np.ndarray):
768-
# print(k, type(v), v.shape, v.dtype)
769-
try:
770-
input_dict[k] = paddle.to_tensor(v)
771-
# print(k)
772-
except Exception:
773-
pass
774-
if isinstance(input_dict[k], paddle.Tensor):
775-
input_dict[k] = input_dict[k].cuda()
776-
print("input_dict loaded")
777-
lab = np.load("./label_dict.npz", allow_pickle=True)
778-
for k, v in lab.items():
779-
if isinstance(v, np.ndarray):
780-
# print(k, type(v), v.shape, v.dtype)
781-
try:
782-
label_dict[k] = paddle.to_tensor(v)
783-
# print(k)
784-
except Exception:
785-
pass
786-
if isinstance(label_dict[k], paddle.Tensor):
787-
label_dict[k] = label_dict[k].cuda()
788-
print("label_dict loaded")
789-
model_pred, loss, more_loss = self.wrapper(
790-
**input_dict, cur_lr=pref_lr, label=label_dict, task_key=task_key
791-
)
792-
print({k: float(v) for k, v in more_loss.items()})
793-
print(f"{loss.item():.10f}")
794-
exit()
795763

796764
# disable synchronization in forward-backward manually
797765
# as derivatives exist in model forward
@@ -811,7 +779,7 @@ def step(_step_id, task_key="Default") -> None:
811779

812780
with nvprof_context(enable_profiling, "Backward pass"):
813781
loss.backward()
814-
exit()
782+
815783
# fuse + allreduce manually before optimization if use DDP + no_sync
816784
# details in https://github.com/PaddlePaddle/Paddle/issues/48898#issuecomment-1343838622
817785
if self.world_size > 1:

deepmd/pd/train/wrapper.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,6 @@ def forward(
173173
model_pred = self.model[task_key](**input_dict)
174174
return model_pred, None, None
175175
else:
176-
print(self.loss)
177176
natoms = atype.shape[-1]
178177
model_pred, loss, more_loss = self.loss[task_key](
179178
input_dict,

deepmd/pt/model/model/transform_output.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,6 @@ def task_deriv_one(
9696
extended_virial = extended_virial.view(list(extended_virial.shape[:-2]) + [9]) # noqa:RUF005
9797
else:
9898
extended_virial = None
99-
print(
100-
f"extended_force: {extended_force.min().item():.10f} {extended_force.max().item():.10f} {extended_force.mean().item():.10f} {extended_force.std().item():.10f}"
101-
)
10299
return extended_force, extended_virial
103100

104101

deepmd/pt/train/training.py

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -725,64 +725,9 @@ def step(_step_id, task_key="Default") -> None:
725725
pref_lr = _lr.start_lr
726726
else:
727727
pref_lr = cur_lr
728-
729-
# save
730-
# torch.save(self.wrapper.state_dict(), "wrapper_dict.pt")
731-
# import paddle
732-
# psd = {}
733-
# for k, v in self.wrapper.state_dict().items():
734-
# if isinstance(v, torch.Tensor):
735-
# psd[k] = paddle.from_dlpack(v.detach())
736-
# else:
737-
# psd[k] = v
738-
# paddle.save(psd, "wrapper_dict.pd")
739-
# inp = {}
740-
# for k, v in input_dict.items():
741-
# if isinstance(v, torch.Tensor):
742-
# inp[k] = v.detach().cpu().numpy()
743-
# else:
744-
# inp[k] = v
745-
# np.savez("./input_dict.npz", **inp)
746-
# lab = {}
747-
# for k, v in label_dict.items():
748-
# if isinstance(v, torch.Tensor):
749-
# lab[k] = v.detach().cpu().numpy()
750-
# else:
751-
# lab[k] = v
752-
# np.savez("./label_dict.npz", **lab)
753-
754-
# load
755-
self.wrapper.load_state_dict(torch.load("./wrapper_dict.pt"))
756-
print("model loaded")
757-
inp = np.load("./input_dict.npz", allow_pickle=True)
758-
for k, v in inp.items():
759-
if isinstance(v, np.ndarray):
760-
# print(k, type(v), v.shape, v.dtype)
761-
try:
762-
input_dict[k] = torch.tensor(v)
763-
except TypeError:
764-
pass
765-
if isinstance(input_dict[k], torch.Tensor):
766-
input_dict[k] = input_dict[k].cuda()
767-
print("input_dict loaded")
768-
lab = np.load("./label_dict.npz", allow_pickle=True)
769-
for k, v in lab.items():
770-
if isinstance(v, np.ndarray):
771-
# print(k, type(v), v.shape, v.dtype)
772-
try:
773-
label_dict[k] = torch.tensor(v)
774-
except TypeError:
775-
pass
776-
if isinstance(label_dict[k], torch.Tensor):
777-
label_dict[k] = label_dict[k].cuda()
778-
print("label_dict loaded")
779-
780728
model_pred, loss, more_loss = self.wrapper(
781729
**input_dict, cur_lr=pref_lr, label=label_dict, task_key=task_key
782730
)
783-
print({k: float(v) for k, v in more_loss.items()})
784-
print(f"{loss.item():.10f}")
785-
exit()
786731
loss.backward()
787732
if self.gradient_max_norm > 0.0:
788733
torch.nn.utils.clip_grad_norm_(

deepmd/pt/train/wrapper.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,6 @@ def forward(
175175
return model_pred, None, None
176176
else:
177177
natoms = atype.shape[-1]
178-
print(self.loss)
179178
model_pred, loss, more_loss = self.loss[task_key](
180179
input_dict,
181180
self.model[task_key],

source/api_cc/include/DeepPotPD.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -392,18 +392,18 @@ class DeepPotPD : public DeepPotBackend {
392392
int do_message_passing; // 1:dpa2 model 0:others
393393
bool gpu_enabled;
394394
std::unique_ptr<paddle_infer::Tensor> firstneigh_tensor;
395-
std::vector<paddle_infer::Tensor> comm_vec;
396-
paddle_infer::Tensor mapping_tensor = predictor_fl->GetInputHandle("mapping");
395+
397396
/* comm_vec flattens all the communication tensors below from comm dict:
398-
- send_list
399-
- send_proc
400-
- recv_proc
401-
- send_num
402-
- recv_num
403-
- communicator
397+
- [0] send_list
398+
- [1] send_proc
399+
- [2] recv_proc
400+
- [3] send_num
401+
- [4] recv_num
402+
- [5] communicator
404403
*/
404+
std::vector<paddle_infer::Tensor> comm_vec;
405405

406-
// std::unordered_map<std::string, paddle::Tensor> comm_dict; # Not used yet
406+
paddle_infer::Tensor mapping_tensor = predictor_fl->GetInputHandle("mapping");
407407
};
408408

409409
} // namespace deepmd

source/api_cc/src/DeepPotPD.cc

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,6 @@ void DeepPotPD::compute(ENERGYVTYPE& ener,
205205
nlist_data.shuffle_exclude_empty(fwd_map);
206206
nlist_data.padding();
207207
if (do_message_passing == 1 && nghost > 0) {
208-
// throw deepmd::deepmd_exception(
209-
// "(do_message_passing == 1 && nghost > 0) is not supported yet.");
210208
int nswap = lmp_list.nswap;
211209
auto sendproc_tensor = predictor_fl->GetInputHandle("sendproc");
212210
sendproc_tensor->Reshape({nswap});
@@ -249,10 +247,6 @@ void DeepPotPD::compute(ENERGYVTYPE& ener,
249247
this->mapping_tensor->Reshape({1, nall_real});
250248
this->mapping_tensor->CopyFromCpu(mapping.data());
251249
}
252-
// if (do_message_passing == 1 && nghost == 0) {
253-
// throw deepmd::deepmd_exception(
254-
// "(do_message_passing == 1 && nghost == 0) is not supported yet.");
255-
// }
256250
}
257251
std::vector<int> firstneigh = createNlistTensorPD(nlist_data.jlist);
258252
firstneigh_tensor = predictor_fl->GetInputHandle("nlist");

0 commit comments

Comments
 (0)