Skip to content

Commit d2a46b7

Browse files
update for dpa3_dynamic debug code
1 parent 2b32af5 commit d2a46b7

8 files changed

Lines changed: 125 additions & 28 deletions

File tree

.pre-commit-config.yaml

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,13 @@ repos:
6565
- id: clang-format
6666
exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$|.+\.json$)
6767
# markdown, yaml, CSS, javascript
68-
- repo: https://github.com/pre-commit/mirrors-prettier
69-
rev: v4.0.0-alpha.8
70-
hooks:
71-
- id: prettier
72-
types_or: [markdown, yaml, css]
73-
# workflow files cannot be modified by pre-commit.ci
74-
exclude: ^(source/3rdparty|\.github/workflows|\.clang-format)
68+
# - repo: https://github.com/pre-commit/mirrors-prettier
69+
# rev: v4.0.0-alpha.8
70+
# hooks:
71+
# - id: prettier
72+
# types_or: [markdown, yaml, css]
73+
# # workflow files cannot be modified by pre-commit.ci
74+
# exclude: ^(source/3rdparty|\.github/workflows|\.clang-format)
7575
# Shell
7676
- repo: https://github.com/scop/pre-commit-shfmt
7777
rev: v3.12.0-1
@@ -83,25 +83,25 @@ repos:
8383
hooks:
8484
- id: cmake-format
8585
#- id: cmake-lint
86-
- repo: https://github.com/njzjz/mirrors-bibtex-tidy
87-
rev: v1.13.0
88-
hooks:
89-
- id: bibtex-tidy
90-
args:
91-
- --curly
92-
- --numeric
93-
- --align=13
94-
- --blank-lines
95-
# disable sort: the order of keys and fields has explict meanings
96-
#- --sort=key
97-
- --duplicates=key,doi,citation,abstract
98-
- --merge=combine
99-
#- --sort-fields
100-
#- --strip-comments
101-
- --trailing-commas
102-
- --encode-urls
103-
- --remove-empty-fields
104-
- --wrap=80
86+
# - repo: https://github.com/njzjz/mirrors-bibtex-tidy
87+
# rev: v1.13.0
88+
# hooks:
89+
# - id: bibtex-tidy
90+
# args:
91+
# - --curly
92+
# - --numeric
93+
# - --align=13
94+
# - --blank-lines
95+
# # disable sort: the order of keys and fields has explict meanings
96+
# #- --sort=key
97+
# - --duplicates=key,doi,citation,abstract
98+
# - --merge=combine
99+
# #- --sort-fields
100+
# #- --strip-comments
101+
# - --trailing-commas
102+
# - --encode-urls
103+
# - --remove-empty-fields
104+
# - --wrap=80
105105
# license header
106106
- repo: https://github.com/Lucas-C/pre-commit-hooks
107107
rev: v1.5.5

deepmd/pd/model/descriptor/repflows.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
Union,
66
)
77

8+
import numpy as np
89
import paddle
910

1011
from deepmd.dpmodel.utils.seed import (
@@ -493,7 +494,7 @@ def forward(
493494
# nf x nloc x a_nnei x a_nnei
494495
# 1 - 1e-6 for paddle.acos stability
495496
cosine_ij = paddle.matmul(normalized_diff_i, normalized_diff_j) * (1 - 1e-6)
496-
angle_input = cosine_ij.unsqueeze(-1) / (paddle.pi**0.5)
497+
angle_input = cosine_ij.unsqueeze(-1) / (np.pi**0.5)
497498

498499
if not parallel_mode and self.use_loc_mapping:
499500
assert mapping is not None

deepmd/pd/model/model/transform_output.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ def atomic_virial_corr(
2727
ce = coord * atom_energy
2828
sumce0, sumce1, sumce2 = paddle.split(paddle.sum(ce, axis=1), [1, 1, 1], axis=-1)
2929
# faked_grad = paddle.ones_like(sumce0)
30+
raise
3031
extended_virial_corr0 = paddle.autograd.grad(
3132
[sumce0],
3233
[extended_coord],
@@ -93,6 +94,9 @@ def task_deriv_one(
9394
)
9495
else:
9596
extended_virial = None
97+
print(
98+
f"extended_force: {extended_force.min().item():.10f} {extended_force.max().item():.10f} {extended_force.mean().item():.10f} {extended_force.std().item():.10f}"
99+
)
96100
return extended_force, extended_virial
97101

98102

deepmd/pd/train/training.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,38 @@ def step(_step_id, task_key="Default") -> None:
760760
pref_lr = _lr.start_lr
761761
else:
762762
pref_lr = cur_lr
763+
self.wrapper.load_state_dict(paddle.load("./wrapper_dict.pd"))
764+
print("model loaded")
765+
inp = np.load("./input_dict.npz", allow_pickle=True)
766+
for k, v in inp.items():
767+
if isinstance(v, np.ndarray):
768+
# print(k, type(v), v.shape, v.dtype)
769+
try:
770+
input_dict[k] = paddle.to_tensor(v)
771+
# print(k)
772+
except Exception:
773+
pass
774+
if isinstance(input_dict[k], paddle.Tensor):
775+
input_dict[k] = input_dict[k].cuda()
776+
print("input_dict loaded")
777+
lab = np.load("./label_dict.npz", allow_pickle=True)
778+
for k, v in lab.items():
779+
if isinstance(v, np.ndarray):
780+
# print(k, type(v), v.shape, v.dtype)
781+
try:
782+
label_dict[k] = paddle.to_tensor(v)
783+
# print(k)
784+
except Exception:
785+
pass
786+
if isinstance(label_dict[k], paddle.Tensor):
787+
label_dict[k] = label_dict[k].cuda()
788+
print("label_dict loaded")
789+
model_pred, loss, more_loss = self.wrapper(
790+
**input_dict, cur_lr=pref_lr, label=label_dict, task_key=task_key
791+
)
792+
print({k: float(v) for k, v in more_loss.items()})
793+
print(f"{loss.item():.10f}")
794+
exit()
763795

764796
# disable synchronization in forward-backward manually
765797
# as derivatives exist in model forward
@@ -779,7 +811,7 @@ def step(_step_id, task_key="Default") -> None:
779811

780812
with nvprof_context(enable_profiling, "Backward pass"):
781813
loss.backward()
782-
814+
exit()
783815
# fuse + allreduce manually before optimization if use DDP + no_sync
784816
# details in https://github.com/PaddlePaddle/Paddle/issues/48898#issuecomment-1343838622
785817
if self.world_size > 1:

deepmd/pd/train/wrapper.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ def forward(
173173
model_pred = self.model[task_key](**input_dict)
174174
return model_pred, None, None
175175
else:
176+
print(self.loss)
176177
natoms = atype.shape[-1]
177178
model_pred, loss, more_loss = self.loss[task_key](
178179
input_dict,

deepmd/pt/model/model/transform_output.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ def task_deriv_one(
9696
extended_virial = extended_virial.view(list(extended_virial.shape[:-2]) + [9]) # noqa:RUF005
9797
else:
9898
extended_virial = None
99+
print(
100+
f"extended_force: {extended_force.min().item():.10f} {extended_force.max().item():.10f} {extended_force.mean().item():.10f} {extended_force.std().item():.10f}"
101+
)
99102
return extended_force, extended_virial
100103

101104

deepmd/pt/train/training.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,9 +725,64 @@ def step(_step_id, task_key="Default") -> None:
725725
pref_lr = _lr.start_lr
726726
else:
727727
pref_lr = cur_lr
728+
729+
# save
730+
# torch.save(self.wrapper.state_dict(), "wrapper_dict.pt")
731+
# import paddle
732+
# psd = {}
733+
# for k, v in self.wrapper.state_dict().items():
734+
# if isinstance(v, torch.Tensor):
735+
# psd[k] = paddle.from_dlpack(v.detach())
736+
# else:
737+
# psd[k] = v
738+
# paddle.save(psd, "wrapper_dict.pd")
739+
# inp = {}
740+
# for k, v in input_dict.items():
741+
# if isinstance(v, torch.Tensor):
742+
# inp[k] = v.detach().cpu().numpy()
743+
# else:
744+
# inp[k] = v
745+
# np.savez("./input_dict.npz", **inp)
746+
# lab = {}
747+
# for k, v in label_dict.items():
748+
# if isinstance(v, torch.Tensor):
749+
# lab[k] = v.detach().cpu().numpy()
750+
# else:
751+
# lab[k] = v
752+
# np.savez("./label_dict.npz", **lab)
753+
754+
# load
755+
self.wrapper.load_state_dict(torch.load("./wrapper_dict.pt"))
756+
print("model loaded")
757+
inp = np.load("./input_dict.npz", allow_pickle=True)
758+
for k, v in inp.items():
759+
if isinstance(v, np.ndarray):
760+
# print(k, type(v), v.shape, v.dtype)
761+
try:
762+
input_dict[k] = torch.tensor(v)
763+
except TypeError:
764+
pass
765+
if isinstance(input_dict[k], torch.Tensor):
766+
input_dict[k] = input_dict[k].cuda()
767+
print("input_dict loaded")
768+
lab = np.load("./label_dict.npz", allow_pickle=True)
769+
for k, v in lab.items():
770+
if isinstance(v, np.ndarray):
771+
# print(k, type(v), v.shape, v.dtype)
772+
try:
773+
label_dict[k] = torch.tensor(v)
774+
except TypeError:
775+
pass
776+
if isinstance(label_dict[k], torch.Tensor):
777+
label_dict[k] = label_dict[k].cuda()
778+
print("label_dict loaded")
779+
728780
model_pred, loss, more_loss = self.wrapper(
729781
**input_dict, cur_lr=pref_lr, label=label_dict, task_key=task_key
730782
)
783+
print({k: float(v) for k, v in more_loss.items()})
784+
print(f"{loss.item():.10f}")
785+
exit()
731786
loss.backward()
732787
if self.gradient_max_norm > 0.0:
733788
torch.nn.utils.clip_grad_norm_(

deepmd/pt/train/wrapper.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ def forward(
175175
return model_pred, None, None
176176
else:
177177
natoms = atype.shape[-1]
178+
print(self.loss)
178179
model_pred, loss, more_loss = self.loss[task_key](
179180
input_dict,
180181
self.model[task_key],

0 commit comments

Comments (0)