Skip to content

Commit 0f1b041

Browse files
committed
feat(pt): add type-coverage option for dp show
1 parent 04da98f commit 0f1b041

6 files changed

Lines changed: 152 additions & 42 deletions

File tree

deepmd/entrypoints/show.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
from deepmd.infer.deep_eval import (
55
DeepEval,
66
)
7+
from deepmd.utils.econf_embd import (
8+
sort_element_type,
9+
)
710

811
log = logging.getLogger(__name__)
912

@@ -69,3 +72,32 @@ def show(
6972
log.info(f"Parameter counts{log_prefix}:")
7073
for k in sorted(size_dict):
7174
log.info(f"Parameters in {k}: {size_dict[k]:,}")
75+
76+
if "type-coverage" in ATTRIBUTES:
77+
if model_is_multi_task:
78+
log.info("The type coverage for each branch: ")
79+
total_type_coverage_list = []
80+
model_branches = list(model_params["model_dict"].keys())
81+
for branch in model_branches:
82+
tmp_model = DeepEval(INPUT, head=branch, no_jit=True)
83+
type_coverage = tmp_model.get_type_coverage()
84+
log.info(
85+
f"{branch}: Number of covered types: {type_coverage['type_num']} "
86+
)
87+
log.info(f"{branch}: Covered types: {type_coverage['covered_type']} ")
88+
total_type_coverage_list += [
89+
tt
90+
for tt in type_coverage["covered_type"]
91+
if tt not in total_type_coverage_list
92+
]
93+
log.info(
94+
f"TOTAL number of covered types in the model: {len(total_type_coverage_list)} "
95+
)
96+
log.info(
97+
f"TOTAL covered types in the model: {sort_element_type(total_type_coverage_list)} "
98+
)
99+
else:
100+
log.info("The type coverage for this model: ")
101+
type_coverage = model.get_type_coverage()
102+
log.info(f"Number of covered types: {type_coverage['type_num']} ")
103+
log.info(f"Covered types: {type_coverage['covered_type']} ")

deepmd/infer/deep_eval.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,10 @@ def get_model_size(self) -> dict:
295295
"""Get model parameter count."""
296296
raise NotImplementedError("Not implemented in this backend.")
297297

298+
def get_type_coverage(self) -> dict:
    """Get model type (element) coverage.

    This implementation provides no result and always raises.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    message = "Not implemented in this backend."
    raise NotImplementedError(message)
301+
298302

299303
class DeepEval(ABC):
300304
"""High-level Deep Evaluator interface.
@@ -568,3 +572,7 @@ def get_model_def_script(self) -> dict:
568572
def get_model_size(self) -> dict:
    """Get model parameter count.

    The call is forwarded unchanged to ``self.deep_eval.get_model_size()``.
    """
    backend = self.deep_eval
    return backend.get_model_size()
575+
576+
def get_type_coverage(self) -> dict:
    """Get model type (element) coverage.

    The call is forwarded unchanged to ``self.deep_eval.get_type_coverage()``.
    """
    backend = self.deep_eval
    return backend.get_type_coverage()

deepmd/main.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -851,7 +851,14 @@ def main_parser() -> argparse.ArgumentParser:
851851
)
852852
parser_show.add_argument(
853853
"ATTRIBUTES",
854-
choices=["model-branch", "type-map", "descriptor", "fitting-net", "size"],
854+
choices=[
855+
"model-branch",
856+
"type-map",
857+
"descriptor",
858+
"fitting-net",
859+
"size",
860+
"type-coverage",
861+
],
855862
nargs="+",
856863
)
857864
return parser

deepmd/pt/infer/deep_eval.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@
6464
to_numpy_array,
6565
to_torch_tensor,
6666
)
67+
from deepmd.utils.econf_embd import (
68+
sort_element_type,
69+
)
6770

6871
if TYPE_CHECKING:
6972
import ase.neighborlist
@@ -98,6 +101,7 @@ def __init__(
98101
auto_batch_size: Union[bool, int, AutoBatchSize] = True,
99102
neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None,
100103
head: Optional[Union[str, int]] = None,
104+
no_jit: bool = False,
101105
**kwargs: Any,
102106
) -> None:
103107
self.output_def = output_def
@@ -130,7 +134,7 @@ def __init__(
130134
] = state_dict[item].clone()
131135
state_dict = state_dict_head
132136
model = get_model(self.input_param).to(DEVICE)
133-
if not self.input_param.get("hessian_mode"):
137+
if not self.input_param.get("hessian_mode") and not no_jit:
134138
model = torch.jit.script(model)
135139
self.dp = ModelWrapper(model)
136140
self.dp.load_state_dict(state_dict)
@@ -648,6 +652,35 @@ def get_model_size(self) -> dict:
648652
"total": sum_param_des + sum_param_fit,
649653
}
650654

655+
def get_type_coverage(self) -> dict:
    """Get model type (element) coverage.

    A type is reported as covered when at least one entry of its row in the
    first ``out_bias`` buffer has an absolute value above ``1e-6``.

    Returns
    -------
    dict
        A dictionary containing the information of type coverage in the model:
        - 'type_num': the total number of covered types in this model,
          as a built-in ``int``.
        - 'covered_type': a list of the covered types in this model,
          ordered by ``sort_element_type``.

    Raises
    ------
    AssertionError
        If no buffer name contains ``.out_bias``, if the selected bias slice
        is not 2D, or if its first dimension differs from the type map length.
    """
    type_map = np.array(self.type_map)
    # only use out_bias in the first fitting out_def: take the first buffer
    # whose name contains ".out_bias" without materializing every buffer
    first_bias_buffer = next(
        (buf for name, buf in self.dp.named_buffers() if ".out_bias" in name),
        None,
    )
    assert first_bias_buffer is not None, "No out_bias found in the model buffers."
    out_bias = first_bias_buffer.detach().cpu().numpy()[0]
    assert len(out_bias.shape) == 2, "The supported out_bias should be a 2D array."
    assert out_bias.shape[0] == len(type_map), (
        "The out_bias shape does not match the type map length."
    )
    bias_mask = (np.abs(out_bias) > 1e-6).any(axis=-1)  # 1e-6 for stability
    return {
        # bias_mask.sum() is a NumPy integer scalar; return a plain int so the
        # value behaves like any other Python integer for callers
        "type_num": int(bias_mask.sum()),
        "covered_type": sort_element_type(type_map[bias_mask].tolist()),
    }
683+
651684
def eval_descriptor(
652685
self,
653686
coords: np.ndarray,

deepmd/utils/econf_embd.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"electronic_configuration_embedding",
1010
"make_econf_embedding",
1111
"normalized_electronic_configuration_embedding",
12+
"sort_element_type",
1213
"transform_to_spin_rep",
1314
]
1415

@@ -263,3 +264,16 @@ def print_econf_embedding(res: dict[str, np.ndarray]) -> None:
263264
vvstr = ",".join([str(ii) for ii in vv])
264265
space = " " * (2 - len(kk))
265266
print(f'"{kk}"{space} : [{vvstr}],') # noqa: T201
267+
268+
269+
def sort_element_type(elements: list[str]) -> list[str]:
    """Sort element types based on their atomic number.

    Parameters
    ----------
    elements : list[str]
        The element symbols to order.

    Returns
    -------
    list[str]
        A new list ordered by ascending atomic number. Entries for which the
        ``element`` lookup raises ``ValueError`` are placed after all others.
        The input list is left unmodified.
    """

    def get_atomic_number(symbol: str) -> float:
        try:
            return element(symbol).atomic_number
        except ValueError:
            # rejected symbols get an infinite key so they sort last
            return float("inf")

    # pass the helper directly as the key; wrapping it in a lambda adds nothing
    return sorted(elements, key=get_atomic_number)

source/tests/pt/test_dp_show.py

Lines changed: 56 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -47,43 +47,49 @@ def setUp(self) -> None:
4747

4848
def test_checkpoint(self) -> None:
    """Check the lines logged by ``dp --pt show`` for the single-task checkpoint."""
    INPUT = "model.pt"
    ATTRIBUTES = "type-map descriptor fitting-net size type-coverage"
    # the command output is read back from the redirected stderr stream
    with redirect_stderr(io.StringIO()) as f:
        run_dp(f"dp --pt show {INPUT} {ATTRIBUTES}")
    results = f.getvalue().split("\n")[:-1]
    assert "This is a singletask model" in results[-11]
    assert "The type_map is ['O', 'H', 'Au']" in results[-10]
    # `a and b and c` evaluates to `c` alone, so each fragment is checked
    # explicitly instead of chaining the strings with `and`
    for fragment in ("{'type': 'se_e2_a'", "'sel': [46, 92, 4]", "'rcut': 4.0"):
        assert fragment in results[-9]
    assert (
        "The fitting_net parameter is {'neuron': [24, 24, 24], 'resnet_dt': True, 'seed': 1}"
        in results[-8]
    )
    assert "Parameter counts:" in results[-7]
    assert "Parameters in descriptor: 19,350" in results[-6]
    assert "Parameters in fitting-net: 119,091" in results[-5]
    assert "Parameters in total: 138,441" in results[-4]
    assert "The type coverage for this model:" in results[-3]
    assert "Number of covered types: 2" in results[-2]
    assert "Covered types: ['H', 'O']" in results[-1]
6770

6871
def test_frozen_model(self) -> None:
    """Check the lines logged by ``dp --pt show`` for the single-task frozen model."""
    INPUT = "frozen_model.pth"
    ATTRIBUTES = "type-map descriptor fitting-net size type-coverage"
    # the command output is read back from the redirected stderr stream
    with redirect_stderr(io.StringIO()) as f:
        run_dp(f"dp --pt show {INPUT} {ATTRIBUTES}")
    results = f.getvalue().split("\n")[:-1]
    assert "This is a singletask model" in results[-11]
    assert "The type_map is ['O', 'H', 'Au']" in results[-10]
    # `a and b and c` evaluates to `c` alone, so each fragment is checked
    # explicitly instead of chaining the strings with `and`
    for fragment in ("{'type': 'se_e2_a'", "'sel': [46, 92, 4]", "'rcut': 4.0"):
        assert fragment in results[-9]
    assert (
        "The fitting_net parameter is {'neuron': [24, 24, 24], 'resnet_dt': True, 'seed': 1}"
        in results[-8]
    )
    assert "Parameter counts:" in results[-7]
    assert "Parameters in descriptor: 19,350" in results[-6]
    assert "Parameters in fitting-net: 119,091" in results[-5]
    assert "Parameters in total: 138,441" in results[-4]
    assert "The type coverage for this model:" in results[-3]
    assert "Number of covered types: 2" in results[-2]
    assert "Covered types: ['H', 'O']" in results[-1]  # only covers two elements
8793

8894
def test_checkpoint_error(self) -> None:
8995
INPUT = "model.pt"
@@ -152,62 +158,72 @@ def setUp(self) -> None:
152158

153159
def test_checkpoint(self) -> None:
    """Check the lines logged by ``dp --pt show`` for the multi-task checkpoint."""
    INPUT = "model.ckpt.pt"
    ATTRIBUTES = "model-branch type-map descriptor fitting-net size type-coverage"
    # the command output is read back from the redirected stderr stream
    with redirect_stderr(io.StringIO()) as f:
        run_dp(f"dp --pt show {INPUT} {ATTRIBUTES}")
    results = f.getvalue().split("\n")[:-1]
    assert "This is a multitask model" in results[-19]
    assert (
        "Available model branches are ['model_1', 'model_2', 'RANDOM'], "
        "where 'RANDOM' means using a randomly initialized fitting net."
        in results[-18]
    )
    assert "The type_map of branch model_1 is ['O', 'H', 'B']" in results[-17]
    assert "The type_map of branch model_2 is ['O', 'H', 'B']" in results[-16]
    # `a and b and c` evaluates to `c` alone, so each fragment is checked
    # explicitly instead of chaining the strings with `and`
    descriptor_fragments = (
        "'type': 'se_e2_a'",
        "'sel': [46, 92, 4]",
        "'rcut_smth': 0.5",
    )
    for fragment in ("model_1", *descriptor_fragments):
        assert fragment in results[-15]
    for fragment in ("model_2", *descriptor_fragments):
        assert fragment in results[-14]
    assert (
        "The fitting_net parameter of branch model_1 is {'neuron': [1, 2, 3], 'seed': 678}"
        in results[-13]
    )
    assert (
        "The fitting_net parameter of branch model_2 is {'neuron': [9, 8, 7], 'seed': 1111}"
        in results[-12]
    )
    assert "Parameter counts for a single branch model:" in results[-11]
    assert "Parameters in descriptor: 19,350" in results[-10]
    assert "Parameters in fitting-net: 4,860" in results[-9]
    assert "Parameters in total: 24,210" in results[-8]
    assert "The type coverage for each branch:" in results[-7]
    assert "model_1: Number of covered types: 2" in results[-6]
    assert "model_1: Covered types: ['H', 'O']" in results[-5]
    assert "model_2: Number of covered types: 2" in results[-4]
    assert "model_2: Covered types: ['H', 'O']" in results[-3]
    assert "TOTAL number of covered types in the model: 2" in results[-2]
    assert "TOTAL covered types in the model: ['H', 'O']" in results[-1]
191204

192205
def test_frozen_model(self) -> None:
    """Check the lines logged by ``dp --pt show`` for the frozen model of this suite."""
    INPUT = "frozen_model.pth"
    ATTRIBUTES = "type-map descriptor fitting-net size type-coverage"
    # the command output is read back from the redirected stderr stream
    with redirect_stderr(io.StringIO()) as f:
        run_dp(f"dp --pt show {INPUT} {ATTRIBUTES}")
    results = f.getvalue().split("\n")[:-1]
    assert "This is a singletask model" in results[-11]
    assert "The type_map is ['O', 'H', 'B']" in results[-10]
    # `a and b and c` evaluates to `c` alone, so each fragment is checked
    # explicitly instead of chaining the strings with `and`
    for fragment in ("'type': 'se_e2_a'", "'sel': [46, 92, 4]", "'rcut_smth': 0.5"):
        assert fragment in results[-9]
    assert (
        "The fitting_net parameter is {'neuron': [1, 2, 3], 'seed': 678}"
        in results[-8]
    )
    assert "Parameter counts:" in results[-7]
    assert "Parameters in descriptor: 19,350" in results[-6]
    assert "Parameters in fitting-net: 4,860" in results[-5]
    assert "Parameters in total: 24,210" in results[-4]
    assert "The type coverage for this model:" in results[-3]
    assert "Number of covered types: 2" in results[-2]
    assert "Covered types: ['H', 'O']" in results[-1]  # only covers two elements
211227

212228
def tearDown(self) -> None:
213229
for f in os.listdir("."):

0 commit comments

Comments
 (0)