Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions deepmd/dpmodel/utils/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
from deepmd.dpmodel.common import (
to_numpy_array,
)
from deepmd.dpmodel.utils.exclude_mask import (
AtomExcludeMask,
)
from deepmd.utils.out_stat import (
compute_stats_do_not_distinguish_types,
compute_stats_from_atomic,
Expand Down Expand Up @@ -245,10 +248,8 @@ def compute_output_stats(
system["find_atom_" + kk] > 0.0
):
atomic_sampled_idx[kk].append(idx)
elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
if (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
global_sampled_idx[kk].append(idx)
else:
continue

# use index to gather model predictions for the corresponding systems.
model_pred_g = (
Expand Down Expand Up @@ -359,14 +360,21 @@ def compute_output_stats_global(
for kk in keys
}

natoms_key = "natoms"
input_natoms = {
kk: [
to_numpy_array(sampled[idx][natoms_key])
for idx in global_sampled_idx.get(kk, [])
]
for kk in keys
}
data_mixed_type = "real_natoms_vec" in sampled[0]
natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec"
input_natoms = {}
for kk in keys:
kk_natoms = []
for idx in global_sampled_idx.get(kk, []):
nn = to_numpy_array(sampled[idx][natoms_key])
if "atom_exclude_types" in sampled[idx]:
nn = nn.copy()
type_mask = AtomExcludeMask(
ntypes, sampled[idx]["atom_exclude_types"]
).get_type_mask()
nn[:, 2:] *= type_mask.reshape(1, -1)
kk_natoms.append(nn)
input_natoms[kk] = kk_natoms

# shape: (nframes, ndim)
merged_output = {
Expand Down
91 changes: 43 additions & 48 deletions deepmd/pd/utils/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,12 +324,9 @@ def compute_output_stats(
system["find_atom_" + kk] > 0.0
):
atomic_sampled_idx[kk].append(idx)
elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
if (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
global_sampled_idx[kk].append(idx)

else:
continue

# use index to gather model predictions for the corresponding systems.

model_pred_g = (
Expand Down Expand Up @@ -378,14 +375,16 @@ def compute_output_stats(
keys,
rcond,
preset_bias,
model_pred_g,
global_sampled_idx,
stats_distinguish_types,
intensive,
model_pred_g,
)
bias_atom_a, std_atom_a = compute_output_stats_atomic(
sampled,
ntypes,
keys,
atomic_sampled_idx,
model_pred_a,
)

Expand Down Expand Up @@ -422,52 +421,46 @@ def compute_output_stats_global(
keys: list[str],
rcond: float | None = None,
preset_bias: dict[str, list[paddle.Tensor | None]] | None = None,
model_pred: dict[str, np.ndarray] | None = None,
global_sampled_idx: dict | None = None,
stats_distinguish_types: bool = True,
intensive: bool = False,
model_pred: dict[str, np.ndarray] | None = None,
) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
"""This function only handle stat computation from reduced global labels."""
# return directly if model predict is empty for global
if model_pred == {}:
# return directly if no global samples
if global_sampled_idx is None or all(
len(v) == 0 for v in global_sampled_idx.values()
):
Comment thread
wanghan-iapcm marked this conversation as resolved.
return {}, {}
Comment thread
wanghan-iapcm marked this conversation as resolved.

# get label dict from sample; for each key, only picking the system with global labels.
outputs = {
kk: [
system[kk]
for system in sampled
if kk in system and system.get(f"find_{kk}", 0) > 0
]
kk: [to_numpy_array(sampled[idx][kk]) for idx in global_sampled_idx.get(kk, [])]
for kk in keys
}

data_mixed_type = "real_natoms_vec" in sampled[0]
natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec"
for system in sampled:
if "atom_exclude_types" in system:
type_mask = AtomExcludeMask(
ntypes, system["atom_exclude_types"]
).get_type_mask()
system[natoms_key][:, 2:] *= type_mask.unsqueeze(0)

input_natoms = {
kk: [
item[natoms_key]
for item in sampled
if kk in item and item.get(f"find_{kk}", 0) > 0
]
for kk in keys
}
input_natoms = {}
for kk in keys:
kk_natoms = []
for idx in global_sampled_idx.get(kk, []):
nn = to_numpy_array(sampled[idx][natoms_key])
if "atom_exclude_types" in sampled[idx]:
nn = nn.copy()
type_mask = AtomExcludeMask(
ntypes, sampled[idx]["atom_exclude_types"]
).get_type_mask()
nn[:, 2:] *= to_numpy_array(type_mask).reshape(1, -1)
kk_natoms.append(nn)
input_natoms[kk] = kk_natoms
# shape: (nframes, ndim)
merged_output = {
kk: to_numpy_array(paddle.concat(outputs[kk]))
for kk in keys
if len(outputs[kk]) > 0
kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0
}
# shape: (nframes, ntypes)

merged_natoms = {
kk: to_numpy_array(paddle.concat(input_natoms[kk])[:, 2:])
kk: np.concatenate(input_natoms[kk])[:, 2:]
for kk in keys
if len(input_natoms[kk]) > 0
}
Expand Down Expand Up @@ -554,49 +547,51 @@ def compute_output_stats_atomic(
sampled: list[dict],
ntypes: int,
keys: list[str],
atomic_sampled_idx: dict | None = None,
model_pred: dict[str, np.ndarray] | None = None,
) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
"""Compute output statistics from atomic labels."""
# return directly if no atomic samples
if atomic_sampled_idx is None or all(
len(v) == 0 for v in atomic_sampled_idx.values()
):
return {}, {}

# get label dict from sample; for each key, only picking the system with atomic labels.
outputs = {
kk: [
system["atom_" + kk]
for system in sampled
if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0
to_numpy_array(sampled[idx]["atom_" + kk])
for idx in atomic_sampled_idx.get(kk, [])
]
for kk in keys
}
natoms = {
kk: [
system["atype"]
for system in sampled
if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0
to_numpy_array(sampled[idx]["atype"])
for idx in atomic_sampled_idx.get(kk, [])
]
for kk in keys
}
# reshape outputs [nframes, nloc * ndim] --> reshape to [nframes * nloc, 1, ndim] for concatenation
# reshape natoms [nframes, nloc] --> reshape to [nframes * nolc, 1] for concatenation
natoms = {k: [sys_v.reshape([-1, 1]) for sys_v in v] for k, v in natoms.items()}
natoms = {k: [sys_v.reshape(-1, 1) for sys_v in v] for k, v in natoms.items()}
outputs = {
k: [
sys.reshape([natoms[k][sys_idx].shape[0], 1, -1])
sys.reshape(natoms[k][sys_idx].shape[0], 1, -1)
for sys_idx, sys in enumerate(v)
]
for k, v in outputs.items()
}

merged_output = {
kk: to_numpy_array(paddle.concat(outputs[kk]))
for kk in keys
if len(outputs[kk]) > 0
kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0
}
merged_natoms = {
kk: to_numpy_array(paddle.concat(natoms[kk]))
for kk in keys
if len(natoms[kk]) > 0
kk: np.concatenate(natoms[kk]) for kk in keys if len(natoms[kk]) > 0
}
# reshape merged data to [nf, nloc, ndim]
merged_output = {
kk: merged_output[kk].reshape([*merged_natoms[kk].shape, -1])
kk: merged_output[kk].reshape((*merged_natoms[kk].shape, -1))
for kk in merged_output
}

Expand Down
83 changes: 40 additions & 43 deletions deepmd/pt/utils/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,12 +324,9 @@ def compute_output_stats(
system["find_atom_" + kk] > 0.0
):
atomic_sampled_idx[kk].append(idx)
elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
if (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
global_sampled_idx[kk].append(idx)

else:
continue

# use index to gather model predictions for the corresponding systems.

model_pred_g = (
Expand Down Expand Up @@ -378,14 +375,16 @@ def compute_output_stats(
keys,
rcond,
preset_bias,
model_pred_g,
global_sampled_idx,
stats_distinguish_types,
intensive,
model_pred_g,
)
bias_atom_a, std_atom_a = compute_output_stats_atomic(
sampled,
ntypes,
keys,
atomic_sampled_idx,
model_pred_a,
)

Expand Down Expand Up @@ -422,52 +421,46 @@ def compute_output_stats_global(
keys: list[str],
rcond: float | None = None,
preset_bias: dict[str, list[np.ndarray | None]] | None = None,
model_pred: dict[str, np.ndarray] | None = None,
global_sampled_idx: dict | None = None,
stats_distinguish_types: bool = True,
intensive: bool = False,
model_pred: dict[str, np.ndarray] | None = None,
) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
"""This function only handle stat computation from reduced global labels."""
# return directly if model predict is empty for global
if model_pred == {}:
# return directly if no global samples
if global_sampled_idx is None or all(
len(v) == 0 for v in global_sampled_idx.values()
):
Comment thread
wanghan-iapcm marked this conversation as resolved.
return {}, {}
Comment thread
wanghan-iapcm marked this conversation as resolved.

# get label dict from sample; for each key, only picking the system with global labels.
outputs = {
kk: [
system[kk]
for system in sampled
if kk in system and system.get(f"find_{kk}", 0) > 0
]
kk: [to_numpy_array(sampled[idx][kk]) for idx in global_sampled_idx.get(kk, [])]
for kk in keys
}

data_mixed_type = "real_natoms_vec" in sampled[0]
natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec"
for system in sampled:
if "atom_exclude_types" in system:
type_mask = AtomExcludeMask(
ntypes, system["atom_exclude_types"]
).get_type_mask()
system[natoms_key][:, 2:] *= type_mask.unsqueeze(0)

input_natoms = {
kk: [
item[natoms_key]
for item in sampled
if kk in item and item.get(f"find_{kk}", 0) > 0
]
for kk in keys
}
input_natoms = {}
for kk in keys:
kk_natoms = []
for idx in global_sampled_idx.get(kk, []):
nn = to_numpy_array(sampled[idx][natoms_key])
if "atom_exclude_types" in sampled[idx]:
nn = nn.copy()
type_mask = AtomExcludeMask(
ntypes, sampled[idx]["atom_exclude_types"]
).get_type_mask()
nn[:, 2:] *= to_numpy_array(type_mask).reshape(1, -1)
kk_natoms.append(nn)
input_natoms[kk] = kk_natoms
# shape: (nframes, ndim)
merged_output = {
kk: to_numpy_array(torch.cat(outputs[kk]))
for kk in keys
if len(outputs[kk]) > 0
kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0
}
# shape: (nframes, ntypes)

merged_natoms = {
kk: to_numpy_array(torch.cat(input_natoms[kk])[:, 2:])
kk: np.concatenate(input_natoms[kk])[:, 2:]
for kk in keys
if len(input_natoms[kk]) > 0
}
Expand Down Expand Up @@ -551,22 +544,28 @@ def compute_output_stats_atomic(
sampled: list[dict],
ntypes: int,
keys: list[str],
atomic_sampled_idx: dict | None = None,
model_pred: dict[str, np.ndarray] | None = None,
) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
"""Compute output statistics from atomic labels."""
# return directly if no atomic samples
if atomic_sampled_idx is None or all(
len(v) == 0 for v in atomic_sampled_idx.values()
):
return {}, {}

# get label dict from sample; for each key, only picking the system with atomic labels.
outputs = {
kk: [
system["atom_" + kk]
for system in sampled
if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0
to_numpy_array(sampled[idx]["atom_" + kk])
for idx in atomic_sampled_idx.get(kk, [])
]
for kk in keys
}
natoms = {
kk: [
system["atype"]
for system in sampled
if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0
to_numpy_array(sampled[idx]["atype"])
for idx in atomic_sampled_idx.get(kk, [])
]
for kk in keys
}
Expand All @@ -582,12 +581,10 @@ def compute_output_stats_atomic(
}

merged_output = {
kk: to_numpy_array(torch.cat(outputs[kk]))
for kk in keys
if len(outputs[kk]) > 0
kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0
}
merged_natoms = {
kk: to_numpy_array(torch.cat(natoms[kk])) for kk in keys if len(natoms[kk]) > 0
kk: np.concatenate(natoms[kk]) for kk in keys if len(natoms[kk]) > 0
}
# reshape merged data to [nf, nloc, ndim]
merged_output = {
Expand Down
1 change: 1 addition & 0 deletions source/tests/consistent/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
Loading
Loading