From 71e4c193d279e0b7bb51c031d19c278a9e422be3 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 30 Mar 2026 11:53:40 +0800 Subject: [PATCH 1/6] refactor(pt_expt): use model API for Python inference, rename .pt2/.pte metadata files Python DeepEval now deserializes model.json and delegates all API calls (get_rcut, get_sel, get_dim_fparam, etc.) to the dpmodel instance instead of reading a flat metadata dict. This is consistent with how other backends (.dp/.yaml, .pth) work. File layout in .pt2/.pte archives is renamed for consistency: - metadata.json: C++ runtime metadata (was model_def_script.json) - model_def_script.json: training config (was model_params.json, now matches .pth convention) - output_keys.json removed (merged into metadata.json) --- .gitignore | 2 + deepmd/pt_expt/entrypoints/compress.py | 1 + deepmd/pt_expt/entrypoints/main.py | 6 +- deepmd/pt_expt/infer/deep_eval.py | 89 +++++++++----------- deepmd/pt_expt/utils/serialization.py | 55 +++++++----- source/api_cc/src/DeepPotPTExpt.cc | 14 ++- source/tests/pt_expt/infer/test_deep_eval.py | 73 ++++++++++++++-- 7 files changed, 148 insertions(+), 92 deletions(-) diff --git a/.gitignore b/.gitignore index 6382ecedd2..12b149c911 100644 --- a/.gitignore +++ b/.gitignore @@ -62,6 +62,8 @@ test_dp_test_*.out # Training and model output files *.pth +*.pte +*.pt2 *.ckpt* checkpoint lcurve.out diff --git a/deepmd/pt_expt/entrypoints/compress.py b/deepmd/pt_expt/entrypoints/compress.py index 83417c68f2..04becb0523 100644 --- a/deepmd/pt_expt/entrypoints/compress.py +++ b/deepmd/pt_expt/entrypoints/compress.py @@ -118,6 +118,7 @@ def enable_compression( deserialize_to_file( output, uncompressed_data, + model_params=model_dict.get("model_def_script"), model_json_override={ "model": compressed_model_dict, "model_def_script": model_dict.get("model_def_script"), diff --git a/deepmd/pt_expt/entrypoints/main.py b/deepmd/pt_expt/entrypoints/main.py index 25cecb4630..5bfa4119a5 100644 --- a/deepmd/pt_expt/entrypoints/main.py +++ b/deepmd/pt_expt/entrypoints/main.py @@ -344,7 +344,7 @@ def change_bias( ) model_to_change = BaseModel.deserialize(pte_data["model"]) - model_params = None + model_params = pte_data.get("model_def_script") else: raise RuntimeError( "The model provided must be a checkpoint file with a .pt extension " @@ -440,7 +440,9 @@ def change_bias( ) ) model_dict = model_to_change.serialize() - deserialize_to_file(output_path, {"model": model_dict}) + deserialize_to_file( + output_path, {"model": model_dict}, model_params=model_params + ) log.info(f"Saved model to {output_path}") diff --git a/deepmd/pt_expt/infer/deep_eval.py b/deepmd/pt_expt/infer/deep_eval.py index afde4abdec..4eedb723d0 100644 --- a/deepmd/pt_expt/infer/deep_eval.py +++ b/deepmd/pt_expt/infer/deep_eval.py @@ -16,7 +16,6 @@ communicate_extended_output, ) from deepmd.dpmodel.output_def import ( - FittingOutputDef, ModelOutputDef, OutputVariableCategory, OutputVariableDef, @@ -59,28 +58,6 @@ import ase.neighborlist -def _reconstruct_model_output_def(metadata: dict) -> ModelOutputDef: - """Reconstruct ModelOutputDef from stored fitting_output_defs metadata.""" - var_defs = [] - for vd in metadata["fitting_output_defs"]: - var_defs.append( - OutputVariableDef( - name=vd["name"], - shape=vd["shape"], - reducible=vd["reducible"], - r_differentiable=vd["r_differentiable"], - c_differentiable=vd["c_differentiable"], - atomic=vd["atomic"], - category=vd["category"], - r_hessian=vd["r_hessian"], - magnetic=vd["magnetic"], - intensive=vd["intensive"], - ) - ) - 
fitting_output_def = FittingOutputDef(var_defs) - return ModelOutputDef(fitting_output_def) - - class DeepEval(DeepEvalBackend): """PyTorch Exportable backend implementation of DeepEval. @@ -124,9 +101,6 @@ def __init__( else: self._load_pte(model_file) - # Reconstruct the model output def from stored fitting output defs - self._model_output_def = _reconstruct_model_output_def(self.metadata) - if isinstance(auto_batch_size, bool): if auto_batch_size: self.auto_batch_size = AutoBatchSize() @@ -139,14 +113,30 @@ def __init__( else: raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize") + def _init_from_model_json(self, model_json_str: str) -> None: + """Deserialize model.json and derive model API from the dpmodel instance.""" + from deepmd.pt_expt.model.model import ( + BaseModel, + ) + from deepmd.pt_expt.utils.serialization import ( + _json_to_numpy, + ) + + model_dict = json.loads(model_json_str) + model_dict = _json_to_numpy(model_dict) + self._dpmodel = BaseModel.deserialize(model_dict["model"]) + self.rcut = self._dpmodel.get_rcut() + self.type_map = self._dpmodel.get_type_map() + self._model_output_def = ModelOutputDef(self._dpmodel.atomic_output_def()) + def _load_pte(self, model_file: str) -> None: """Load a .pte (torch.export) model file.""" - extra_files = {"model_def_script.json": ""} + extra_files = {"model.json": "", "model_def_script.json": ""} exported = torch.export.load(model_file, extra_files=extra_files) self.exported_module = exported.module() - self.metadata = json.loads(extra_files["model_def_script.json"]) - self.rcut = self.metadata["rcut"] - self.type_map = self.metadata["type_map"] + self._init_from_model_json(extra_files["model.json"]) + mds = extra_files["model_def_script.json"] + self._model_def_script = json.loads(mds) if mds else {} def _load_pt2(self, model_file: str) -> None: """Load a .pt2 (AOTInductor) model file.""" @@ -159,16 +149,17 @@ def _load_pt2(self, model_file: str) -> None: # Read metadata from the .pt2 ZIP archive with zipfile.ZipFile(model_file, "r") as zf: names = zf.namelist() - for required in ("extra/model_def_script.json", "extra/output_keys.json"): - if required not in names: - raise ValueError( - f"Invalid .pt2 file '{model_file}': missing '{required}'" - ) - self.metadata = json.loads(zf.read("extra/model_def_script.json")) - self._output_keys = json.loads(zf.read("extra/output_keys.json")) + if "extra/model.json" not in names: + raise ValueError( + f"Invalid .pt2 file '{model_file}': missing 'extra/model.json'" + ) + model_json_str = zf.read("extra/model.json").decode("utf-8") + mds = "" + if "extra/model_def_script.json" in names: + mds = zf.read("extra/model_def_script.json").decode("utf-8") - self.rcut = self.metadata["rcut"] - self.type_map = self.metadata["type_map"] + self._init_from_model_json(model_json_str) + self._model_def_script = json.loads(mds) if mds else {} # Load the AOTInductor model package (.pt2 ZIP archive). # Uses torch._inductor.aoti_load_package (private API, stable since PyTorch 2.6). 
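Both loaders share the same extra-files contract. As a minimal standalone
sketch of the .pte path (mirrors _load_pte above; the file name "model.pte"
is a placeholder):

    import json

    import torch

    # Keys must be pre-seeded with ""; torch.export.load fills in their
    # contents from the archive's extra_files on load.
    extra_files = {"model.json": "", "model_def_script.json": ""}
    exported = torch.export.load("model.pte", extra_files=extra_files)
    module = exported.module()  # graph module used for inference
    model_dict = json.loads(extra_files["model.json"])  # serialized dpmodel
    mds = extra_files["model_def_script.json"]
    model_def_script = json.loads(mds) if mds else {}  # training config, may be empty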
@@ -189,16 +180,16 @@ def get_type_map(self) -> list[str]: def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this DP.""" - return self.metadata["dim_fparam"] + return self._dpmodel.get_dim_fparam() def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this DP.""" - return self.metadata["dim_aparam"] + return self._dpmodel.get_dim_aparam() @property def model_type(self) -> type["DeepEvalWrapper"]: """The the evaluator of the model type.""" - model_output_type = self.metadata["model_output_type"] + model_output_type = self._dpmodel.model_output_type() if "energy" in model_output_type: return DeepPot elif "dos" in model_output_type: @@ -219,7 +210,7 @@ def get_sel_type(self) -> list[int]: to the result of the model. If returning an empty list, all atom types are selected. """ - return self.metadata["sel_type"] + return self._dpmodel.get_sel_type() def get_numb_dos(self) -> int: """Get the number of DOS.""" @@ -364,8 +355,8 @@ def _build_nlist_native( nframes = coords.shape[0] natoms = coords.shape[1] rcut = self.rcut - sel = self.metadata["sel"] - mixed_types = self.metadata["mixed_types"] + sel = self._dpmodel.get_sel() + mixed_types = self._dpmodel.mixed_types() if cells is not None: box_input = cells.reshape(nframes, 3, 3) @@ -476,8 +467,8 @@ def _build_nlist_ase_single( nlist : np.ndarray, shape (nloc, nsel) mapping : np.ndarray, shape (nall,) """ - sel = self.metadata["sel"] - mixed_types = self.metadata["mixed_types"] + sel = self._dpmodel.get_sel() + mixed_types = self._dpmodel.mixed_types() nsel = sum(sel) natoms = positions.shape[0] @@ -703,8 +694,8 @@ def _get_output_shape( raise RuntimeError("unknown category") def get_model_def_script(self) -> dict: - """Get model definition script.""" - return self.metadata + """Get model definition script (training config).""" + return self._model_def_script def get_model(self) -> torch.nn.Module: """Get the exported model module. diff --git a/deepmd/pt_expt/utils/serialization.py b/deepmd/pt_expt/utils/serialization.py index 35c32bc562..3740e6fb4f 100644 --- a/deepmd/pt_expt/utils/serialization.py +++ b/deepmd/pt_expt/utils/serialization.py @@ -165,9 +165,17 @@ def _build_dynamic_shapes( def _collect_metadata(model: torch.nn.Module) -> dict: - """Collect metadata from the model for storage in .pte extra_files.""" - # Serialize the fitting output definitions so that ModelOutputDef - # can be reconstructed at inference time without loading the full model. + """Collect metadata from the model for C++ inference. + + This metadata is stored as ``metadata.json`` in .pt2 archives and as + ``model_def_script.json`` (legacy) in .pte archives. C++ reads these + flat JSON fields because compiling model API methods as AOTInductor + entry points is impractical (~12 s per trivial function) and string + outputs (``get_type_map``) cannot be expressed as tensor I/O. + + The ``fitting_output_defs`` list is also included so that + ``ModelOutputDef`` can be reconstructed without loading the full model. 
+ """ fitting_output_def = model.atomic_output_def() fitting_output_defs = [] for vdef in fitting_output_def.get_data().values(): @@ -189,11 +197,9 @@ def _collect_metadata(model: torch.nn.Module) -> dict: "type_map": model.get_type_map(), "rcut": model.get_rcut(), "sel": model.get_sel(), - "model_output_type": model.model_output_type(), "dim_fparam": model.get_dim_fparam(), "dim_aparam": model.get_dim_aparam(), "mixed_types": model.mixed_types(), - "sel_type": model.get_sel_type(), "has_default_fparam": model.has_default_fparam(), "default_fparam": model.get_default_fparam(), "fitting_output_defs": fitting_output_defs, @@ -214,8 +220,8 @@ def serialize_from_file(model_file: str) -> dict: ------- dict The serialized model data. If the archive contains - ``model_params.json``, it is included under the - ``"model_params"`` key. + ``model_def_script.json`` (training config), it is included + under the ``"model_def_script"`` key. """ if model_file.endswith(".pt2"): return _serialize_from_file_pt2(model_file) @@ -225,12 +231,14 @@ def serialize_from_file(model_file: str) -> dict: def _serialize_from_file_pte(model_file: str) -> dict: """Serialize a .pte model file to a dictionary.""" - extra_files = {"model.json": "", "model_params.json": ""} + extra_files = {"model.json": "", "model_def_script.json": ""} torch.export.load(model_file, extra_files=extra_files) model_dict = json.loads(extra_files["model.json"]) model_dict = _json_to_numpy(model_dict) - if extra_files["model_params.json"]: - model_dict["model_params"] = json.loads(extra_files["model_params.json"]) + if extra_files["model_def_script.json"]: + model_dict["model_def_script"] = json.loads( + extra_files["model_def_script.json"] + ) return model_dict @@ -247,13 +255,15 @@ def _serialize_from_file_pt2(model_file: str) -> dict: f"Invalid .pt2 file '{model_file}': missing 'extra/model.json'" ) model_json = zf.read("extra/model.json").decode("utf-8") - model_params_json = "" - if "extra/model_params.json" in zf.namelist(): - model_params_json = zf.read("extra/model_params.json").decode("utf-8") + model_def_script_json = "" + if "extra/model_def_script.json" in zf.namelist(): + model_def_script_json = zf.read("extra/model_def_script.json").decode( + "utf-8" + ) model_dict = json.loads(model_json) model_dict = _json_to_numpy(model_dict) - if model_params_json: - model_dict["model_params"] = json.loads(model_params_json) + if model_def_script_json: + model_dict["model_def_script"] = json.loads(model_def_script_json) return model_dict @@ -390,16 +400,16 @@ def _deserialize_to_file_pte( model_params: dict | None = None, ) -> None: """Deserialize a dictionary to a .pte model file.""" - exported, metadata, data_for_json, _output_keys = _trace_and_export( + exported, metadata, data_for_json, output_keys = _trace_and_export( data, model_json_override ) + metadata["output_keys"] = output_keys extra_files = { - "model_def_script.json": json.dumps(metadata), + "metadata.json": json.dumps(metadata), + "model_def_script.json": json.dumps(model_params or {}), "model.json": json.dumps(data_for_json, separators=(",", ":")), } - if model_params is not None: - extra_files["model_params.json"] = json.dumps(model_params) torch.export.save(exported, model_file, extra_files=extra_files) @@ -430,12 +440,11 @@ def _deserialize_to_file_pt2( aoti_compile_and_package(exported, package_path=model_file) # Embed metadata into the .pt2 ZIP archive + metadata["output_keys"] = output_keys with zipfile.ZipFile(model_file, "a") as zf: - 
zf.writestr("extra/model_def_script.json", json.dumps(metadata)) - zf.writestr("extra/output_keys.json", json.dumps(output_keys)) + zf.writestr("extra/metadata.json", json.dumps(metadata)) + zf.writestr("extra/model_def_script.json", json.dumps(model_params or {})) zf.writestr( "extra/model.json", json.dumps(data_for_json, separators=(",", ":")), ) - if model_params is not None: - zf.writestr("extra/model_params.json", json.dumps(model_params)) diff --git a/source/api_cc/src/DeepPotPTExpt.cc b/source/api_cc/src/DeepPotPTExpt.cc index 076b7e78d5..31f8b41453 100644 --- a/source/api_cc/src/DeepPotPTExpt.cc +++ b/source/api_cc/src/DeepPotPTExpt.cc @@ -423,7 +423,7 @@ std::string read_zip_entry(const std::string& zip_path, } // Match exact name or suffix (handles archives with directory prefixes, - // e.g. "model/extra/output_keys.json" matches "extra/output_keys.json") + // e.g. "model/extra/metadata.json" matches "extra/metadata.json") bool match = (name == entry_name); if (!match && name.size() > entry_name.size()) { size_t suffix_start = name.size() - entry_name.size(); @@ -619,10 +619,7 @@ void DeepPotPTExpt::init(const std::string& model, } // Read metadata from the .pt2 ZIP archive - std::string metadata_json = - read_zip_entry(model, "extra/model_def_script.json"); - std::string output_keys_json = - read_zip_entry(model, "extra/output_keys.json"); + std::string metadata_json = read_zip_entry(model, "extra/metadata.json"); auto metadata = parse_json(metadata_json); rcut = metadata["rcut"].as_double(); @@ -666,10 +663,9 @@ void DeepPotPTExpt::init(const std::string& model, sel.push_back(v.as_int()); } - // Parse output keys - auto keys_val = parse_json(output_keys_json); + // Parse output keys from metadata output_keys.clear(); - for (const auto& v : keys_val.as_array()) { + for (const auto& v : metadata["output_keys"].as_array()) { output_keys.push_back(v.as_string()); } @@ -726,7 +722,7 @@ void DeepPotPTExpt::extract_outputs( throw deepmd::deepmd_exception( "Model returned " + std::to_string(flat_outputs.size()) + " outputs but expected " + std::to_string(output_keys.size()) + - " (from output_keys.json)"); + " (from metadata.json)"); } for (size_t i = 0; i < output_keys.size(); ++i) { output_map[output_keys[i]] = flat_outputs[i]; diff --git a/source/tests/pt_expt/infer/test_deep_eval.py b/source/tests/pt_expt/infer/test_deep_eval.py index ad83ca849b..e494532658 100644 --- a/source/tests/pt_expt/infer/test_deep_eval.py +++ b/source/tests/pt_expt/infer/test_deep_eval.py @@ -104,11 +104,35 @@ def test_get_model(self) -> None: self.assertIsInstance(mod, torch.nn.Module) def test_get_model_def_script(self) -> None: + """Without model_params, get_model_def_script returns {}.""" mds = self.dp.deep_eval.get_model_def_script() self.assertIsInstance(mds, dict) - self.assertEqual(mds["type_map"], self.type_map) - self.assertAlmostEqual(mds["rcut"], self.rcut) - self.assertEqual(mds["sel"], list(self.sel)) + self.assertEqual(mds, {}) + + def test_get_model_def_script_with_params(self) -> None: + """Export with model_params → get_model_def_script returns them.""" + training_config = {"type_map": self.type_map, "descriptor": {"type": "se_e2_a"}} + with tempfile.NamedTemporaryFile(suffix=".pte", delete=False) as f: + tmpfile2 = f.name + try: + deserialize_to_file(tmpfile2, self.model_data, model_params=training_config) + dp2 = DeepPot(tmpfile2) + mds = dp2.deep_eval.get_model_def_script() + self.assertEqual(mds, training_config) + finally: + import os + + os.unlink(tmpfile2) + + def 
test_model_api_delegation(self) -> None: + """Verify that model API calls are delegated to the deserialized dpmodel.""" + de = self.dp.deep_eval + self.assertIsNotNone(de._dpmodel) + self.assertAlmostEqual(de.get_rcut(), self.rcut) + self.assertEqual(de.get_type_map(), self.type_map) + self.assertEqual(de.get_dim_fparam(), 0) + self.assertEqual(de.get_dim_aparam(), 0) + self.assertEqual(de.get_sel_type(), self.model.get_sel_type()) def test_eval_consistency(self) -> None: """Test that DeepPot.eval gives same results as direct model forward.""" @@ -210,8 +234,7 @@ def test_dynamic_shapes(self) -> None: Compares exported module output against direct forward_common_lower for multiple nloc values. """ - extra_files = {"model_def_script.json": ""} - exported = torch.export.load(self.tmpfile.name, extra_files=extra_files) + exported = torch.export.load(self.tmpfile.name) exported_mod = exported.module() for nloc in [2, 5, 10]: @@ -562,11 +585,41 @@ def test_model_type(self) -> None: self.assertIs(self.dp.deep_eval.model_type, DeepPot) def test_get_model_def_script(self) -> None: + """Without model_params, get_model_def_script returns {}.""" mds = self.dp.deep_eval.get_model_def_script() self.assertIsInstance(mds, dict) - self.assertEqual(mds["type_map"], self.type_map) - self.assertAlmostEqual(mds["rcut"], self.rcut) - self.assertEqual(mds["sel"], list(self.sel)) + self.assertEqual(mds, {}) + + def test_get_model_def_script_with_params(self) -> None: + """Export with model_params → get_model_def_script returns them.""" + training_config = {"type_map": self.type_map, "descriptor": {"type": "se_e2_a"}} + with tempfile.NamedTemporaryFile(suffix=".pt2", delete=False) as f: + tmpfile2 = f.name + try: + torch.set_default_device(None) + try: + deserialize_to_file( + tmpfile2, self.model_data, model_params=training_config + ) + finally: + torch.set_default_device("cuda:9999999") + dp2 = DeepPot(tmpfile2) + mds = dp2.deep_eval.get_model_def_script() + self.assertEqual(mds, training_config) + finally: + import os + + os.unlink(tmpfile2) + + def test_model_api_delegation(self) -> None: + """Verify that model API calls are delegated to the deserialized dpmodel.""" + de = self.dp.deep_eval + self.assertIsNotNone(de._dpmodel) + self.assertAlmostEqual(de.get_rcut(), self.rcut) + self.assertEqual(de.get_type_map(), self.type_map) + self.assertEqual(de.get_dim_fparam(), 0) + self.assertEqual(de.get_dim_aparam(), 0) + self.assertEqual(de.get_sel_type(), self.model.get_sel_type()) def test_pt2_file_is_zip(self) -> None: """The .pt2 file should be a valid ZIP archive.""" @@ -576,9 +629,11 @@ def test_pt2_has_metadata(self) -> None: """The .pt2 ZIP should contain metadata entries.""" with zipfile.ZipFile(self.tmpfile.name, "r") as zf: names = zf.namelist() + self.assertIn("extra/metadata.json", names) self.assertIn("extra/model_def_script.json", names) - self.assertIn("extra/output_keys.json", names) self.assertIn("extra/model.json", names) + self.assertNotIn("extra/output_keys.json", names) + self.assertNotIn("extra/model_params.json", names) def test_eval_consistency(self) -> None: """Test that DeepPot.eval gives same results as direct model forward.""" From 9e87eaadac156b7e572793ca3b62359d5b0115af Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 30 Mar 2026 13:08:48 +0800 Subject: [PATCH 2/6] test(pt_expt): verify change_bias preserves model_def_script for .pte/.pt2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two tests exercising the .pte/.pt2 → change_bias → 
.pte/.pt2 round-trip, asserting that training config (model_def_script) embedded by freeze is preserved in the output file. --- source/tests/pt_expt/test_change_bias.py | 64 ++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/source/tests/pt_expt/test_change_bias.py b/source/tests/pt_expt/test_change_bias.py index 50d114af28..746a74f5ea 100644 --- a/source/tests/pt_expt/test_change_bias.py +++ b/source/tests/pt_expt/test_change_bias.py @@ -250,6 +250,70 @@ def test_change_bias_frozen_pte(self) -> None: "Bias should have changed after change-bias on frozen model", ) + def test_change_bias_pte_preserves_model_def_script(self) -> None: + """change_bias on .pte should preserve model_def_script (training config).""" + from deepmd.pt_expt.entrypoints.main import ( + freeze, + ) + from deepmd.pt_expt.utils.serialization import ( + serialize_from_file, + ) + + # Freeze the checkpoint (embeds training config as model_def_script) + pte_path = os.path.join(self.tmpdir, "frozen_mds.pte") + freeze(model=self.model_path, output=pte_path) + + # Verify training config is present + original_data = serialize_from_file(pte_path) + self.assertIn("model_def_script", original_data) + original_mds = original_data["model_def_script"] + self.assertIn("type_map", original_mds) # training config has model params + + # Run change-bias with user-defined values + output_pte = os.path.join(self.tmpdir, "frozen_mds_updated.pte") + run_dp(f"dp --pt-expt change-bias {pte_path} -b 0.1 3.2 -o {output_pte}") + + # Verify model_def_script is preserved in the output + updated_data = serialize_from_file(output_pte) + self.assertIn("model_def_script", updated_data) + self.assertEqual(updated_data["model_def_script"], original_mds) + + def test_change_bias_pt2_preserves_model_def_script(self) -> None: + """change_bias on .pt2 should preserve model_def_script (training config).""" + from deepmd.pt_expt.entrypoints.main import ( + freeze, + ) + from deepmd.pt_expt.utils.serialization import ( + serialize_from_file, + ) + + # Freeze to .pt2 + pt2_path = os.path.join(self.tmpdir, "frozen_mds.pt2") + torch.set_default_device(None) + try: + freeze(model=self.model_path, output=pt2_path) + finally: + torch.set_default_device("cuda:9999999") + + # Verify training config is present + original_data = serialize_from_file(pt2_path) + self.assertIn("model_def_script", original_data) + original_mds = original_data["model_def_script"] + self.assertIn("type_map", original_mds) # training config has model params + + # Run change-bias with user-defined values + output_pt2 = os.path.join(self.tmpdir, "frozen_mds_updated.pt2") + torch.set_default_device(None) + try: + run_dp(f"dp --pt-expt change-bias {pt2_path} -b 0.1 3.2 -o {output_pt2}") + finally: + torch.set_default_device("cuda:9999999") + + # Verify model_def_script is preserved in the output + updated_data = serialize_from_file(output_pt2) + self.assertIn("model_def_script", updated_data) + self.assertEqual(updated_data["model_def_script"], original_mds) + class TestChangeBiasFittingStats(unittest.TestCase): """Test that model_change_out_bias recomputes fitting stats for set-by-statistic.""" From 5e90b19428d096f9b66260a3554a9d91dbc7e5e4 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 30 Mar 2026 16:18:21 +0800 Subject: [PATCH 3/6] fix(pt_expt): update finetune to use model_def_script key from serialize_from_file serialize_from_file now returns training config under 'model_def_script' key (renamed from 'model_params' in the previous commit). 
Update _load_model_params in finetune.py to match. --- deepmd/pt_expt/utils/finetune.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deepmd/pt_expt/utils/finetune.py b/deepmd/pt_expt/utils/finetune.py index cc24aec219..7c7c775165 100644 --- a/deepmd/pt_expt/utils/finetune.py +++ b/deepmd/pt_expt/utils/finetune.py @@ -33,10 +33,10 @@ def _load_model_params(finetune_model: str) -> dict[str, Any]: ) data = serialize_from_file(finetune_model) - # Prefer embedded model_params (full config); fall back to + # Prefer embedded model_def_script (full training config); fall back to # a minimal dict with just type_map for older .pte files. - if "model_params" in data: - return data["model_params"] + if "model_def_script" in data: + return data["model_def_script"] return {"type_map": data["model"]["type_map"]} else: state_dict = torch.load(finetune_model, map_location=DEVICE, weights_only=True) From ff2d1caa8af34d4944ed9d5b77e87c6a6d88b75e Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 30 Mar 2026 17:23:58 +0800 Subject: [PATCH 4/6] refactor(pt_expt): remove dead fallback in _load_model_params model_def_script.json is always present in .pte/.pt2 archives, so the fallback to {"type_map": ...} is unreachable. --- deepmd/pt_expt/utils/finetune.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/deepmd/pt_expt/utils/finetune.py b/deepmd/pt_expt/utils/finetune.py index 7c7c775165..b10025d56d 100644 --- a/deepmd/pt_expt/utils/finetune.py +++ b/deepmd/pt_expt/utils/finetune.py @@ -33,11 +33,7 @@ def _load_model_params(finetune_model: str) -> dict[str, Any]: ) data = serialize_from_file(finetune_model) - # Prefer embedded model_def_script (full training config); fall back to - # a minimal dict with just type_map for older .pte files. - if "model_def_script" in data: - return data["model_def_script"] - return {"type_map": data["model"]["type_map"]} + return data["model_def_script"] else: state_dict = torch.load(finetune_model, map_location=DEVICE, weights_only=True) if "model" in state_dict: From b878e73e6c7796d80757629516bbf85e27b9d405 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Mon, 30 Mar 2026 18:20:38 +0800 Subject: [PATCH 5/6] refactor(pt_expt): remove model_params kwarg from deserialize_to_file Read training config from data["model_def_script"] instead of a separate kwarg, matching the convention of pt/jax/tf/pd backends. This fixes dp convert-backend for pt_expt, which passes the universal dict (already containing model_def_script) with only 2 positional args. 
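For illustration, the calling convention before and after this change
(a minimal sketch; ``model_dict`` and ``training_config`` are placeholders
for a serialized model and its training config):

    from deepmd.pt_expt.utils.serialization import deserialize_to_file

    # Before: the training config was passed as a separate kwarg.
    # deserialize_to_file(output, {"model": model_dict}, model_params=training_config)

    # After: the config travels inside the data dict itself -- the same
    # universal dict that dp convert-backend already passes.
    deserialize_to_file(
        output, {"model": model_dict, "model_def_script": training_config}
    )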
--- deepmd/pt_expt/entrypoints/compress.py | 1 - deepmd/pt_expt/entrypoints/main.py | 4 ++-- deepmd/pt_expt/utils/serialization.py | 20 +++++++++----------- source/tests/pt_expt/infer/test_deep_eval.py | 11 +++++++---- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/deepmd/pt_expt/entrypoints/compress.py b/deepmd/pt_expt/entrypoints/compress.py index 04becb0523..83417c68f2 100644 --- a/deepmd/pt_expt/entrypoints/compress.py +++ b/deepmd/pt_expt/entrypoints/compress.py @@ -118,7 +118,6 @@ def enable_compression( deserialize_to_file( output, uncompressed_data, - model_params=model_dict.get("model_def_script"), model_json_override={ "model": compressed_model_dict, "model_def_script": model_dict.get("model_def_script"), diff --git a/deepmd/pt_expt/entrypoints/main.py b/deepmd/pt_expt/entrypoints/main.py index 5bfa4119a5..3c82ff13aa 100644 --- a/deepmd/pt_expt/entrypoints/main.py +++ b/deepmd/pt_expt/entrypoints/main.py @@ -259,7 +259,7 @@ def freeze( m.eval() model_dict = m.serialize() - deserialize_to_file(output, {"model": model_dict}, model_params=model_params) + deserialize_to_file(output, {"model": model_dict, "model_def_script": model_params}) log.info("Saved frozen model to %s", output) @@ -441,7 +441,7 @@ def change_bias( ) model_dict = model_to_change.serialize() deserialize_to_file( - output_path, {"model": model_dict}, model_params=model_params + output_path, {"model": model_dict, "model_def_script": model_params} ) log.info(f"Saved model to {output_path}") diff --git a/deepmd/pt_expt/utils/serialization.py b/deepmd/pt_expt/utils/serialization.py index 3740e6fb4f..12a28c5cbd 100644 --- a/deepmd/pt_expt/utils/serialization.py +++ b/deepmd/pt_expt/utils/serialization.py @@ -270,7 +270,6 @@ def _serialize_from_file_pt2(model_file: str) -> dict: def deserialize_to_file( model_file: str, data: dict, - model_params: dict | None = None, model_json_override: dict | None = None, ) -> None: """Deserialize a dictionary to a .pte or .pt2 model file. @@ -285,19 +284,18 @@ def deserialize_to_file( data : dict The dictionary to be deserialized (same format as dpmodel's serialize output, with "model" and optionally "model_def_script" keys). - model_params : dict or None - Original model config (the dict passed to ``get_model``). - If provided, embedded in the .pte so that ``--use-pretrain-script`` - can extract descriptor/fitting params at finetune time. + If ``data["model_def_script"]`` is present, it is embedded in the + output so that ``--use-pretrain-script`` can extract descriptor/fitting + params at finetune time. model_json_override : dict or None If provided, this dict is stored in model.json instead of ``data``. Used by ``dp compress`` to store the compressed model dict while tracing the uncompressed model (make_fx cannot trace custom ops). 
""" if model_file.endswith(".pt2"): - _deserialize_to_file_pt2(model_file, data, model_json_override, model_params) + _deserialize_to_file_pt2(model_file, data, model_json_override) else: - _deserialize_to_file_pte(model_file, data, model_json_override, model_params) + _deserialize_to_file_pte(model_file, data, model_json_override) def _trace_and_export( @@ -397,17 +395,17 @@ def _deserialize_to_file_pte( model_file: str, data: dict, model_json_override: dict | None = None, - model_params: dict | None = None, ) -> None: """Deserialize a dictionary to a .pte model file.""" exported, metadata, data_for_json, output_keys = _trace_and_export( data, model_json_override ) + model_def_script = data.get("model_def_script") or {} metadata["output_keys"] = output_keys extra_files = { "metadata.json": json.dumps(metadata), - "model_def_script.json": json.dumps(model_params or {}), + "model_def_script.json": json.dumps(model_def_script), "model.json": json.dumps(data_for_json, separators=(",", ":")), } @@ -418,7 +416,6 @@ def _deserialize_to_file_pt2( model_file: str, data: dict, model_json_override: dict | None = None, - model_params: dict | None = None, ) -> None: """Deserialize a dictionary to a .pt2 model file (AOTInductor). @@ -440,10 +437,11 @@ def _deserialize_to_file_pt2( aoti_compile_and_package(exported, package_path=model_file) # Embed metadata into the .pt2 ZIP archive + model_def_script = data.get("model_def_script") or {} metadata["output_keys"] = output_keys with zipfile.ZipFile(model_file, "a") as zf: zf.writestr("extra/metadata.json", json.dumps(metadata)) - zf.writestr("extra/model_def_script.json", json.dumps(model_params or {})) + zf.writestr("extra/model_def_script.json", json.dumps(model_def_script)) zf.writestr( "extra/model.json", json.dumps(data_for_json, separators=(",", ":")), diff --git a/source/tests/pt_expt/infer/test_deep_eval.py b/source/tests/pt_expt/infer/test_deep_eval.py index e494532658..112ada5dc7 100644 --- a/source/tests/pt_expt/infer/test_deep_eval.py +++ b/source/tests/pt_expt/infer/test_deep_eval.py @@ -115,7 +115,8 @@ def test_get_model_def_script_with_params(self) -> None: with tempfile.NamedTemporaryFile(suffix=".pte", delete=False) as f: tmpfile2 = f.name try: - deserialize_to_file(tmpfile2, self.model_data, model_params=training_config) + data_with_config = {**self.model_data, "model_def_script": training_config} + deserialize_to_file(tmpfile2, data_with_config) dp2 = DeepPot(tmpfile2) mds = dp2.deep_eval.get_model_def_script() self.assertEqual(mds, training_config) @@ -598,9 +599,11 @@ def test_get_model_def_script_with_params(self) -> None: try: torch.set_default_device(None) try: - deserialize_to_file( - tmpfile2, self.model_data, model_params=training_config - ) + data_with_config = { + **self.model_data, + "model_def_script": training_config, + } + deserialize_to_file(tmpfile2, data_with_config) finally: torch.set_default_device("cuda:9999999") dp2 = DeepPot(tmpfile2) From 179a21700ee893a30a4fb88d3b95d8a7b22504a8 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Tue, 31 Mar 2026 14:51:06 +0800 Subject: [PATCH 6/6] fix: address reviewer comments on PR #5354 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - show.py: use safe .get() for type_map/descriptor/fitting_net to avoid KeyError when get_model_def_script() returns {} (model frozen without training config); fall back to model.get_type_map() for type-map - finetune.py: fix stale error message referencing "model_params" → "model_def_script" - serialization.py: 
fix _collect_metadata() docstring (metadata.json used in both .pt2 and .pte, not just .pt2) - test_change_bias.py: add comments explaining set_default_device(None) workaround --- deepmd/entrypoints/show.py | 19 ++++++++++++++++--- deepmd/pt_expt/utils/finetune.py | 2 +- deepmd/pt_expt/utils/serialization.py | 4 ++-- source/tests/pt_expt/test_change_bias.py | 4 +++- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/deepmd/entrypoints/show.py b/deepmd/entrypoints/show.py index 7fd3e81467..b156e9d43d 100644 --- a/deepmd/entrypoints/show.py +++ b/deepmd/entrypoints/show.py @@ -55,18 +55,26 @@ def show( for branch in model_branches: type_map = model_params["model_dict"][branch]["type_map"] log.info(f"The type_map of branch {branch} is {type_map}") - else: + elif "type_map" in model_params: type_map = model_params["type_map"] log.info(f"The type_map is {type_map}") + else: + type_map = model.get_type_map() + log.info(f"The type_map is {type_map}") if "descriptor" in ATTRIBUTES: if model_is_multi_task: model_branches = list(model_params["model_dict"].keys()) for branch in model_branches: descriptor = model_params["model_dict"][branch]["descriptor"] log.info(f"The descriptor parameter of branch {branch} is {descriptor}") - else: + elif "descriptor" in model_params: descriptor = model_params["descriptor"] log.info(f"The descriptor parameter is {descriptor}") + else: + log.warning( + "Descriptor parameters not available " + "(model was not frozen with training config)." + ) if "fitting-net" in ATTRIBUTES: if model_is_multi_task: model_branches = list(model_params["model_dict"].keys()) @@ -75,9 +83,14 @@ def show( log.info( f"The fitting_net parameter of branch {branch} is {fitting_net}" ) - else: + elif "fitting_net" in model_params: fitting_net = model_params["fitting_net"] log.info(f"The fitting_net parameter is {fitting_net}") + else: + log.warning( + "Fitting net parameters not available " + "(model was not frozen with training config)." + ) if "size" in ATTRIBUTES: size_dict = model.get_model_size() log_prefix = " for a single branch model" if model_is_multi_task else "" diff --git a/deepmd/pt_expt/utils/finetune.py b/deepmd/pt_expt/utils/finetune.py index b10025d56d..5e49d8738b 100644 --- a/deepmd/pt_expt/utils/finetune.py +++ b/deepmd/pt_expt/utils/finetune.py @@ -78,7 +78,7 @@ def get_finetune_rules( raise ValueError( "Cannot use --use-pretrain-script: the pretrained model does not " "contain full model params. If finetuning from a .pte file, " - "re-freeze it with the latest code so that model_params is embedded." + "re-freeze it with the latest code so that model_def_script is embedded." ) finetune_from_multi_task = "model_dict" in last_model_params diff --git a/deepmd/pt_expt/utils/serialization.py b/deepmd/pt_expt/utils/serialization.py index 12a28c5cbd..c8678b4d8d 100644 --- a/deepmd/pt_expt/utils/serialization.py +++ b/deepmd/pt_expt/utils/serialization.py @@ -167,8 +167,8 @@ def _build_dynamic_shapes( def _collect_metadata(model: torch.nn.Module) -> dict: """Collect metadata from the model for C++ inference. - This metadata is stored as ``metadata.json`` in .pt2 archives and as - ``model_def_script.json`` (legacy) in .pte archives. C++ reads these + This metadata is stored as ``metadata.json`` in both .pt2 and .pte archives. + Training config is stored separately in ``model_def_script.json``. 
-    flat JSON fields because compiling model API methods as AOTInductor
+    C++ reads flat JSON fields because compiling model API methods as AOTInductor
     entry points is impractical (~12 s per trivial function) and string
     outputs (``get_type_map``) cannot be expressed as tensor I/O.

diff --git a/source/tests/pt_expt/test_change_bias.py b/source/tests/pt_expt/test_change_bias.py
index 746a74f5ea..2ef754aa50 100644
--- a/source/tests/pt_expt/test_change_bias.py
+++ b/source/tests/pt_expt/test_change_bias.py
@@ -288,6 +288,8 @@ def test_change_bias_pt2_preserves_model_def_script(self) -> None:
         )
 
         # Freeze to .pt2
+        # Clear default device: tests/pt/__init__.py sets it to "cuda:9999999"
+        # for CPU fallback, which poisons AOTInductor compilation.
         pt2_path = os.path.join(self.tmpdir, "frozen_mds.pt2")
         torch.set_default_device(None)
         try:
@@ -301,7 +303,7 @@ def test_change_bias_pt2_preserves_model_def_script(self) -> None:
         original_mds = original_data["model_def_script"]
         self.assertIn("type_map", original_mds)  # training config has model params
 
-        # Run change-bias with user-defined values
+        # Run change-bias with user-defined values (same device workaround)
         output_pt2 = os.path.join(self.tmpdir, "frozen_mds_updated.pt2")
         torch.set_default_device(None)
         try:
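As an end-to-end check of the renamed layout, a hypothetical inspection
helper (not part of this PR; the entry names come from the serialization
code above) for a frozen .pt2 archive:

    import json
    import zipfile

    def inspect_pt2(model_file: str) -> None:
        """Print the metadata entries embedded by freeze/change-bias."""
        with zipfile.ZipFile(model_file, "r") as zf:
            names = zf.namelist()
            # C++ runtime metadata (rcut, type_map, sel, output_keys, ...)
            print(json.loads(zf.read("extra/metadata.json")))
            # Training config; {} when the model was frozen without one
            print(json.loads(zf.read("extra/model_def_script.json")))
            # Entries removed by this PR must be gone
            assert "extra/output_keys.json" not in names
            assert "extra/model_params.json" not in names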