fix: FSDP2 (`fully_shard`) unavailable in older versions of PyTorch (≤ 2.5)

OutisLi · OutisLi · commit a607a479722f · 2026-04-23T22:51:48.000+08:00
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
@@ -99,9 +99,13 @@
     get_optimizer_state_dict,
     set_optimizer_state_dict,
 )
-from torch.distributed.fsdp import (
-    fully_shard,
-)
+
+try:
+    from torch.distributed.fsdp import (
+        fully_shard,
+    )
+except ImportError:
+    fully_shard = None  # type: ignore[assignment]
 from torch.distributed.optim import (
     ZeroRedundancyOptimizer,
 )
@@ -849,6 +853,15 @@ def single_model_finetune(
         if self.is_distributed:
             torch.cuda.set_device(LOCAL_RANK)
             if self.zero_stage >= 2:
+                if fully_shard is None:
+                    raise RuntimeError(
+                        "training.zero_stage>=2 requires FSDP2, which is only "
+                        "available in PyTorch >= 2.6 "
+                        "(``torch.distributed.fsdp.fully_shard``). "
+                        f"Current PyTorch is {torch.__version__}. "
+                        "Please upgrade PyTorch, or set training.zero_stage "
+                        "to 0 or 1 to stay on the DDP / ZeRO-1 path."
+                    )
                 # FSDP2 does NOT broadcast params (unlike DDP constructor).
                 # Ensure all ranks share identical weights before sharding.
                 for p in self.wrapper.parameters():