Fix: Add weights_only parameter to LearningRateFinder checkpoint restore (#21758)

cngmid · pre-commit-ci[bot] · web-flow · commit fe6b1cc4e80a · 2026-06-09T12:12:57.000Z
* Fix: add weights_only parameter to LearningRateFinder and propagate through LR finder call chain
* Test: ensure LearningRateFinder supports weights_only=False during checkpoint restore
* Test: ensure LearningRateFinder supports weights_only=False during checkpoint restore and apply pre-commit
* Style: apply pre-commit formatting to LR Finder patch
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/src/lightning/pytorch/callbacks/lr_finder.py b/src/lightning/pytorch/callbacks/lr_finder.py
@@ -50,6 +50,11 @@ class LearningRateFinder(Callback):
         update_attr: Whether to update the learning rate attribute or not.
         attr_name: Name of the attribute which stores the learning rate. The names 'learning_rate' or 'lr' get
             automatically detected. Otherwise, set the name here.
+        weights_only: Defaults to ``None``. If ``True``, restricts loading to ``state_dicts`` of plain
+            ``torch.Tensor`` and other primitive types. If loading a checkpoint from a trusted source that contains
+            an ``nn.Module``, use ``weights_only=False``. If loading a checkpoint from an untrusted source, we
+            recommend using ``weights_only=True``. For more information, please refer to the
+            `PyTorch Developer Notes on Serialization Semantics <https://docs.pytorch.org/docs/main/notes/serialization.html#id3>`_.
 
     Example::
 
@@ -92,6 +97,7 @@ def __init__(
         early_stop_threshold: Optional[float] = 4.0,
         update_attr: bool = True,
         attr_name: str = "",
+        weights_only: Optional[bool] = None,
     ) -> None:
         mode = mode.lower()
         if mode not in self.SUPPORTED_MODES:
@@ -104,6 +110,7 @@ def __init__(
         self._early_stop_threshold = early_stop_threshold
         self._update_attr = update_attr
         self._attr_name = attr_name
+        self._weights_only = weights_only
 
         self._early_exit = False
         self.optimal_lr: Optional[_LRFinder] = None
@@ -120,6 +127,7 @@ def lr_find(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> Non
                 early_stop_threshold=self._early_stop_threshold,
                 update_attr=self._update_attr,
                 attr_name=self._attr_name,
+                weights_only=self._weights_only,
             )
 
         if self._early_exit:
diff --git a/src/lightning/pytorch/tuner/lr_finder.py b/src/lightning/pytorch/tuner/lr_finder.py
@@ -206,6 +206,7 @@ def _lr_find(
     early_stop_threshold: Optional[float] = 4.0,
     update_attr: bool = False,
     attr_name: str = "",
+    weights_only: Optional[bool] = None,
 ) -> Optional[_LRFinder]:
     """Enables the user to do a range test of good initial learning rates, to reduce the amount of guesswork in picking
     a good starting learning rate.
@@ -227,6 +228,11 @@ def _lr_find(
         update_attr: Whether to update the learning rate attribute or not.
         attr_name: Name of the attribute which stores the learning rate. The names 'learning_rate' or 'lr' get
             automatically detected. Otherwise, set the name here.
+        weights_only: Defaults to ``None``. If ``True``, restricts loading to ``state_dicts`` of plain
+            ``torch.Tensor`` and other primitive types. If loading a checkpoint from a trusted source that contains
+            an ``nn.Module``, use ``weights_only=False``. If loading a checkpoint from an untrusted source, we
+            recommend using ``weights_only=True``. For more information, please refer to the
+            `PyTorch Developer Notes on Serialization Semantics <https://docs.pytorch.org/docs/main/notes/serialization.html#id3>`_.
 
     """
     if trainer.fast_dev_run:
@@ -285,7 +291,7 @@ def _lr_find(
         raise ex
     finally:
         # Restore initial state of model (this will also restore the original optimizer state)
-        trainer._checkpoint_connector.restore(ckpt_path)
+        trainer._checkpoint_connector.restore(ckpt_path, weights_only=weights_only)
         trainer.strategy.remove_checkpoint(ckpt_path)
         trainer.fit_loop.restarting = False  # reset restarting flag as checkpoint restoring sets it to True
         trainer.fit_loop.epoch_loop.restarting = False  # reset restarting flag as checkpoint restoring sets it to True
diff --git a/src/lightning/pytorch/tuner/tuning.py b/src/lightning/pytorch/tuner/tuning.py
@@ -131,6 +131,7 @@ def lr_find(
         early_stop_threshold: Optional[float] = 4.0,
         update_attr: bool = True,
         attr_name: str = "",
+        weights_only: Optional[bool] = None,
     ) -> Optional["_LRFinder"]:
         """Enables the user to do a range test of good initial learning rates, to reduce the amount of guesswork in
         picking a good starting learning rate.
@@ -159,6 +160,11 @@ def lr_find(
             update_attr: Whether to update the learning rate attribute or not.
             attr_name: Name of the attribute which stores the learning rate. The names 'learning_rate' or 'lr' get
                 automatically detected. Otherwise, set the name here.
+            weights_only: Defaults to ``None``. If ``True``, restricts loading to ``state_dicts`` of plain
+                ``torch.Tensor`` and other primitive types. If loading a checkpoint from a trusted source that contains
+                an ``nn.Module``, use ``weights_only=False``. If loading a checkpoint from an untrusted source, we
+                recommend using ``weights_only=True``. For more information, please refer to the
+                `PyTorch Developer Notes on Serialization Semantics <https://docs.pytorch.org/docs/main/notes/serialization.html#id3>`_.
 
         Raises:
             MisconfigurationException:
@@ -183,6 +189,7 @@ def lr_find(
             early_stop_threshold=early_stop_threshold,
             update_attr=update_attr,
             attr_name=attr_name,
+            weights_only=weights_only,
         )
 
         lr_finder_callback._early_exit = True
diff --git a/tests/tests_pytorch/tuner/test_lr_finder.py b/tests/tests_pytorch/tuner/test_lr_finder.py
@@ -23,7 +23,7 @@
 import torch
 from lightning_utilities.test.warning import no_warning_call
 
-from lightning.pytorch import Trainer, seed_everything
+from lightning.pytorch import LightningModule, Trainer, seed_everything
 from lightning.pytorch.callbacks import EarlyStopping
 from lightning.pytorch.callbacks.finetuning import BackboneFinetuning
 from lightning.pytorch.callbacks.lr_finder import LearningRateFinder
@@ -844,3 +844,60 @@ def configure_optimizers(self):
     # Check that backbone was unfrozen at the correct epoch
     for param in model.backbone.parameters():
         assert param.requires_grad, "Backbone parameters should be unfrozen after epoch 1"
+
+
+def test_lr_finder_respects_weights_only(tmp_path):
+    """Test that lr_find works correctly when saving more than the weights."""
+
+    # Plain ``torch.nn.Module`` (not a LightningModule) wrapping a single linear layer
+    class TorchCoder(torch.nn.Module):
+        def __init__(self, in_features, out_features):
+            super().__init__()
+            self.net = torch.nn.Linear(in_features, out_features)
+
+        def forward(self, x):
+            return self.net(x)
+
+    # LightningModule taking ``nn.Module`` init args; presumably ``save_hyperparameters()`` puts them in the checkpoint
+    class SimpleModel(LightningModule):
+        def __init__(self, coder, loss, lr=1e-3):
+            super().__init__()
+            self.save_hyperparameters()
+            self.layer = coder
+            self.loss = loss
+            self.lr = lr
+
+        def training_step(self, batch, batch_idx):
+            x, y = batch
+            y_hat = self.layer(x)
+            return self.loss(y_hat, y)
+
+        def configure_optimizers(self):
+            return torch.optim.Adam(self.parameters(), lr=self.lr)
+
+    # Random regression data: 16 samples with 4 input features and 2 targets, served in batches of 4
+    x = torch.randn(16, 4)
+    y = torch.randn(16, 2)
+    loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x, y), batch_size=4)
+
+    model = SimpleModel(
+        TorchCoder(4, 2),
+        loss=torch.nn.MSELoss(),
+    )
+
+    trainer = Trainer(
+        default_root_dir=tmp_path,
+        max_epochs=1,
+        logger=False,
+        enable_checkpointing=True,
+    )
+
+    # ``lr_find`` must complete without raising when ``weights_only=False`` is passed through to it
+    lr_finder = Tuner(trainer).lr_find(
+        model,
+        train_dataloaders=loader,
+        weights_only=False,  # the argument under test
+    )
+
+    assert lr_finder is not None
+    assert hasattr(lr_finder, "results")