Skip to content

Commit d4aeeea

Browse files
committed
training: add HF bootstrap mode with fresh-iteration reset
1 parent 4a50e32 commit d4aeeea

5 files changed

Lines changed: 175 additions & 25 deletions

File tree

src/training/checkpointing.py

Lines changed: 39 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,11 @@ def __init__(
7070
self.api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)
7171

7272
def _repo_path(self, filename: str) -> str:
73-
return f"runs/{self.run_id}/{filename}"
73+
return self._repo_path_for_run(self.run_id, filename)
74+
75+
@staticmethod
76+
def _repo_path_for_run(run_id: str, filename: str) -> str:
77+
return f"runs/{run_id}/{filename}"
7478

7579
def save_checkpoint_local(
7680
self,
@@ -157,12 +161,23 @@ def upload_checkpoint_files(
157161
)
158162
self.cleanup_local_checkpoints(keep_last_n=keep_last_n)
159163

160-
def load_latest_checkpoint(self, *, system: AtaxxZero, buffer: ReplayBuffer) -> int:
164+
def load_latest_checkpoint(
165+
self,
166+
*,
167+
system: AtaxxZero,
168+
buffer: ReplayBuffer,
169+
run_id: str | None = None,
170+
load_buffer: bool = True,
171+
) -> int:
161172
hub_mod = __import__("huggingface_hub", fromlist=["hf_hub_download"])
162173
hf_hub_download = hub_mod.hf_hub_download
163174

175+
source_run_id = (run_id or self.run_id).strip()
176+
if source_run_id == "":
177+
raise ValueError("Checkpoint source run_id cannot be empty.")
178+
164179
files = self.api.list_repo_files(repo_id=self.repo_id, repo_type="model")
165-
run_prefix = self._repo_path("")
180+
run_prefix = self._repo_path_for_run(source_run_id, "")
166181
model_files = [
167182
f
168183
for f in files
@@ -175,7 +190,7 @@ def load_latest_checkpoint(self, *, system: AtaxxZero, buffer: ReplayBuffer) ->
175190

176191
latest_iter = max(int(Path(name).stem.split("_")[2]) for name in model_files)
177192
model_name = f"model_iter_{latest_iter:03d}.pt"
178-
model_repo_path = self._repo_path(model_name)
193+
model_repo_path = self._repo_path_for_run(source_run_id, model_name)
179194
model_path = hf_hub_download(
180195
repo_id=self.repo_id,
181196
filename=model_repo_path,
@@ -197,25 +212,26 @@ def load_latest_checkpoint(self, *, system: AtaxxZero, buffer: ReplayBuffer) ->
197212
"reentrena o usa carga parcial manual (strict=False)."
198213
) from exc
199214

200-
buffer_name = f"buffer_iter_{latest_iter:03d}.npz"
201-
buffer_repo_path = self._repo_path(buffer_name)
202-
try:
203-
buffer_path = hf_hub_download(
204-
repo_id=self.repo_id,
205-
filename=buffer_repo_path,
206-
repo_type="model",
207-
token=self.token,
208-
local_dir=str(self.local_dir),
209-
)
210-
data = np.load(buffer_path)
211-
observations = data["observations"]
212-
policies = data["policies"]
213-
values = data["values"]
214-
examples = list(zip(observations, policies, values, strict=True))
215-
buffer.clear()
216-
buffer.save_game(examples)
217-
except (OSError, KeyError, ValueError):
218-
pass
215+
if load_buffer:
216+
buffer_name = f"buffer_iter_{latest_iter:03d}.npz"
217+
buffer_repo_path = self._repo_path_for_run(source_run_id, buffer_name)
218+
try:
219+
buffer_path = hf_hub_download(
220+
repo_id=self.repo_id,
221+
filename=buffer_repo_path,
222+
repo_type="model",
223+
token=self.token,
224+
local_dir=str(self.local_dir),
225+
)
226+
data = np.load(buffer_path)
227+
observations = data["observations"]
228+
policies = data["policies"]
229+
values = data["values"]
230+
examples = list(zip(observations, policies, values, strict=True))
231+
buffer.clear()
232+
buffer.save_game(examples)
233+
except (OSError, KeyError, ValueError):
234+
pass
219235

220236
return latest_iter
221237

src/training/config_runtime.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,8 @@
5656
"hf_enabled": False,
5757
"hf_repo_id": "",
5858
"hf_run_id": "policy_spatial_v1",
59+
"hf_bootstrap_run_id": "",
60+
"hf_reset_iteration": False,
5961
"hf_token_env": "HF_TOKEN",
6062
"hf_local_dir": "hf_checkpoints",
6163
"max_pending_hf_uploads": 2,
@@ -176,6 +178,8 @@ def parse_args() -> argparse.Namespace:
176178
parser.add_argument("--hf", action="store_true")
177179
parser.add_argument("--hf-repo-id", default=None)
178180
parser.add_argument("--hf-run-id", default=None)
181+
parser.add_argument("--hf-bootstrap-run-id", default=None)
182+
parser.add_argument("--hf-reset-iteration", action="store_true")
179183
parser.add_argument("--max-pending-hf-uploads", type=int, default=None)
180184
parser.add_argument("--hf-upload-timeout-s", type=float, default=None)
181185
return parser.parse_args()
@@ -297,6 +301,10 @@ def apply_cli_overrides(args: argparse.Namespace) -> None:
297301
CONFIG["hf_repo_id"] = args.hf_repo_id
298302
if args.hf_run_id is not None:
299303
CONFIG["hf_run_id"] = args.hf_run_id.strip()
304+
if args.hf_bootstrap_run_id is not None:
305+
CONFIG["hf_bootstrap_run_id"] = args.hf_bootstrap_run_id.strip()
306+
if args.hf_reset_iteration:
307+
CONFIG["hf_reset_iteration"] = True
300308
if args.max_pending_hf_uploads is not None:
301309
CONFIG["max_pending_hf_uploads"] = max(1, args.max_pending_hf_uploads)
302310
if args.hf_upload_timeout_s is not None:

tests/test_training_checkpointing.py

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,12 @@
11
from __future__ import annotations
22

3+
import sys
4+
import types
35
import unittest
46
from concurrent.futures import Future
7+
from pathlib import Path
8+
from typing import Any, cast
9+
from unittest.mock import MagicMock, Mock, patch
510

611
from training.checkpointing import (
712
HuggingFaceCheckpointer,
@@ -27,6 +32,57 @@ def test_repo_path_is_namespaced_by_run_id(self) -> None:
2732
repo_path = checkpointer._repo_path("model_iter_040.pt")
2833
self.assertEqual(repo_path, "runs/policy_spatial_v1/model_iter_040.pt")
2934

35+
def test_repo_path_for_run_allows_explicit_source_namespace(self) -> None:
36+
repo_path = HuggingFaceCheckpointer._repo_path_for_run(
37+
run_id="policy_spatial_v2",
38+
filename="model_iter_001.pt",
39+
)
40+
self.assertEqual(repo_path, "runs/policy_spatial_v2/model_iter_001.pt")
41+
42+
def test_load_latest_checkpoint_can_bootstrap_from_explicit_run_without_buffer(self) -> None:
43+
sample_value = "sample_value"
44+
checkpointer = object.__new__(HuggingFaceCheckpointer)
45+
checkpointer.repo_id = "dieg0code/ataxx-zero"
46+
checkpointer.token = sample_value
47+
checkpointer.run_id = "policy_target_v2"
48+
checkpointer.local_dir = Path()
49+
checkpointer.api = Mock()
50+
checkpointer.api.list_repo_files.return_value = [
51+
"runs/policy_source_v1/model_iter_022.pt",
52+
"runs/policy_source_v1/buffer_iter_022.npz",
53+
"runs/policy_target_v2/model_iter_010.pt",
54+
]
55+
56+
hf_download_mock = MagicMock(return_value="model_iter_022.pt")
57+
hub_module = cast(Any, types.ModuleType("huggingface_hub"))
58+
hub_module.hf_hub_download = hf_download_mock
59+
60+
system = Mock()
61+
buffer = Mock()
62+
63+
with patch.dict(sys.modules, {"huggingface_hub": hub_module}), patch(
64+
"training.checkpointing.torch.load"
65+
) as torch_load_mock:
66+
torch_load_mock.return_value = {"state_dict": {}}
67+
loaded_iter = checkpointer.load_latest_checkpoint(
68+
system=system,
69+
buffer=buffer,
70+
run_id="policy_source_v1",
71+
load_buffer=False,
72+
)
73+
74+
self.assertEqual(loaded_iter, 22)
75+
hf_download_mock.assert_called_once_with(
76+
repo_id="dieg0code/ataxx-zero",
77+
filename="runs/policy_source_v1/model_iter_022.pt",
78+
repo_type="model",
79+
token=sample_value,
80+
local_dir=".",
81+
)
82+
system.load_state_dict.assert_called_once_with({})
83+
buffer.clear.assert_not_called()
84+
buffer.save_game.assert_not_called()
85+
3086
def test_ensure_hf_ready_raises_when_hf_enabled_without_checkpointer(self) -> None:
3187
CONFIG["hf_enabled"] = True
3288
CONFIG["hf_token_env"] = "HF_TOKEN" # noqa: S105 - test fixture value, not a secret.
Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,43 @@
1+
from __future__ import annotations
2+
3+
import sys
4+
import unittest
5+
from unittest.mock import patch
6+
7+
from training.config_runtime import CONFIG, apply_cli_overrides, parse_args
8+
9+
10+
class TestTrainingConfigRuntime(unittest.TestCase):
11+
def setUp(self) -> None:
12+
self._backup = dict(CONFIG)
13+
14+
def tearDown(self) -> None:
15+
CONFIG.clear()
16+
CONFIG.update(self._backup)
17+
18+
def test_hf_bootstrap_flags_are_applied_from_cli(self) -> None:
19+
with patch.object(
20+
sys,
21+
"argv",
22+
[
23+
"train.py",
24+
"--hf",
25+
"--hf-run-id",
26+
"policy_target_v2",
27+
"--hf-bootstrap-run-id",
28+
"policy_source_v1",
29+
"--hf-reset-iteration",
30+
],
31+
):
32+
args = parse_args()
33+
34+
apply_cli_overrides(args)
35+
36+
self.assertTrue(bool(CONFIG["hf_enabled"]))
37+
self.assertEqual(str(CONFIG["hf_run_id"]), "policy_target_v2")
38+
self.assertEqual(str(CONFIG["hf_bootstrap_run_id"]), "policy_source_v1")
39+
self.assertTrue(bool(CONFIG["hf_reset_iteration"]))
40+
41+
42+
if __name__ == "__main__":
43+
unittest.main()

train.py

Lines changed: 29 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -296,12 +296,39 @@ def main() -> None:
296296
hf_upload_futures: list[Future[None]] = []
297297
if hf_checkpointer is not None:
298298
hf_upload_executor = ThreadPoolExecutor(max_workers=1)
299+
bootstrap_run_id = cfg_str("hf_bootstrap_run_id").strip()
300+
source_run_id = bootstrap_run_id or cfg_str("hf_run_id").strip()
301+
reset_iteration = cfg_bool("hf_reset_iteration")
299302
try:
300-
start_iteration = hf_checkpointer.load_latest_checkpoint(
303+
loaded_iteration = hf_checkpointer.load_latest_checkpoint(
301304
system=system,
302305
buffer=buffer,
306+
run_id=(bootstrap_run_id or None),
307+
load_buffer=not reset_iteration,
303308
)
304-
log(f"Resumed from HF checkpoint iteration {start_iteration}.")
309+
if loaded_iteration > 0:
310+
if reset_iteration:
311+
# Fresh-run bootstrap: keep learned weights but rebuild replay
312+
# from scratch so warmup/curriculum can run from iteration 0.
313+
buffer.clear()
314+
start_iteration = 0
315+
log(
316+
"HF bootstrap loaded "
317+
f"iteration {loaded_iteration} from run_id={source_run_id}; "
318+
"resetting iteration to 0 and clearing replay buffer.",
319+
)
320+
else:
321+
start_iteration = loaded_iteration
322+
if bootstrap_run_id != "":
323+
log(
324+
"Resumed from HF checkpoint iteration "
325+
f"{start_iteration} (source run_id={source_run_id}).",
326+
)
327+
else:
328+
log(f"Resumed from HF checkpoint iteration {start_iteration}.")
329+
else:
330+
start_iteration = 0
331+
log(f"No HF checkpoint found in run_id={source_run_id}; starting from scratch.")
305332
except (ValueError, OSError):
306333
start_iteration = 0
307334
log("HF resume failed; starting from scratch.")

0 commit comments

Comments
 (0)