Skip to content

Commit da1f067

Browse files
committed
Add legacy checkpoint support for inference service
1 parent 44ca9e5 commit da1f067

3 files changed

Lines changed: 193 additions & 3 deletions

File tree

src/inference/legacy_model.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
from __future__ import annotations
2+
3+
import torch
4+
import torch.nn as nn
5+
6+
from game.actions import ACTION_SPACE
7+
from game.constants import BOARD_SIZE
8+
9+
10+
class LegacyAtaxxTransformerNet(nn.Module):
    """Transformer net for historical checkpoints (3 input channels, flattened policy).

    Kept separate from the current architecture so old ``model.*`` state_dicts
    keep loading unchanged: every registered parameter and submodule name below
    must stay exactly as it was in the legacy training code.
    """

    def __init__(
        self,
        d_model: int = 128,
        nhead: int = 8,
        num_layers: int = 6,
        dim_feedforward: int = 512,
        dropout: float = 0.1,
    ) -> None:
        super().__init__()
        self.board_size = BOARD_SIZE
        self.num_cells = self.board_size * self.board_size
        self.num_actions = ACTION_SPACE.num_actions
        # Legacy observations carry 3 planes (the current service defaults to 4).
        self.num_input_channels = 3

        # One token per board cell (projected from its 3 channel values) plus a
        # learned CLS token that feeds the value head.
        self.input_proj = nn.Linear(self.num_input_channels, d_model)
        self.pos_embed = nn.Parameter(torch.zeros(1, self.num_cells + 1, d_model))
        self.cls_token = nn.Parameter(torch.zeros(1, 1, d_model))

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation="gelu",
            batch_first=True,
            norm_first=False,
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Legacy policy head: all cell tokens flattened into one wide vector.
        self.policy_head = nn.Sequential(
            nn.LayerNorm(d_model * self.num_cells),
            nn.Linear(d_model * self.num_cells, self.num_actions),
        )
        self.value_head = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, d_model),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_model, 1),
            nn.Tanh(),
        )

    def forward(
        self,
        x: torch.Tensor,
        action_mask: torch.Tensor | None = None,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """Return ``(policy_logits, value)`` for a batch of ``(B, 3, H, W)`` boards.

        When *action_mask* is given, positions where it is <= 0 are filled with
        the most negative representable logit so softmax drives them to ~0.
        """
        bsz = x.size(0)
        # (B, C, H, W) -> (B, cells, C): one token per board cell.
        cell_feats = x.permute(0, 2, 3, 1).reshape(
            bsz,
            self.num_cells,
            self.num_input_channels,
        )
        cell_tokens = self.input_proj(cell_feats)

        cls_tokens = self.cls_token.expand(bsz, -1, -1)
        sequence = torch.cat([cls_tokens, cell_tokens], dim=1) + self.pos_embed
        encoded = self.encoder(sequence)

        pooled = encoded[:, 0]  # CLS token -> value head
        flat_cells = encoded[:, 1:].reshape(bsz, -1)  # cell tokens -> policy head
        policy_logits = self.policy_head(flat_cells)
        if action_mask is not None:
            neg_inf = torch.finfo(policy_logits.dtype).min
            policy_logits = policy_logits.masked_fill(action_mask <= 0, neg_inf)

        value = self.value_head(pooled)
        return policy_logits, value
81+
82+
83+
class LegacyAtaxxSystem(nn.Module):
    """Wrapper that reproduces the ``model.*`` key prefix of legacy state_dicts."""

    def __init__(
        self,
        d_model: int = 128,
        nhead: int = 8,
        num_layers: int = 6,
        dim_feedforward: int = 512,
        dropout: float = 0.1,
    ) -> None:
        super().__init__()
        arch_kwargs = {
            "d_model": d_model,
            "nhead": nhead,
            "num_layers": num_layers,
            "dim_feedforward": dim_feedforward,
            "dropout": dropout,
        }
        # Registering the net under the attribute name ``model`` is what gives
        # every parameter the ``model.*`` prefix expected by old checkpoints.
        self.model = LegacyAtaxxTransformerNet(**arch_kwargs)
102+

src/inference/service.py

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515

1616
if TYPE_CHECKING:
1717
from engine.mcts import MCTS
18-
from model.system import AtaxxZero
1918

2019
InferenceMode = Literal["fast", "strong"]
2120

@@ -57,6 +56,19 @@ def run(self, output_names: list[str] | None, input_feed: dict[str, Any]) -> lis
5756
...
5857

5958

59+
class _SystemLike(Protocol):
    """Structural type for any model system the inference service can drive.

    Satisfied by both the current system and the legacy wrapper, so the
    service can hold either without a hard import of the concrete classes.
    """

    # The wrapped network module; the service reads `num_input_channels` off it.
    model: Any

    def eval(self) -> _SystemLike:
        ...

    def to(self, device: str) -> _SystemLike:
        ...

    def load_state_dict(self, state_dict: dict[str, object]) -> object:
        ...
71+
6072
@lru_cache(maxsize=1)
6173
def _get_torch_module() -> ModuleType | None:
6274
"""Import torch lazily so API startup does not hard-fail in lightweight runtimes."""
@@ -95,11 +107,15 @@ def __init__(
95107
self.c_puct = float(c_puct)
96108
self.model_kwargs: ModelInitKwargs = model_kwargs or {}
97109

98-
self.system: AtaxxZero | None = None
110+
self.system: _SystemLike | None = None
111+
self._model_input_channels = 4
99112
if self.checkpoint_path.exists():
100113
self.system = self._load_system()
101114
self.system.eval()
102115
self.system.to(self.device)
116+
self._model_input_channels = int(
117+
getattr(self.system.model, "num_input_channels", 4)
118+
)
103119

104120
self._onnx_session: _OnnxSessionLike | None = None
105121
self._onnx_last_error: str | None = None
@@ -132,7 +148,29 @@ def _require_torch() -> ModuleType:
132148
)
133149
return torch_module
134150

135-
def _load_system(self) -> AtaxxZero:
151+
@staticmethod
152+
def _is_legacy_state_dict(state_dict: dict[str, Any]) -> bool:
153+
has_legacy_policy = "model.policy_head.1.weight" in state_dict
154+
has_spatial_policy = "model.policy_src_proj.weight" in state_dict
155+
input_weight = state_dict.get("model.input_proj.weight")
156+
input_channels = None
157+
if hasattr(input_weight, "shape"):
158+
shape = tuple(input_weight.shape)
159+
if len(shape) == 2:
160+
input_channels = int(shape[1])
161+
return has_legacy_policy and not has_spatial_policy and input_channels == 3
162+
163+
@staticmethod
def _extract_arch_kwargs(raw_kwargs: ModelInitKwargs) -> dict[str, Any]:
    """Keep only the architecture hyper-parameters the legacy net accepts."""
    selected: dict[str, Any] = {}
    for key in ("d_model", "nhead", "num_layers", "dim_feedforward", "dropout"):
        if key in raw_kwargs:
            selected[key] = raw_kwargs[key]
    return selected
167+
168+
def _build_legacy_system(self) -> _SystemLike:
    """Instantiate the legacy 3-channel system for historical checkpoints."""
    # Imported lazily, matching this file's pattern of deferring
    # torch-dependent imports until a model is actually loaded.
    from inference.legacy_model import LegacyAtaxxSystem

    return LegacyAtaxxSystem(**self._extract_arch_kwargs(self.model_kwargs))
172+
173+
def _load_system(self) -> _SystemLike:
136174
from model.system import AtaxxZero
137175

138176
torch_module = self._require_torch()
@@ -157,6 +195,16 @@ def _load_system(self) -> AtaxxZero:
157195
try:
158196
system.load_state_dict(state_dict_obj)
159197
except RuntimeError as exc:
198+
if self._is_legacy_state_dict(state_dict_obj):
199+
legacy_system = self._build_legacy_system()
200+
try:
201+
legacy_system.load_state_dict(state_dict_obj)
202+
return legacy_system
203+
except RuntimeError as legacy_exc:
204+
raise ValueError(
205+
"Checkpoint incompatible con architecture policy_head espacial; "
206+
"reentrena o usa carga parcial manual (strict=False)."
207+
) from legacy_exc
160208
raise ValueError(
161209
"Checkpoint incompatible con architecture policy_head espacial; "
162210
"reentrena o usa carga parcial manual (strict=False)."
@@ -270,6 +318,8 @@ def _fast_result(self, board: AtaxxBoard) -> InferenceResult:
270318
torch_module = self._require_torch()
271319
mask_np = self._legal_action_mask(board)
272320
obs = board.get_observation()
321+
if obs.shape[0] != self._model_input_channels:
322+
obs = obs[: self._model_input_channels]
273323

274324
obs_tensor = torch_module.from_numpy(obs).unsqueeze(0).to(self.device)
275325
mask_tensor = torch_module.from_numpy(mask_np).unsqueeze(0).to(self.device)
@@ -302,6 +352,10 @@ def _strong_result(self, board: AtaxxBoard) -> InferenceResult:
302352
if self.system is None:
303353
# If no torch model is available, degrade gracefully to fast ONNX/Torch.
304354
return self._fast_result(board)
355+
if self._model_input_channels != 4:
356+
# Legacy checkpoints were trained with 3-channel observations and do
357+
# not support the current MCTS path that batches 4-channel states.
358+
return self._fast_result(board)
305359
torch_module = self._require_torch()
306360
mcts = self._ensure_mcts()
307361
probs = mcts.run(board=board, add_dirichlet_noise=False, temperature=0.0)
@@ -311,6 +365,8 @@ def _strong_result(self, board: AtaxxBoard) -> InferenceResult:
311365
# Value still comes from raw net (current-player perspective), which is stable and cheap.
312366
mask_np = self._legal_action_mask(board)
313367
obs = board.get_observation()
368+
if obs.shape[0] != self._model_input_channels:
369+
obs = obs[: self._model_input_channels]
314370
obs_tensor = torch_module.from_numpy(obs).unsqueeze(0).to(self.device)
315371
mask_tensor = torch_module.from_numpy(mask_np).unsqueeze(0).to(self.device)
316372
with torch_module.no_grad():

tests/test_inference_service.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from game.actions import ACTION_SPACE
1616
from game.board import AtaxxBoard
17+
from inference.legacy_model import LegacyAtaxxSystem
1718
from inference.service import InferenceService
1819
from model.system import AtaxxZero
1920

@@ -121,6 +122,37 @@ def test_rejects_missing_checkpoint(self) -> None:
121122
with self.assertRaises(FileNotFoundError):
122123
InferenceService(checkpoint_path="does/not/exist/model.pt", device="cpu")
123124

125+
def test_loads_legacy_checkpoint_and_predicts(self) -> None:
    """A legacy 3-channel checkpoint loads and serves a legal prediction."""
    arch = {
        "d_model": 64,
        "nhead": 8,
        "num_layers": 2,
        "dim_feedforward": 128,
        "dropout": 0.0,
    }
    with tempfile.TemporaryDirectory() as tmp_dir:
        legacy_system = LegacyAtaxxSystem(**arch)
        ckpt_path = Path(tmp_dir) / "legacy.pt"
        torch.save({"state_dict": legacy_system.state_dict()}, ckpt_path)

        service = InferenceService(
            checkpoint_path=ckpt_path,
            device="cpu",
            model_kwargs=arch,
        )
        board = AtaxxBoard()
        result = service.predict(board, mode="strong")

        # The service cannot run the MCTS "strong" path with a 3-channel
        # model, so the result must report the "fast" fallback mode.
        legal_idxs = {ACTION_SPACE.encode(move) for move in board.get_valid_moves()}
        self.assertEqual(result.mode, "fast")
        self.assertIn(result.action_idx, legal_idxs)
155+
124156
def test_rejects_invalid_mode(self) -> None:
125157
with tempfile.TemporaryDirectory() as tmp_dir:
126158
system = self._tiny_system()

0 commit comments

Comments
 (0)