Address coderabbit comments

kevalmorabia97 · kevalmorabia97 · commit 38d95220eaa3 · 2026-04-13T06:00:34.000-07:00
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
diff --git a/examples/puzzletron/evaluation/hf_deployable_anymodel.py b/examples/puzzletron/evaluation/hf_deployable_anymodel.py
@@ -331,7 +331,6 @@ def get_triton_input(self):
             Tensor(name="top_p", shape=(-1,), dtype=np.single, optional=True),
             Tensor(name="temperature", shape=(-1,), dtype=np.single, optional=True),
             Tensor(name="random_seed", shape=(-1,), dtype=np.int_, optional=True),
-            Tensor(name="max_length", shape=(-1,), dtype=np.int_, optional=True),
             Tensor(name="output_logits", shape=(-1,), dtype=np.bool_, optional=True),
             Tensor(name="output_scores", shape=(-1,), dtype=np.bool_, optional=True),
         )
diff --git a/modelopt/torch/puzzletron/mip/mip_with_multi_layer_replacements.py b/modelopt/torch/puzzletron/mip/mip_with_multi_layer_replacements.py
@@ -55,6 +55,9 @@ def run_mip(
         )
         print("\n\n\n")
 
+    if not replacements:
+        return [], 0.0, {}
+
     mip_model = Model()
 
     objective_vars = []
diff --git a/modelopt/torch/puzzletron/replacement_library/build_replacement_library.py b/modelopt/torch/puzzletron/replacement_library/build_replacement_library.py
@@ -88,7 +88,7 @@ def build_replacement_library(
         add_attention_no_ops,
         trust_remote_code=trust_remote_code,
     )
-    block_library_df = _build_block_library_from_subblocks(subblocks_df)
+    block_library_df = _build_block_library_from_subblocks(subblocks_df, master_puzzle_dir)
 
     layer_replacements = _build_layer_replacements(
         block_library_df, master_puzzle_dir, teacher_checkpoint_dir, trust_remote_code
@@ -143,7 +143,9 @@ def infer_teacher_dir(
     return teacher_checkpoint_dir
 
 
-def _build_block_library_from_subblocks(subblocks_df: pd.DataFrame) -> pd.DataFrame:
+def _build_block_library_from_subblocks(
+    subblocks_df: pd.DataFrame, output_dir: Path
+) -> pd.DataFrame:
     joint_blocks_df = subblocks_df.dropna(subset=["block_config"]).copy()
     constructed_blocks_df = _construct_blocks_from_subblocks(subblocks_df)
 
@@ -164,8 +166,12 @@ def _build_block_library_from_subblocks(subblocks_df: pd.DataFrame) -> pd.DataFr
         dups_with_same_block_idx = dups[dups["block_idx"] == dup_block_idx]
         for _, row in dups_with_same_block_idx.head(10).iterrows():
             mprint(row.to_dict())
-        json_dump(block_library_df.to_dict(orient="records"), "ERROR_block_library.json")
-        json_dump(subblocks_df.to_dict(orient="records"), "ERROR_subblock_library.json")
+        json_dump(
+            block_library_df.to_dict(orient="records"), output_dir / "ERROR_block_library.json"
+        )
+        json_dump(
+            subblocks_df.to_dict(orient="records"), output_dir / "ERROR_subblock_library.json"
+        )
         raise ValueError(
             f"Found {len(dups)} duplicate blocks in the block library. See ERROR_block_library.json and ERROR_subblock_library.json for more details."
         )
diff --git a/modelopt/torch/puzzletron/scoring.py b/modelopt/torch/puzzletron/scoring.py
@@ -82,8 +82,10 @@ def main(cfg: DictConfig) -> None:
     cfg = hydra.utils.instantiate(cfg)
     mprint(cfg)
     dist.setup(timeout=cfg.nccl_timeout_minutes)
-    launch_scoring(cfg)
-    dist.cleanup()
+    try:
+        launch_scoring(cfg)
+    finally:
+        dist.cleanup()
 
 
 if __name__ == "__main__":
diff --git a/modelopt/torch/puzzletron/sewing_kit/passage.py b/modelopt/torch/puzzletron/sewing_kit/passage.py
@@ -297,6 +297,7 @@ def __enter__(self):
     def __exit__(self, exc_type, exc_val, exc_tb):
         assert self.active_context_manager is not None
         self.active_context_manager.__exit__(exc_type, exc_val, exc_tb)
+        self.active_context_manager = None
 
     def freeze(self):
         self.eval()
diff --git a/modelopt/torch/puzzletron/sewing_kit/utils.py b/modelopt/torch/puzzletron/sewing_kit/utils.py
@@ -375,21 +375,21 @@ def has_fake_tensor(v: Any) -> bool:
 
 def _get_device_for_distributed(
     group: Optional[torch.distributed.ProcessGroup] = None,
-) -> str:
+) -> torch.device:
     """
     Determine the appropriate device for distributed communication based on the backend.
     NCCL backend requires CUDA tensors, while Gloo supports both CPU and CUDA.
     """
     if not torch.distributed.is_initialized():
-        return "cpu"
+        return torch.device("cpu")
 
     backend = torch.distributed.get_backend(group)
     if backend == "nccl":
         # NCCL requires CUDA tensors
-        return torch.cuda.current_device()
+        return torch.device("cuda", torch.cuda.current_device())
     else:
         # Gloo and other backends support CPU tensors
-        return "cpu"
+        return torch.device("cpu")
 
 
 def distributed_isend_obj(
diff --git a/modelopt/torch/puzzletron/tools/checkpoint_utils_hf.py b/modelopt/torch/puzzletron/tools/checkpoint_utils_hf.py
@@ -342,7 +342,7 @@ def optimized_safe_save(kwargs):
         # Check for any failures
         failed_saves = sum(1 for r in results if not r)
         if failed_saves > 0:
-            mprint(f"  Warning: {failed_saves} files failed to save")
+            raise RuntimeError(f"  {failed_saves} shard file(s) failed to save")
     else:
         mprint("  Using single-threaded saving...")
         for kwargs in safe_save_kwargs:

Original file line number	Diff line number	Diff line change
`@@ -331,7 +331,6 @@ def get_triton_input(self):`
`331`	`331`	`Tensor(name="top_p", shape=(-1,), dtype=np.single, optional=True),`
`332`	`332`	`Tensor(name="temperature", shape=(-1,), dtype=np.single, optional=True),`
`333`	`333`	`Tensor(name="random_seed", shape=(-1,), dtype=np.int_, optional=True),`
`334`		`- Tensor(name="max_length", shape=(-1,), dtype=np.int_, optional=True),`
`335`	`334`	`Tensor(name="output_logits", shape=(-1,), dtype=np.bool_, optional=True),`
`336`	`335`	`Tensor(name="output_scores", shape=(-1,), dtype=np.bool_, optional=True),`
`337`	`336`	`)`
Original file line number	Diff line number	Diff line change
`@@ -55,6 +55,9 @@ def run_mip(`
`55`	`55`	`)`
`56`	`56`	`print("\n\n\n")`
`57`	`57`
	`58`	`+ if not replacements:`
	`59`	`+ return [], 0.0, {}`
	`60`	`+`
`58`	`61`	`mip_model = Model()`
`59`	`62`
`60`	`63`	`objective_vars = []`