From cd86cce263f4b24c9cfce4184b950914c0a7cdc2 Mon Sep 17 00:00:00 2001
From: CSY-ModelCloud
Date: Wed, 13 May 2026 16:58:03 +0800
Subject: [PATCH] improve JIT extension failure diagnostics for CI flakiness

When building the torch.ops JIT extension fails, record more than the
exception text in _last_error: both build roots, the Python version, the
process id, the TORCH_EXTENSIONS_DIR and GPTQMODEL_TORCH_EXTENSIONS_DIR
environment values, whether each candidate binary path exists, and a
preview of at most 24 entries of the build root. If the build root cannot
be listed, the OSError is included in the message instead of being dropped.
---
NOTE(review): line breaks and leading whitespace were reconstructed from a
copy of this patch that had lost them; confirm the indentation of the
context lines against gptqmodel/utils/cpp.py before applying. The blob id
on the "index" line predates the review changes and should be regenerated.

 gptqmodel/utils/cpp.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/gptqmodel/utils/cpp.py b/gptqmodel/utils/cpp.py
index 2f8acb85d..a5f1762db 100644
--- a/gptqmodel/utils/cpp.py
+++ b/gptqmodel/utils/cpp.py
@@ -9,6 +9,7 @@
 import logging
 import math
 import os
+import platform
 import shutil
 import subprocess
 import sys
@@ -928,7 +929,32 @@ def load(self) -> bool:
             elapsed = time.perf_counter() - started
             self._load_attempted = True
             self._load_result = False
-            self._last_error = f"{self.display_name}: failed to build torch.ops JIT extension: {exc}"
+            diagnostic_lines = [
+                f"{self.display_name}: failed to build torch.ops JIT extension: {exc}",
+                f"build_root={build_root}",
+                f"base_build_root={base_build_root}",
+                f"python={platform.python_version()}",
+                f"pid={os.getpid()}",
+                f"TORCH_EXTENSIONS_DIR={os.getenv('TORCH_EXTENSIONS_DIR', '')}",
+                f"GPTQMODEL_TORCH_EXTENSIONS_DIR={os.getenv('GPTQMODEL_TORCH_EXTENSIONS_DIR', '')}",
+            ]
+            candidate_paths = self._candidate_binary_paths(build_root)
+            if candidate_paths:
+                diagnostic_lines.append(
+                    "candidate_binaries="
+                    + ", ".join(f"{path}:{'exists' if path.exists() else 'missing'}" for path in candidate_paths)
+                )
+            # Snapshot the build directory; the preview is capped at 24 names to keep the message bounded.
+            try:
+                entries = sorted(build_root.iterdir())
+                preview = ", ".join(entry.name for entry in entries[:24])
+                if len(entries) > 24:
+                    preview += f", ... (+{len(entries) - 24} more)"
+                diagnostic_lines.append(f"build_root_entries=[{preview}]")
+            except OSError as snapshot_exc:
+                # Keep the reason the listing failed; an empty value would hide it.
+                diagnostic_lines.append(f"build_root_entries=unavailable: {snapshot_exc}")
+            self._last_error = " | ".join(diagnostic_lines)
             log.debug("%s", self._last_error, exc_info=True)
             logger.info(
                 f"{self.display_name}: torch.ops JIT compilation failed "