Skip to content

Commit 5873652

Browse files
committed
Fixed error
Signed-off-by: Jingyu Xin <jingyux@nvidia.com>
1 parent 1f8f0d3 commit 5873652

File tree

5 files changed

+175
-260
lines changed

5 files changed

+175
-260
lines changed

modelopt/torch/sparsity/attention_sparsity/conversion.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,16 @@ def _register_diffusers_backends_if_needed(model: nn.Module) -> None:
121121
register_diffusers_eager_attention()
122122
if register_diffusers_triton_attention is not None:
123123
register_diffusers_triton_attention()
124-
except ImportError:
124+
except (ImportError, Exception):
125125
pass
126126

127127
# Patch ltx_core Attention modules if present (independent of diffusers)
128128
import contextlib
129129

130-
from .kernels import register_ltx_eager_attention, register_ltx_triton_attention
130+
try:
131+
from .kernels import register_ltx_eager_attention, register_ltx_triton_attention
132+
except (ImportError, RuntimeError):
133+
return
131134

132135
if register_ltx_eager_attention is not None:
133136
with contextlib.suppress(Exception):

modelopt/torch/sparsity/attention_sparsity/kernels/__init__.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,17 @@ def get_skip_softmax_context() -> bool:
4747
register_ltx_eager_attention = None
4848
register_ltx_triton_attention = None
4949

50-
with contextlib.suppress(ImportError):
50+
# Suppress ImportError (missing package) and RuntimeError (triton without GPU driver)
51+
with contextlib.suppress(ImportError, RuntimeError):
5152
from .diffusers_eager_attention import register_diffusers_eager_attention
5253

53-
with contextlib.suppress(ImportError):
54+
with contextlib.suppress(ImportError, RuntimeError):
5455
from .diffusers_triton_attention import register_diffusers_triton_attention
5556

56-
with contextlib.suppress(ImportError):
57+
with contextlib.suppress(ImportError, RuntimeError):
5758
from .ltx_eager_attention import register_ltx_eager_attention
5859

59-
with contextlib.suppress(ImportError):
60+
with contextlib.suppress(ImportError, RuntimeError):
6061
from .ltx_triton_attention import register_ltx_triton_attention
6162

6263
__all__ = [

modelopt/torch/sparsity/attention_sparsity/kernels/diffusers_triton_attention.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
attention_backend,
3434
)
3535

36-
from modelopt.torch.kernels.triton_fa import attention
36+
from modelopt.torch.kernels import attention
3737

3838
_BACKEND_NAME = "modelopt_triton"
3939
_BACKEND_REGISTERED = False
@@ -110,6 +110,7 @@ def _diffusers_triton_attention(
110110
if threshold is not None and threshold > 0.0:
111111
kw["skip_softmax_threshold"] = threshold
112112

113+
assert attention is not None, "Triton attention kernel not available (requires CUDA + triton)"
113114
o = attention(q, k, v, **kw)
114115

115116
# Reshape back: [B*S, H, D] -> [B, S, H, D]

modelopt/torch/sparsity/attention_sparsity/kernels/ltx_triton_attention.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import torch
2929
from ltx_core.model.transformer.attention import Attention
3030

31-
from modelopt.torch.kernels.triton_fa import attention
31+
from modelopt.torch.kernels import attention
3232

3333
# Thread-local storage for skip-softmax configuration
3434
_thread_local = threading.local()
@@ -106,6 +106,7 @@ def _ltx_triton_attention(
106106
if threshold is not None and threshold > 0.0:
107107
kw["skip_softmax_threshold"] = threshold
108108

109+
assert attention is not None, "Triton attention kernel not available (requires CUDA + triton)"
109110
o = attention(q_flat, k_flat, v_flat, **kw)
110111

111112
# Reshape back: [B*T, H, D] -> [B, T, H*D]

0 commit comments

Comments (0)