Skip to content

Commit 5873652

Browse files
committed
Fixed error
Signed-off-by: Jingyu Xin <jingyux@nvidia.com>
1 parent 1f8f0d3 commit 5873652

File tree

5 files changed

+175
-260
lines changed

5 files changed

+175
-260
lines changed

modelopt/torch/sparsity/attention_sparsity/conversion.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,16 @@ def _register_diffusers_backends_if_needed(model: nn.Module) -> None:
121121
register_diffusers_eager_attention()
122122
if register_diffusers_triton_attention is not None:
123123
register_diffusers_triton_attention()
124-
except ImportError:
124+
except (ImportError, Exception):
125125
pass
126126

127127
# Patch ltx_core Attention modules if present (independent of diffusers)
128128
import contextlib
129129

130-
from .kernels import register_ltx_eager_attention, register_ltx_triton_attention
130+
try:
131+
from .kernels import register_ltx_eager_attention, register_ltx_triton_attention
132+
except (ImportError, RuntimeError):
133+
return
131134

132135
if register_ltx_eager_attention is not None:
133136
with contextlib.suppress(Exception):

modelopt/torch/sparsity/attention_sparsity/kernels/__init__.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,17 @@ def get_skip_softmax_context() -> bool:
4747
register_ltx_eager_attention = None
4848
register_ltx_triton_attention = None
4949

50-
with contextlib.suppress(ImportError):
50+
# Suppress ImportError (missing package) and RuntimeError (triton without GPU driver)
51+
with contextlib.suppress(ImportError, RuntimeError):
5152
from .diffusers_eager_attention import register_diffusers_eager_attention
5253

53-
with contextlib.suppress(ImportError):
54+
with contextlib.suppress(ImportError, RuntimeError):
5455
from .diffusers_triton_attention import register_diffusers_triton_attention
5556

56-
with contextlib.suppress(ImportError):
57+
with contextlib.suppress(ImportError, RuntimeError):
5758
from .ltx_eager_attention import register_ltx_eager_attention
5859

59-
with contextlib.suppress(ImportError):
60+
with contextlib.suppress(ImportError, RuntimeError):
6061
from .ltx_triton_attention import register_ltx_triton_attention
6162

6263
__all__ = [

modelopt/torch/sparsity/attention_sparsity/kernels/diffusers_triton_attention.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
attention_backend,
3434
)
3535

36-
from modelopt.torch.kernels.triton_fa import attention
36+
from modelopt.torch.kernels import attention
3737

3838
_BACKEND_NAME = "modelopt_triton"
3939
_BACKEND_REGISTERED = False
@@ -110,6 +110,7 @@ def _diffusers_triton_attention(
110110
if threshold is not None and threshold > 0.0:
111111
kw["skip_softmax_threshold"] = threshold
112112

113+
assert attention is not None, "Triton attention kernel not available (requires CUDA + triton)"
113114
o = attention(q, k, v, **kw)
114115

115116
# Reshape back: [B*S, H, D] -> [B, S, H, D]

modelopt/torch/sparsity/attention_sparsity/kernels/ltx_triton_attention.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import torch
2929
from ltx_core.model.transformer.attention import Attention
3030

31-
from modelopt.torch.kernels.triton_fa import attention
31+
from modelopt.torch.kernels import attention
3232

3333
# Thread-local storage for skip-softmax configuration
3434
_thread_local = threading.local()
@@ -106,6 +106,7 @@ def _ltx_triton_attention(
106106
if threshold is not None and threshold > 0.0:
107107
kw["skip_softmax_threshold"] = threshold
108108

109+
assert attention is not None, "Triton attention kernel not available (requires CUDA + triton)"
109110
o = attention(q_flat, k_flat, v_flat, **kw)
110111

111112
# Reshape back: [B*T, H, D] -> [B, T, H*D]

0 commit comments

Comments (0)