Skip to content

Commit 6a35224

Browse files
committed
VSA refactoring
Signed-off-by: Olivia Stoner <245287810+o-stoner@users.noreply.github.com>
1 parent 059de9c commit 6a35224

26 files changed

Lines changed: 4855 additions & 43 deletions

File tree

tensorrt_llm/_torch/visual_gen/attention_backend/__init__.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,14 @@
20 20
simplified metadata that doesn't require KV caching.
21 21
"""
22 22

23-
from .cute_dsl import CuTeDSLAttention
23+
from .cute_dsl import (
24+
VSA_TILE_SIZE,
25+
CuTeDSLAttention,
26+
VSAMetadata,
27+
VSAMetadataBuilder,
28+
get_vsa_forward_context,
29+
set_vsa_forward_context,
30+
)
24 31
from .flash_attn4 import FlashAttn4Attention
25 32
from .interface import AttentionBackend, AttentionTensorLayout
26 33
from .parallel import Attention2DAttention, RingAttention, UlyssesAttention
@@ -41,4 +48,9 @@
41 48
"UlyssesAttention",
42 49
"VanillaAttention",
43 50
"RingAttention",
51+
"VSAMetadata",
52+
"VSAMetadataBuilder",
53+
"VSA_TILE_SIZE",
54+
"get_vsa_forward_context",
55+
"set_vsa_forward_context",
44 56
]

0 commit comments

Comments
 (0)