We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent e72c27b, commit 0f3020e (Copy full SHA for 0f3020e)
1 file changed
tensorrt_llm/_torch/attention_backend/sparse/deepseek_v4/deepseek_v4.py
@@ -0,0 +1,21 @@
1
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+# SPDX-License-Identifier: Apache-2.0
3
+
4
+from enum import Enum
5
6
7
class DeepseekV4AttentionType(Enum):
    """Enumerate the attention/cache-state kinds used by DeepSeek-V4.

    Members fall into two groups, distinguished by the cache state that
    backs them:

    * values 0-4 are backed by per-layer sliding-window cache state;
    * values 5-6 are backed by ratio-shared compressed cache state.

    ``COMPRESSOR_STATE`` and ``INDEXER_COMPRESSOR_STATE`` reuse existing
    values, so ``Enum`` treats them as aliases rather than distinct members:
    they resolve to ``COMPRESSOR_KV`` and ``INDEXER_COMPRESSOR_KV``
    respectively, and they do not appear when iterating over the class.
    """

    # Attention types backed by per-layer sliding-window cache state.
    SWA = 0
    COMPRESSOR_KV = 1
    COMPRESSOR_SCORE = 2
    INDEXER_COMPRESSOR_KV = 3
    INDEXER_COMPRESSOR_SCORE = 4

    # Attention types backed by ratio-shared compressed cache state.
    COMPRESS = 5
    INDEXER_COMPRESS = 6

    # Backward-compatible names used by the standalone compressor primitive.
    # Inside the class body the right-hand names evaluate to the raw integer
    # values, so these assignments create aliases of the existing members.
    COMPRESSOR_STATE = COMPRESSOR_KV
    INDEXER_COMPRESSOR_STATE = INDEXER_COMPRESSOR_KV
0 commit comments