We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 12244c2 · commit 1ea59e9 — Copy full SHA for 1ea59e9
1 file changed
src/infinicore/ops/multi_head_attention_varlen/mha_varlen_flashattn.cc
@@ -152,11 +152,7 @@ void run(void *planned_meta) {
152
153
#if defined(ENABLE_METAX_API)
154
run_flashattn_varlen_metax(p);
155
- return;
156
-#endif
157
-
158
- // Original InfiniCore path (NVIDIA + xmake flash-attn-nvidia). MetaX is handled above.
159
-#if defined(ENABLE_NVIDIA_API)
+#else
160
c10::cuda::CUDAStreamGuard guard(infinicore::adaptor::get_cuda_stream());
161
162
auto q = infinicore::adaptor::to_aten_tensor(p->q);
@@ -195,8 +191,6 @@ void run(void *planned_meta) {
195
191
0.0,
196
192
false,
197
193
std::nullopt);
198
-#else
199
- throw std::runtime_error("FlashAttention varlen: no supported GPU backend in this build");
200
194
#endif
201
202
#else
0 commit comments