File tree Expand file tree Collapse file tree
src/infinicore/ops/multi_head_attention_varlen Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -80,15 +80,15 @@ void run(void *planned_meta) {
8080 auto block_table = std::optional<at::Tensor>(infinicore::adaptor::to_aten_tensor (p->block_table ));
8181 auto max_seqlen_q = p->max_seqlen_q ;
8282 auto max_seqlen_k = p->max_seqlen_k ;
83- auto alibi_slopes =
84- p->alibi_slopes ? std::optional<at::Tensor>(infinicore::adaptor::to_aten_tensor (*p->alibi_slopes )) : std::nullopt ;
83+ auto alibi_slopes = p->alibi_slopes ? std::optional<at::Tensor>(infinicore::adaptor::to_aten_tensor (*p->alibi_slopes )) : std::nullopt ;
8584 auto scale = p->scale ;
8685
8786#if defined(ENABLE_METAX_API) && defined(INFINICORE_HPCC_VERSION_MAJOR) && (INFINICORE_HPCC_VERSION_MAJOR >= 3)
8887 std::optional<at::Tensor> flash_attn_mars_ext = std::nullopt ;
8988#endif
9089
91- INFINICORE_FLASH_OP (mha_varlen_fwd)(
90+ INFINICORE_FLASH_OP (mha_varlen_fwd)
91+ (
9292 q,
9393 k,
9494 v,
You can’t perform that action at this time.
0 commit comments