@@ -491,6 +491,8 @@ class AttentionOp
491491 bool mIsSpecDecTree = true ;
492492 bool mSpecDecodingIsGenerationLengthVariable = false ;
493493 int32_t mSpecDecodingMaxGenerationLength = 1 ;
494+ // Static speculative-decoding tree length (target max generation length) used by FMHA autotuning; defaults to 0.
495+ int32_t mSpecDecodingTargetMaxGenLen = 0 ;
494496 bool mIsMLAEnabled = false ;
495497 bool mIsGenerationMLA = false ;
496498 bool mUseGenFlashMLA = false ;
@@ -559,13 +561,14 @@ class AttentionOp
559561 mCrossAttention , mMaxDistance , mPosShiftEnabled , mPagedContextFMHA , mFP8ContextFMHA , mFP8AttenOutput ,
560562 mFP8ContextMLA , mFP8GenerationMLA , mChunkPrefillBufferBatchSize , mDenseContextFMHA , mHasFullAttentionMask ,
561563 mIsSpecDecodingEnabled , mUseSpecDecoding , mIsSpecDecTree , mSpecDecodingIsGenerationLengthVariable ,
562- mSpecDecodingMaxGenerationLength , mIsMLAEnabled , mIsGenerationMLA , mUseGenFlashMLA , mUseSparseAttention ,
563- mUseTllmGenSparseAttentionPaged , mUseTllmGenSparseAttention , mMLAParams .data (), mCpSize , mCpRank , mCpGroup ,
564- mNumAttnHeads , mNumAttnKVHeads , mNumKVHeadsOrigin , mAttnTpSize , mAttnTpRank , mAttnCpSize , mAttnCpRank ,
565- mUlyssesMQABroadcast , mEnableContextFMHA , mFMHAForceFP32Acc , mMultiBlockMode , mEnableXQA , mUseKVCache ,
566- mSkipAttn , mFuseFp4Quant , mNbMultiBlockSemaphores , mAttentionChunkSize .value_or (-1 ),
567- mSkipSoftmaxThresholdScaleFactorPrefill , mSkipSoftmaxThresholdScaleFactorDecode , mSageAttnNumEltsPerBlkQ ,
568- mSageAttnNumEltsPerBlkK , mSageAttnNumEltsPerBlkV , mSageAttnQkInt8 );
564+ mSpecDecodingMaxGenerationLength , mSpecDecodingTargetMaxGenLen , mIsMLAEnabled , mIsGenerationMLA ,
565+ mUseGenFlashMLA , mUseSparseAttention , mUseTllmGenSparseAttentionPaged , mUseTllmGenSparseAttention ,
566+ mMLAParams .data (), mCpSize , mCpRank , mCpGroup , mNumAttnHeads , mNumAttnKVHeads , mNumKVHeadsOrigin ,
567+ mAttnTpSize , mAttnTpRank , mAttnCpSize , mAttnCpRank , mUlyssesMQABroadcast , mEnableContextFMHA ,
568+ mFMHAForceFP32Acc , mMultiBlockMode , mEnableXQA , mUseKVCache , mSkipAttn , mFuseFp4Quant ,
569+ mNbMultiBlockSemaphores , mAttentionChunkSize .value_or (-1 ), mSkipSoftmaxThresholdScaleFactorPrefill ,
570+ mSkipSoftmaxThresholdScaleFactorDecode , mSageAttnNumEltsPerBlkQ , mSageAttnNumEltsPerBlkK ,
571+ mSageAttnNumEltsPerBlkV , mSageAttnQkInt8 );
569572 };
570573
571574private:
0 commit comments