File tree Expand file tree Collapse file tree
Path: transformer_engine/pytorch/attention/dot_product_attention
Columns: Original file line number | Diff line number | Diff line change
@@ -672,13 +672,6 @@ def _disable_all_flash_attention() -> None:
672672 if use_flash_attention :
673673 use_flash_attention = False
674674 logger .debug ("Disabling FlashAttention for max_logit" )
675- # FusedAttention emits max_logit alongside the softmax stats, which cuDNN only
676- # supports through the unified softmax node introduced in cuDNN 9.21.0. On older
677- # cuDNN the composite softmax node rejects the stats+max combination, so fall back
678- # to UnfusedDotProductAttention.
679- if use_fused_attention and cudnn_version < (9 , 21 , 0 ):
680- use_fused_attention = False
681- logger .debug ("Disabling FusedAttention for max_logit for cuDNN < 9.21.0" )
682675 if fp8 and fp8_meta ["recipe" ].fp8_dpa :
683676 use_flash_attention = False
684677 use_fused_attention = False
You can’t perform that action at this time.
0 commit comments