We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 52e662d, commit d0c0f51 (Copy full SHA for d0c0f51)
1 file changed
modelopt/torch/sparsity/attention_sparsity/config.py
@@ -399,9 +399,9 @@ class FlashSkipSoftmaxConfig(SparseAttentionConfig):
399
SKIP_SOFTMAX_CALIB = {
400
"sparse_cfg": {
401
"calibration": {
402
- "target_sparse_ratio": {"prefill": 0.9, "decode": 0.9},
+ "target_sparse_ratio": {"prefill": 0.5, "decode": 0.5},
403
"samples": 64,
404
- "max_seqlen": 65536,
+ "max_seqlen": 16384,
405
"chunk_size": 4096,
406
},
407
"*attn*": {
0 commit comments