Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions lmdeploy/pytorch/engine/executor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,16 +239,15 @@ def _adjust_block_size(self):
if self.cache_config.block_size != 64:
raise ValueError('Please set block_size to 64 for flash_mla.')
return
# Linear attention requires a kv block size of 128 on ascend.
# head_dim=256 requires block_size=128 on ascend.
# Other models keep the user-provided block size.
is_ssm = len(self.model_config.states_shapes) > 0
if (self.cache_config.device_type == 'ascend' and is_ssm and
if (self.cache_config.device_type == 'ascend' and self.model_config.k_head_dim == 256 and
(self.cache_config.block_size != 128 or self.cache_config.kernel_block_size != 128)):
logger.warning(
'Force `block_size=128` and `kernel_block_size=128` '
f'(was block_size={self.cache_config.block_size}, '
f'kernel_block_size={self.cache_config.kernel_block_size}) '
'for linear attention on ascend.')
'for head_dim=256 on ascend.')
self.cache_config.block_size = 128
self.cache_config.kernel_block_size = 128
return
Expand Down
Loading