InternLM · wanfengcxz · Jun 30, 2026
diff --git a/lmdeploy/pytorch/engine/executor/base.py b/lmdeploy/pytorch/engine/executor/base.py
@@ -239,16 +239,15 @@ def _adjust_block_size(self):
             if self.cache_config.block_size != 64:
                 raise ValueError('Please set block_size to 64 for flash_mla.')
             return
-        # Linear attention requires a kv block size of 128 on ascend.
+        # k_head_dim == 256 requires block_size=128 and kernel_block_size=128 on ascend.
         # Other models keep the user-provided block size.
-        is_ssm = len(self.model_config.states_shapes) > 0
-        if (self.cache_config.device_type == 'ascend' and is_ssm and
+        if (self.cache_config.device_type == 'ascend' and self.model_config.k_head_dim == 256 and
                 (self.cache_config.block_size != 128 or self.cache_config.kernel_block_size != 128)):
             logger.warning(
                 'Force `block_size=128` and `kernel_block_size=128` '
                 f'(was block_size={self.cache_config.block_size}, '
                 f'kernel_block_size={self.cache_config.kernel_block_size}) '
-                'for linear attention on ascend.')
+                'for head_dim=256 on ascend.')
             self.cache_config.block_size = 128
             self.cache_config.kernel_block_size = 128
             return