issue/214 - update attn and caching logic

wooway777 · wooway777 · commit ee59b3f590b6 · 2026-02-10T18:26:20.000+08:00
diff --git a/csrc/cache/kv_cache.cpp b/csrc/cache/kv_cache.cpp
@@ -96,7 +96,6 @@ StaticKVCache::update(size_t layer_idx,
     if (device.getType() == infinicore::Device::Type::NVIDIA
         || device.getType() == infinicore::Device::Type::ILUVATAR
         || device.getType() == infinicore::Device::Type::METAX
-        || device.getType() == infinicore::Device::Type::MOORE
         || device.getType() == infinicore::Device::Type::CAMBRICON) {
         infinicore::op::kv_caching_(
             k_cache_layer,
diff --git a/csrc/models/llama/llama_attention.cpp b/csrc/models/llama/llama_attention.cpp
@@ -127,8 +127,6 @@ infinicore::Tensor LlamaAttention::forward_(const infinicore::Tensor &hidden_sta
 
     infinicore::Tensor attn_output;
     if (q_reshaped->device().getType() == infinicore::Device::Type::NVIDIA
-        || q_reshaped->device().getType() == infinicore::Device::Type::METAX
-        || q_reshaped->device().getType() == infinicore::Device::Type::MOORE
         || q_reshaped->device().getType() == infinicore::Device::Type::ILUVATAR
         || q_reshaped->device().getType() == infinicore::Device::Type::CAMBRICON) {
         attn_output = infinicore::op::flash_attention(q_reshaped, k_total, v_total, total_sequence_lengths.value(), scaling_, true);