Skip to content

Commit 9b95dd2

Browse files
authored
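Runner fix to mitigate a numerical issue (#19286)
Summary of the change below: the cache-length sum in StaticKVCache is now accumulated as size_t instead of int, and set_input now checks that sizeof(T) matches the element size of the model's input tensor.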
Differential Revision: D103690468
Pull Request resolved: #19286
1 parent e0cc468 commit 9b95dd2

1 file changed

Lines changed: 9 additions & 2 deletions

File tree

examples/models/llama/runner/static_attention_io_manager.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
 #include <unordered_map>
 #include <vector>
 
+#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
 #include <executorch/runtime/core/span.h>
 #include <executorch/runtime/executor/method.h>
 #include <executorch/runtime/platform/log.h>
@@ -53,8 +54,8 @@ class StaticKVCache {
         style_(style),
         input_ptrs_(n_caches_),
         output_ptrs_(n_caches_) {
-    size_t total_cache_len =
-        std::accumulate(cache_lengths_.begin(), cache_lengths_.end(), 0);
+    size_t total_cache_len = std::accumulate(
+        cache_lengths_.begin(), cache_lengths_.end(), size_t(0));
     cache_data_size_ = total_cache_len * n_heads_per_cache_ * head_dim_;
     update_data_size_ =
         n_caches_ * n_heads_per_cache_ * max_input_len_ * head_dim_;
@@ -867,6 +868,12 @@ class StaticAttentionIOManager {
   void set_input(executorch::runtime::Method& method, size_t idx, T* data) {
     auto methodMeta = method.method_meta();
     auto inputMeta = methodMeta.input_tensor_meta(idx);
+    ET_CHECK_MSG(
+        sizeof(T) == executorch::runtime::elementSize(inputMeta->scalar_type()),
+        "set_input: sizeof(T)=%zu but model expects element size %zu for input %zu",
+        sizeof(T),
+        executorch::runtime::elementSize(inputMeta->scalar_type()),
+        idx);
     auto impl = ::executorch::runtime::etensor::TensorImpl(
         inputMeta->scalar_type(),
         inputMeta->sizes().size(),

0 commit comments

Comments
 (0)