@@ -805,8 +805,10 @@ HWY_NOINLINE void Transformer(const int* tokens, size_t num_tokens,
805805 const size_t num_interleaved = num_tokens * num_queries;
806806 if (layers_output) {
807807 for (size_t token_idx = 0 ; token_idx < num_interleaved; ++token_idx) {
808- float token_f = tokens[token_idx];
809- layers_output (pos + token_idx, " Tokens" , &token_f, 1 );
808+ const size_t query_idx = token_idx % num_queries;
809+ const size_t logical_pos = (pos + token_idx) / num_queries;
810+ const float token_f = tokens[token_idx];
811+ layers_output (query_idx, logical_pos, " tokens" , -1 , &token_f, 1 );
810812 }
811813 }
812814 constexpr size_t kModelDim = TConfig::kModelDim ;
@@ -821,9 +823,9 @@ HWY_NOINLINE void Transformer(const int* tokens, size_t num_tokens,
821823 layer_weights, activations, kv_caches, pool);
822824
823825 if (layers_output) {
824- const std::string block_name = " blocks." + std::to_string (layer);
825826 for (size_t token_idx = 0 ; token_idx < num_interleaved; ++token_idx) {
826- layers_output (pos + token_idx, block_name,
827+ const size_t logical_pos = (pos + token_idx) / num_queries;
828+ layers_output (token_idx % num_queries, logical_pos, " blocks" , layer,
827829 activations.x .Batch (token_idx), kModelDim );
828830 }
829831 }
@@ -833,7 +835,9 @@ HWY_NOINLINE void Transformer(const int* tokens, size_t num_tokens,
833835 activations.x .All (), kModelDim );
834836 if (layers_output) {
835837 for (size_t token_idx = 0 ; token_idx < num_interleaved; ++token_idx) {
836- layers_output (pos + token_idx, " final_norm" ,
838+ const size_t query_idx = token_idx % num_queries;
839+ const size_t logical_pos = (pos + token_idx) / num_queries;
840+ layers_output (query_idx, logical_pos, " final_norm" , -1 ,
837841 activations.x .Batch (token_idx), kModelDim );
838842 }
839843 }
0 commit comments