Skip to content

Commit d37c088

Browse files
pchx authored and copybara-github committed
Extend LayersOutputFunc to take query index and auxiliary int
PiperOrigin-RevId: 657574814
1 parent 8b4915f commit d37c088

3 files changed

Lines changed: 21 additions & 12 deletions

File tree

evals/debug_prompt.cc

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,13 @@ int Run(int argc, char** argv) {
5959
env.MutableConfig().layers_output =
6060
prompt_args.layers_output.Empty()
6161
? LayersOutputFunc()
62-
: [&json_output](int pos, const std::string& key, const float* values,
63-
size_t values_len) {
64-
std::vector<float> v{values, values + values_len};
65-
json_output[std::to_string(pos)][key] = v;
62+
: [&json_output](size_t query_idx, size_t pos, const std::string& key,
63+
int layer, const float* values, size_t values_len) {
64+
const std::string& debug_key =
65+
layer < 0 ? key : (key + "." + std::to_string(layer));
66+
const std::vector<float> v{values, values + values_len};
67+
json& json_base = json_output[std::to_string(query_idx)];
68+
json_base[std::to_string(pos)][debug_key] = v;
6669
};
6770

6871
const auto [answer, token_count] = env.QueryModel(prompt_args.prompt);

gemma/gemma-inl.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -805,8 +805,10 @@ HWY_NOINLINE void Transformer(const int* tokens, size_t num_tokens,
805805
const size_t num_interleaved = num_tokens * num_queries;
806806
if (layers_output) {
807807
for (size_t token_idx = 0; token_idx < num_interleaved; ++token_idx) {
808-
float token_f = tokens[token_idx];
809-
layers_output(pos + token_idx, "Tokens", &token_f, 1);
808+
const size_t query_idx = token_idx % num_queries;
809+
const size_t logical_pos = (pos + token_idx) / num_queries;
810+
const float token_f = tokens[token_idx];
811+
layers_output(query_idx, logical_pos, "tokens", -1, &token_f, 1);
810812
}
811813
}
812814
constexpr size_t kModelDim = TConfig::kModelDim;
@@ -821,9 +823,9 @@ HWY_NOINLINE void Transformer(const int* tokens, size_t num_tokens,
821823
layer_weights, activations, kv_caches, pool);
822824

823825
if (layers_output) {
824-
const std::string block_name = "blocks." + std::to_string(layer);
825826
for (size_t token_idx = 0; token_idx < num_interleaved; ++token_idx) {
826-
layers_output(pos + token_idx, block_name,
827+
const size_t logical_pos = (pos + token_idx) / num_queries;
828+
layers_output(token_idx % num_queries, logical_pos, "blocks", layer,
827829
activations.x.Batch(token_idx), kModelDim);
828830
}
829831
}
@@ -833,7 +835,9 @@ HWY_NOINLINE void Transformer(const int* tokens, size_t num_tokens,
833835
activations.x.All(), kModelDim);
834836
if (layers_output) {
835837
for (size_t token_idx = 0; token_idx < num_interleaved; ++token_idx) {
836-
layers_output(pos + token_idx, "final_norm",
838+
const size_t query_idx = token_idx % num_queries;
839+
const size_t logical_pos = (pos + token_idx) / num_queries;
840+
layers_output(query_idx, logical_pos, "final_norm", -1,
837841
activations.x.Batch(token_idx), kModelDim);
838842
}
839843
}

gemma/gemma.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,15 @@ using AcceptFunc = std::function<bool(int, float)>;
4949
// If not empty, SampleFunc is called with the probability distribution for the
5050
// next token, and its return value is used as the next generated token.
5151
using SampleFunc = std::function<int(const float*, size_t)>;
52-
// Will be called for layers output with:
52+
// If not empty, LayersOutputFunc is called for layer outputs, specified with:
53+
// - index of query within containing batch (if any); zero otherwise.
5354
// - position in the tokens sequence
54-
// - name of the data, p.ex. "tokens", "block.1", "final_norm"
55+
// - name of the data, e.g. "tokens", "blocks", "final_norm"
56+
// - layer index (or -1 for global outputs), e.g. "blocks" exposes x per-layer
5557
// - pointer to the data array
5658
// - size of the data array
5759
using LayersOutputFunc =
58-
std::function<void(int, const std::string&, const float*, size_t)>;
60+
std::function<void(size_t, size_t, const std::string&, int, const float*, size_t)>;
5961

6062
struct RuntimeConfig {
6163
bool StreamToken(size_t query_idx, size_t pos, int token, float prob) const {

0 commit comments

Comments
 (0)