Skip to content

Commit 5a88ce1

Browse files
committed
feat(analytics): add workload cost breakdown dashboards
1 parent 1cb4201 commit 5a88ce1

File tree

30 files changed

+864
-70
lines changed

30 files changed

+864
-70
lines changed

TODO.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ This roadmap is derived from deep research into Greptile's public docs, blog, MC
134134
86. [ ] Add deployment docs for self-hosted review + analytics + trend retention setups.
135135
87. [ ] Add secret-management guidance and validation for multi-provider enterprise installs.
136136
88. [ ] Add background jobs for recomputing analytics after schema or scoring changes.
137-
89. [ ] Add cost dashboards by provider/model/role for review, verification, and eval workloads.
137+
89. [x] Add cost dashboards by provider/model/role for review, verification, and eval workloads.
138138
90. [ ] Add failure forensics bundles for self-hosted users when review or eval jobs degrade.
139139

140140
## 10. Eval, Benchmarking, and Model Governance
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- Add a JSONB column that stores the workload/role/provider/model cost breakdown rows for each review event (defaults to an empty array).
2+
ALTER TABLE review_events
3+
ADD COLUMN IF NOT EXISTS cost_breakdowns JSONB NOT NULL DEFAULT '[]';

src/commands/eval/command.rs

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,14 @@ fn build_eval_run_metadata(
8484
artifact_dir: Option<&Path>,
8585
) -> EvalRunMetadata {
8686
let (_, resolved_base_url, resolved_adapter) = config.resolve_provider();
87-
let provider = inferred_provider(
88-
resolved_base_url.as_deref().or(config.base_url.as_deref()),
89-
resolved_adapter.as_deref().or(config.adapter.as_deref()),
90-
);
87+
let provider = config.inferred_provider_label_for_role(config.generation_model_role);
9188
let generation_model = config.generation_model_name().to_string();
89+
let cost_breakdowns = crate::server::cost::aggregate_cost_breakdowns(
90+
execution
91+
.results
92+
.iter()
93+
.flat_map(|result| result.cost_breakdowns.clone()),
94+
);
9295
let mut verification_judges = Vec::new();
9396
let mut seen_verification_judges = HashSet::new();
9497
for role in std::iter::once(config.verification.model_role)
@@ -140,17 +143,10 @@ fn build_eval_run_metadata(
140143
repeat_index,
141144
repeat_total,
142145
reproduction_validation: options.repro_validate,
146+
cost_breakdowns,
143147
}
144148
}
145149

146-
fn inferred_provider(base_url: Option<&str>, adapter: Option<&str>) -> Option<String> {
147-
if base_url.is_some_and(|value| value.contains("openrouter.ai")) {
148-
return Some("openrouter".to_string());
149-
}
150-
151-
adapter.map(|value| value.to_string())
152-
}
153-
154150
fn review_mode_label(agent_enabled: bool) -> &'static str {
155151
if agent_enabled {
156152
"agent-loop"

src/commands/eval/metrics/comparisons.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ mod tests {
269269
reproduction_summary: None,
270270
artifact_path: None,
271271
failures: vec![],
272+
cost_breakdowns: vec![],
272273
dag_traces: vec![],
273274
},
274275
EvalFixtureResult {
@@ -290,6 +291,7 @@ mod tests {
290291
reproduction_summary: None,
291292
artifact_path: None,
292293
failures: vec![],
294+
cost_breakdowns: vec![],
293295
dag_traces: vec![],
294296
},
295297
];
@@ -326,6 +328,7 @@ mod tests {
326328
reproduction_summary: None,
327329
artifact_path: None,
328330
failures: vec![],
331+
cost_breakdowns: vec![],
329332
dag_traces: vec![],
330333
}];
331334

@@ -353,6 +356,7 @@ mod tests {
353356
reproduction_summary: None,
354357
artifact_path: None,
355358
failures: vec![],
359+
cost_breakdowns: vec![],
356360
dag_traces: vec![],
357361
}];
358362

@@ -391,12 +395,14 @@ mod tests {
391395
filtered_comments: 0,
392396
abstained_comments: 0,
393397
warnings: vec![],
398+
..Default::default()
394399
}],
395400
}),
396401
agent_activity: None,
397402
reproduction_summary: None,
398403
artifact_path: None,
399404
failures: vec![],
405+
cost_breakdowns: vec![],
400406
dag_traces: vec![],
401407
}];
402408

@@ -434,12 +440,14 @@ mod tests {
434440
filtered_comments: 1,
435441
abstained_comments: 1,
436442
warnings: vec![],
443+
..Default::default()
437444
}],
438445
}),
439446
agent_activity: None,
440447
reproduction_summary: None,
441448
artifact_path: None,
442449
failures: vec![],
450+
cost_breakdowns: vec![],
443451
dag_traces: vec![],
444452
}];
445453

src/commands/eval/metrics/suites.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ mod tests {
254254
reproduction_summary: None,
255255
artifact_path: None,
256256
failures: vec!["missing finding".to_string()],
257+
cost_breakdowns: vec![],
257258
dag_traces: vec![],
258259
}];
259260

@@ -292,6 +293,7 @@ mod tests {
292293
reproduction_summary: None,
293294
artifact_path: None,
294295
failures: vec![],
296+
cost_breakdowns: vec![],
295297
dag_traces: vec![],
296298
},
297299
EvalFixtureResult {
@@ -318,6 +320,7 @@ mod tests {
318320
reproduction_summary: None,
319321
artifact_path: None,
320322
failures: vec![],
323+
cost_breakdowns: vec![],
321324
dag_traces: vec![],
322325
},
323326
];
@@ -369,6 +372,7 @@ mod tests {
369372
reproduction_summary: None,
370373
artifact_path: None,
371374
failures: vec![],
375+
cost_breakdowns: vec![],
372376
dag_traces: vec![],
373377
},
374378
EvalFixtureResult {
@@ -390,6 +394,7 @@ mod tests {
390394
reproduction_summary: None,
391395
artifact_path: None,
392396
failures: vec!["missing".to_string()],
397+
cost_breakdowns: vec![],
393398
dag_traces: vec![],
394399
},
395400
];

src/commands/eval/report/trend.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ fn trend_entry_for_report(report: &EvalReport) -> Option<TrendEntry> {
9090
verification_verified_checks: verification_health.map(|health| health.verified_checks),
9191
verification_total_checks: verification_health.map(|health| health.total_checks),
9292
verification_verified_pct: verification_health.map(|health| health.verified_pct),
93+
cost_breakdowns: report.run.cost_breakdowns.clone(),
9394
})
9495
}
9596

@@ -232,6 +233,7 @@ mod tests {
232233
reproduction_summary: None,
233234
artifact_path: None,
234235
failures: vec![],
236+
cost_breakdowns: vec![],
235237
dag_traces: vec![],
236238
}],
237239
}

src/commands/eval/runner/execute/dag.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ struct EvalFixtureDagContext {
6666
dag_config: EvalFixtureDagConfig,
6767
comments: Vec<core::Comment>,
6868
warnings: Vec<String>,
69+
cost_breakdowns: Vec<crate::server::cost::CostBreakdownRow>,
6970
verification_report: Option<EvalVerificationReport>,
7071
agent_activity: Option<EvalAgentActivity>,
7172
reproduction_summary: Option<EvalReproductionSummary>,
@@ -82,6 +83,7 @@ enum EvalFixtureStageOutput {
8283
Review {
8384
comments: Vec<core::Comment>,
8485
warnings: Vec<String>,
86+
cost_breakdowns: Vec<crate::server::cost::CostBreakdownRow>,
8587
verification_report: Option<EvalVerificationReport>,
8688
agent_activity: Option<EvalAgentActivity>,
8789
dag_traces: Vec<DagExecutionTrace>,
@@ -99,6 +101,7 @@ enum EvalFixtureStageOutput {
99101
ReproductionValidation {
100102
reproduction_summary: Option<EvalReproductionSummary>,
101103
warnings: Vec<String>,
104+
cost_breakdowns: Vec<crate::server::cost::CostBreakdownRow>,
102105
},
103106
ArtifactCapture {
104107
artifact_path: Option<String>,
@@ -112,6 +115,7 @@ impl EvalFixtureDagContext {
112115
dag_config,
113116
comments: Vec::new(),
114117
warnings: Vec::new(),
118+
cost_breakdowns: Vec::new(),
115119
verification_report: None,
116120
agent_activity: None,
117121
reproduction_summary: None,
@@ -147,6 +151,7 @@ impl EvalFixtureDagContext {
147151
reproduction_summary: self.reproduction_summary,
148152
artifact_path: self.artifact_path,
149153
failures: self.failures,
154+
cost_breakdowns: self.cost_breakdowns,
150155
dag_traces,
151156
},
152157
})
@@ -409,9 +414,27 @@ fn spawn_stage(
409414
let repo_path = context.prepared.repo_path.clone();
410415
let config = config.clone();
411416
Ok(async move {
417+
let generation_role = config.generation_model_role.as_str().to_string();
418+
let generation_provider =
419+
config.inferred_provider_label_for_role(config.generation_model_role);
420+
let generation_model = config.generation_model_name().to_string();
412421
let review_result =
413422
review_diff_content_raw(&diff_content, config, &repo_path).await?;
423+
let cost_breakdowns = crate::server::cost::review_cost_breakdowns(
424+
crate::server::cost::CostBreakdownRequest {
425+
workload: "eval_generation",
426+
role: &generation_role,
427+
provider: generation_provider,
428+
model: &generation_model,
429+
prompt_tokens: review_result.total_prompt_tokens,
430+
completion_tokens: review_result.total_completion_tokens,
431+
total_tokens: review_result.total_tokens,
432+
},
433+
"eval_verification",
434+
review_result.verification_report.as_ref(),
435+
);
414436
Ok(EvalFixtureStageOutput::Review {
437+
cost_breakdowns,
415438
verification_report: convert_verification_report(
416439
review_result.verification_report,
417440
),
@@ -486,9 +509,27 @@ fn spawn_stage(
486509
.as_ref()
487510
.map(build_reproduction_warnings)
488511
.unwrap_or_default();
512+
let cost_breakdowns = reproduction_summary
513+
.as_ref()
514+
.and_then(|summary| {
515+
(summary.total_tokens > 0).then(|| {
516+
crate::server::cost::CostBreakdownRow::new(
517+
"eval_auditing",
518+
summary.role.as_str(),
519+
summary.provider.clone(),
520+
summary.model.as_str(),
521+
summary.prompt_tokens,
522+
summary.completion_tokens,
523+
summary.total_tokens,
524+
)
525+
})
526+
})
527+
.into_iter()
528+
.collect();
489529
Ok(EvalFixtureStageOutput::ReproductionValidation {
490530
reproduction_summary,
491531
warnings,
532+
cost_breakdowns,
492533
})
493534
}
494535
.boxed())
@@ -543,6 +584,7 @@ fn apply_stage_output(
543584
EvalFixtureStageOutput::Review {
544585
comments,
545586
warnings,
587+
cost_breakdowns,
546588
verification_report,
547589
agent_activity,
548590
dag_traces,
@@ -551,6 +593,7 @@ fn apply_stage_output(
551593
context.total_comments = comments.len();
552594
context.comments = comments;
553595
context.warnings = warnings;
596+
context.cost_breakdowns = cost_breakdowns;
554597
context.verification_report = verification_report;
555598
context.agent_activity = agent_activity;
556599
context.dag_traces = dag_traces;
@@ -586,10 +629,12 @@ fn apply_stage_output(
586629
EvalFixtureStageOutput::ReproductionValidation {
587630
reproduction_summary,
588631
warnings,
632+
cost_breakdowns,
589633
},
590634
) => {
591635
context.reproduction_summary = reproduction_summary;
592636
context.warnings.extend(warnings);
637+
context.cost_breakdowns.extend(cost_breakdowns);
593638
Ok(())
594639
}
595640
(

src/commands/eval/runner/execute/repro.rs

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ pub(super) async fn maybe_run_reproduction_validation(
5454

5555
let model_config = config.to_model_config_for_role(config.auditing_model_role);
5656
let model_name = model_config.model_name.clone();
57+
let role = config.auditing_model_role.as_str().to_string();
58+
let provider = config.inferred_provider_label_for_role(config.auditing_model_role);
5759
let adapter: Arc<dyn adapters::llm::LLMAdapter> =
5860
Arc::from(adapters::llm::create_adapter(&model_config)?);
5961
let workspace = prepare_reproduction_workspace(prepared)?;
@@ -67,6 +69,9 @@ pub(super) async fn maybe_run_reproduction_validation(
6769
let tools = build_review_tools(tool_context, None);
6870

6971
let mut checks = Vec::new();
72+
let mut prompt_tokens = 0usize;
73+
let mut completion_tokens = 0usize;
74+
let mut total_tokens = 0usize;
7075
for comment in comments.iter().take(max_comments) {
7176
let (tool_evidence, tool_logs, tool_warnings) =
7277
gather_reproduction_evidence(&tools, comment, workspace.include_git_tools).await;
@@ -80,6 +85,11 @@ pub(super) async fn maybe_run_reproduction_validation(
8085
};
8186
match adapter.complete(request).await {
8287
Ok(response) => {
88+
if let Some(usage) = response.usage.as_ref() {
89+
prompt_tokens += usage.prompt_tokens;
90+
completion_tokens += usage.completion_tokens;
91+
total_tokens += usage.total_tokens;
92+
}
8393
let parsed = parse_reproduction_response(&response.content);
8494
let agent_activity = convert_agent_activity(Some(crate::review::AgentActivity {
8595
total_iterations: usize::from(!tool_logs.is_empty()),
@@ -137,7 +147,15 @@ pub(super) async fn maybe_run_reproduction_validation(
137147
}
138148
}
139149

140-
Ok(Some(build_reproduction_summary(checks)))
150+
Ok(Some(build_reproduction_summary(
151+
checks,
152+
model_name,
153+
role,
154+
provider,
155+
prompt_tokens,
156+
completion_tokens,
157+
total_tokens,
158+
)))
141159
}
142160

143161
fn build_reproduction_prompt(
@@ -161,7 +179,15 @@ fn build_reproduction_prompt(
161179
)
162180
}
163181

164-
fn build_reproduction_summary(checks: Vec<EvalReproductionCheck>) -> EvalReproductionSummary {
182+
fn build_reproduction_summary(
183+
checks: Vec<EvalReproductionCheck>,
184+
model: String,
185+
role: String,
186+
provider: Option<String>,
187+
prompt_tokens: usize,
188+
completion_tokens: usize,
189+
total_tokens: usize,
190+
) -> EvalReproductionSummary {
165191
let mut summary = EvalReproductionSummary::default();
166192
for check in &checks {
167193
match check.reproduced {
@@ -170,6 +196,13 @@ fn build_reproduction_summary(checks: Vec<EvalReproductionCheck>) -> EvalReprodu
170196
None => summary.inconclusive += 1,
171197
}
172198
}
199+
summary.model = model.clone();
200+
summary.role = role;
201+
summary.provider = provider;
202+
summary.prompt_tokens = prompt_tokens;
203+
summary.completion_tokens = completion_tokens;
204+
summary.total_tokens = total_tokens;
205+
summary.cost_estimate_usd = crate::server::cost::estimate_cost_usd(&model, total_tokens);
173206
summary.checks = checks;
174207
summary
175208
}

0 commit comments

Comments
 (0)