Skip to content

Commit 484ae9a

Browse files
acking-you and claude authored
feat(frontend): color-code first-token & total latency in usage views (#43)
Map latency magnitude to color across the usage surfaces, on two scales:

- 首字延迟 (first-token / TTFT): tight 3-step scale (<3s green / <10s amber / >=10s red) — the pre-first-byte window stays small even when upstreams are slow.
- 整体延迟 (total request latency): wider 5-step scale that keeps the original 1:3:6:12 spacing (<8s emerald / <24s lime / <48s amber / <96s orange / >=96s red), since a streaming reasoning request can legitimately run well over a minute.

Changes:

- llm_access_shared.rs: add shared LatencyBadgeColor alias, format_latency_ms, first_token_latency_color and total_latency_color helpers + unit tests.
- admin_llm_gateway.rs: the Usage tab and Usage-journal-preview tab now use the shared helpers; first-token latency is rendered as a color-coded badge instead of plain muted text. The private format_latency_ms is dropped in favor of the shared one.
- llm_access_usage.rs (public /llm-access/usage): the latency cell reuses the colored-badge style and now shows first-token latency; the 账号 (account) column is removed.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 7ce48b8 commit 484ae9a

3 files changed

Lines changed: 117 additions & 42 deletions

File tree

crates/frontend/src/pages/admin_llm_gateway.rs

Lines changed: 27 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -61,8 +61,9 @@ use crate::{
6161
search_box::SearchBox, status_badge::StatusBadge, tab_bar::render_tab_bar,
6262
},
6363
pages::llm_access_shared::{
64-
confirm_destructive, credit_usage_missing_label, format_ms, format_number_i64,
65-
format_number_u64, token_usage_missing_label, MaskedSecretCode,
64+
confirm_destructive, credit_usage_missing_label, first_token_latency_color,
65+
format_latency_ms, format_ms, format_number_i64, format_number_u64, total_latency_color,
66+
token_usage_missing_label, MaskedSecretCode,
6667
},
6768
router::Route,
6869
};
@@ -297,10 +298,6 @@ enum KeySortMode {
297298
UsageDesc,
298299
}
299300

300-
fn format_latency_ms(latency_ms: i32) -> String {
301-
format!("{} ms", latency_ms.max(0))
302-
}
303-
304301
fn format_optional_latency_ms(latency_ms: Option<i32>) -> String {
305302
latency_ms
306303
.map(format_latency_ms)
@@ -6540,13 +6537,11 @@ pub fn admin_llm_gateway_page() -> Html {
65406537
})
65416538
};
65426539
let latency_ms_val = event.latency_ms.unwrap_or(0) as i32;
6543-
let latency_color = if latency_ms_val < 3000 {
6544-
("border-emerald-500/20", "bg-emerald-500/10", "text-emerald-700", "dark:text-emerald-200")
6545-
} else if latency_ms_val < 10000 {
6546-
("border-amber-500/20", "bg-amber-500/10", "text-amber-700", "dark:text-amber-200")
6547-
} else {
6548-
("border-red-500/20", "bg-red-500/10", "text-red-700", "dark:text-red-200")
6549-
};
6540+
let latency_color = total_latency_color(latency_ms_val);
6541+
let first_token = event.first_sse_write_ms.map(|first_ms| {
6542+
let first_ms = first_ms.clamp(0, i32::MAX as i64) as i32;
6543+
(first_ms, first_token_latency_color(first_ms))
6544+
});
65506545
let status_ok = event.status_code >= 200 && event.status_code < 300;
65516546
html! {
65526547
<tr class={classes!("border-t", "border-[var(--border)]", "align-top")}>
@@ -6594,12 +6589,14 @@ pub fn admin_llm_gateway_page() -> Html {
65946589
<span class={classes!("inline-flex", "rounded-full", "border", "px-2", "py-0.5", "text-[11px]", "font-semibold", latency_color.0, latency_color.1, latency_color.2, latency_color.3)}>
65956590
{ format_latency_ms(latency_ms_val) }
65966591
</span>
6597-
<div class={classes!("mt-0.5", "text-[10px]", "text-[var(--muted)]")}>
6598-
{ if let Some(first_ms) = event.first_sse_write_ms {
6599-
format!("首字 {}ms", first_ms.max(0))
6592+
<div class={classes!("mt-0.5")}>
6593+
if let Some((first_ms, first_color)) = first_token {
6594+
<span class={classes!("inline-flex", "rounded-full", "border", "px-1.5", "py-0.5", "text-[10px]", "font-semibold", first_color.0, first_color.1, first_color.2, first_color.3)}>
6595+
{ format!("首字 {}", format_latency_ms(first_ms)) }
6596+
</span>
66006597
} else {
6601-
"-".to_string()
6602-
}}
6598+
<span class={classes!("text-[10px]", "text-[var(--muted)]")}>{ "首字 -" }</span>
6599+
}
66036600
</div>
66046601
} else {
66056602
<span class={classes!("text-xs", "text-[var(--muted)]")}>{ "-" }</span>
@@ -9174,14 +9171,11 @@ pub fn admin_llm_gateway_page() -> Html {
91749171
&event.request_url,
91759172
&event.endpoint,
91769173
);
9177-
let latency_ms_val = event.latency_ms;
9178-
let latency_color = if latency_ms_val < 3000 {
9179-
("border-emerald-500/20", "bg-emerald-500/10", "text-emerald-700", "dark:text-emerald-200")
9180-
} else if latency_ms_val < 10000 {
9181-
("border-amber-500/20", "bg-amber-500/10", "text-amber-700", "dark:text-amber-200")
9182-
} else {
9183-
("border-red-500/20", "bg-red-500/10", "text-red-700", "dark:text-red-200")
9184-
};
9174+
let latency_color = total_latency_color(event.latency_ms);
9175+
let first_token = event.first_sse_write_ms.map(|first_ms| {
9176+
let first_ms = first_ms.max(0);
9177+
(first_ms, first_token_latency_color(first_ms))
9178+
});
91859179
let status_ok = event.status_code == 200;
91869180
html! {
91879181
<tr class={classes!("border-t", "border-[var(--border)]", "align-top")}>
@@ -9228,12 +9222,14 @@ pub fn admin_llm_gateway_page() -> Html {
92289222
<span class={classes!("inline-flex", "rounded-full", "border", "px-2", "py-0.5", "text-[11px]", "font-semibold", latency_color.0, latency_color.1, latency_color.2, latency_color.3)}>
92299223
{ format_latency_ms(event.latency_ms) }
92309224
</span>
9231-
<div class={classes!("mt-0.5", "text-[10px]", "text-[var(--muted)]")}>
9232-
{ if let Some(first_ms) = event.first_sse_write_ms {
9233-
format!("首字 {}ms", first_ms.max(0))
9225+
<div class={classes!("mt-0.5")}>
9226+
if let Some((first_ms, first_color)) = first_token {
9227+
<span class={classes!("inline-flex", "rounded-full", "border", "px-1.5", "py-0.5", "text-[10px]", "font-semibold", first_color.0, first_color.1, first_color.2, first_color.3)}>
9228+
{ format!("首字 {}", format_latency_ms(first_ms)) }
9229+
</span>
92349230
} else {
9235-
"-".to_string()
9236-
}}
9231+
<span class={classes!("text-[10px]", "text-[var(--muted)]")}>{ "首字 -" }</span>
9232+
}
92379233
</div>
92389234
</td>
92399235
<td class={classes!("py-2.5", "pr-3", "whitespace-nowrap", "font-mono", "text-[11px]")}>

crates/frontend/src/pages/llm_access_shared.rs

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,49 @@ pub fn token_usage_missing_label() -> &'static str {
205205
"token usage unavailable"
206206
}
207207

208+
/// Tailwind class tuple `(border, bg, text, dark:text)` for a latency badge — maps a
209+
/// latency value onto a green→red scale. Shared so the admin usage tables and the
210+
/// public usage lookup render latency identically.
211+
pub type LatencyBadgeColor = (&'static str, &'static str, &'static str, &'static str);
212+
213+
/// Render a latency in milliseconds as `"{n} ms"`, clamping negatives to 0.
214+
pub fn format_latency_ms(latency_ms: i32) -> String {
215+
format!("{} ms", latency_ms.max(0))
216+
}
217+
218+
/// Color scale for **first-token latency (首字延迟 / TTFT)**. This only covers the
219+
/// window up to the first streamed byte, which stays small even when the upstream is
220+
/// slow, so the thresholds are tight: `<3s` healthy · `<10s` warning · `>=10s` slow.
221+
pub fn first_token_latency_color(latency_ms: i32) -> LatencyBadgeColor {
222+
if latency_ms < 3_000 {
223+
("border-emerald-500/20", "bg-emerald-500/10", "text-emerald-700", "dark:text-emerald-200")
224+
} else if latency_ms < 10_000 {
225+
("border-amber-500/20", "bg-amber-500/10", "text-amber-700", "dark:text-amber-200")
226+
} else {
227+
("border-red-500/20", "bg-red-500/10", "text-red-700", "dark:text-red-200")
228+
}
229+
}
230+
231+
/// Color scale for **total request latency (整体延迟)**. This spans the whole
232+
/// generation, so a streaming reasoning request can legitimately run well over a
233+
/// minute. The range is therefore much wider than first-token latency and uses five
234+
/// steps (keeping the original `1 : 3 : 6 : 12` spacing) so the common band still shows
235+
/// gradient detail: `<8s` emerald · `<24s` lime · `<48s` amber · `<96s` orange ·
236+
/// `>=96s` red.
237+
pub fn total_latency_color(latency_ms: i32) -> LatencyBadgeColor {
238+
if latency_ms < 8_000 {
239+
("border-emerald-500/20", "bg-emerald-500/10", "text-emerald-700", "dark:text-emerald-200")
240+
} else if latency_ms < 24_000 {
241+
("border-lime-500/20", "bg-lime-500/10", "text-lime-700", "dark:text-lime-200")
242+
} else if latency_ms < 48_000 {
243+
("border-amber-500/20", "bg-amber-500/10", "text-amber-700", "dark:text-amber-200")
244+
} else if latency_ms < 96_000 {
245+
("border-orange-500/20", "bg-orange-500/10", "text-orange-700", "dark:text-orange-200")
246+
} else {
247+
("border-red-500/20", "bg-red-500/10", "text-red-700", "dark:text-red-200")
248+
}
249+
}
250+
208251
pub fn credit_usage_missing_label() -> &'static str {
209252
"credit usage unavailable"
210253
}
@@ -407,8 +450,8 @@ fn parse_gpt_model_rank(slug: &str) -> Option<(i32, i32, i32, i32)> {
407450
mod tests {
408451
use super::{
409452
codex_model_catalog_download_command, codex_provider_config, credit_usage_missing_label,
410-
format_kiro_disabled_reason, preferred_model_slug_from_catalog_json,
411-
token_usage_missing_label,
453+
first_token_latency_color, format_kiro_disabled_reason, format_latency_ms,
454+
preferred_model_slug_from_catalog_json, token_usage_missing_label, total_latency_color,
412455
};
413456

414457
#[test]
@@ -479,4 +522,29 @@ mod tests {
479522
assert_eq!(token_usage_missing_label(), "token usage unavailable");
480523
assert_eq!(credit_usage_missing_label(), "credit usage unavailable");
481524
}
525+
526+
#[test]
527+
fn format_latency_ms_clamps_negatives() {
528+
assert_eq!(format_latency_ms(1_234), "1234 ms");
529+
assert_eq!(format_latency_ms(0), "0 ms");
530+
assert_eq!(format_latency_ms(-5), "0 ms");
531+
}
532+
533+
#[test]
534+
fn first_token_latency_color_uses_tight_thresholds() {
535+
// healthy < 3s, warning < 10s, slow otherwise — boundaries are exclusive.
536+
assert_eq!(first_token_latency_color(2_999).0, "border-emerald-500/20");
537+
assert_eq!(first_token_latency_color(3_000).0, "border-amber-500/20");
538+
assert_eq!(first_token_latency_color(9_999).0, "border-amber-500/20");
539+
assert_eq!(first_token_latency_color(10_000).0, "border-red-500/20");
540+
}
541+
542+
#[test]
543+
fn total_latency_color_spans_a_wider_five_step_range() {
544+
assert_eq!(total_latency_color(7_999).0, "border-emerald-500/20");
545+
assert_eq!(total_latency_color(8_000).0, "border-lime-500/20");
546+
assert_eq!(total_latency_color(24_000).0, "border-amber-500/20");
547+
assert_eq!(total_latency_color(48_000).0, "border-orange-500/20");
548+
assert_eq!(total_latency_color(96_000).0, "border-red-500/20");
549+
}
482550
}

crates/frontend/src/pages/llm_access_usage.rs

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ use crate::{
1010
},
1111
components::{pagination::Pagination, token_usage_trend_chart::TokenUsageTrendChart},
1212
pages::llm_access_shared::{
13-
format_ms, format_number_i64, format_number_u64, token_usage_missing_label,
13+
first_token_latency_color, format_latency_ms, format_ms, format_number_i64,
14+
format_number_u64, token_usage_missing_label, total_latency_color,
1415
},
1516
router::Route,
1617
};
@@ -397,7 +398,6 @@ pub fn llm_access_usage_page() -> Html {
397398
<thead class={classes!("border-b", "border-[var(--border)]", "font-mono", "text-[11px]", "uppercase", "tracking-[0.12em]", "text-[var(--muted)]")}>
398399
<tr>
399400
<th class={classes!("py-2", "pr-3")}>{ "时间 / Event ID" }</th>
400-
<th class={classes!("py-2", "pr-3")}>{ "账号" }</th>
401401
<th class={classes!("py-2", "pr-3")}>{ "请求" }</th>
402402
<th class={classes!("py-2", "pr-3")}>{ "模型 / 状态" }</th>
403403
<th class={classes!("py-2", "pr-3")}>{ "延迟" }</th>
@@ -407,6 +407,11 @@ pub fn llm_access_usage_page() -> Html {
407407
</thead>
408408
<tbody>
409409
{ for response.events.iter().map(|event| {
410+
let latency_color = total_latency_color(event.latency_ms);
411+
let first_token = event.first_sse_write_ms.map(|first_ms| {
412+
let first_ms = first_ms.max(0);
413+
(first_ms, first_token_latency_color(first_ms))
414+
});
410415
html! {
411416
<tr key={event.id.clone()} class={classes!("border-b", "border-[var(--border)]", "align-top")}>
412417
<td class={classes!("py-3", "pr-3", "min-w-[13rem]", "whitespace-nowrap", "font-mono", "text-xs")}>
@@ -415,11 +420,6 @@ pub fn llm_access_usage_page() -> Html {
415420
{ event.id.clone() }
416421
</div>
417422
</td>
418-
<td class={classes!("py-3", "pr-3", "min-w-[10rem]")}>
419-
<span class={classes!("inline-flex", "rounded-full", "border", "border-emerald-500/20", "bg-emerald-500/10", "px-2.5", "py-1", "text-xs", "font-semibold", "text-emerald-700", "dark:text-emerald-200")}>
420-
{ event.account_name.clone().unwrap_or_else(|| "legacy auth".to_string()) }
421-
</span>
422-
</td>
423423
<td class={classes!("py-3", "pr-3", "min-w-[22rem]")}>
424424
<div class={classes!("flex", "items-start", "gap-2")}>
425425
<span class={classes!("inline-flex", "rounded-full", "border", "border-sky-500/20", "bg-sky-500/10", "px-2", "py-1", "text-[11px]", "font-semibold", "uppercase", "tracking-[0.12em]", "text-sky-700", "dark:text-sky-200")}>
@@ -444,8 +444,19 @@ pub fn llm_access_usage_page() -> Html {
444444
</div>
445445
}
446446
</td>
447-
<td class={classes!("py-3", "pr-3", "whitespace-nowrap", "font-mono", "text-xs")}>
448-
{ format!("{} ms", event.latency_ms) }
447+
<td class={classes!("py-3", "pr-3", "whitespace-nowrap")}>
448+
<span class={classes!("inline-flex", "rounded-full", "border", "px-2", "py-0.5", "font-mono", "text-[11px]", "font-semibold", latency_color.0, latency_color.1, latency_color.2, latency_color.3)}>
449+
{ format_latency_ms(event.latency_ms) }
450+
</span>
451+
<div class={classes!("mt-1")}>
452+
if let Some((first_ms, first_color)) = first_token {
453+
<span class={classes!("inline-flex", "rounded-full", "border", "px-2", "py-0.5", "font-mono", "text-[11px]", "font-semibold", first_color.0, first_color.1, first_color.2, first_color.3)}>
454+
{ format!("首字 {}", format_latency_ms(first_ms)) }
455+
</span>
456+
} else {
457+
<span class={classes!("font-mono", "text-[11px]", "text-[var(--muted)]")}>{ "首字 -" }</span>
458+
}
459+
</div>
449460
</td>
450461
<td class={classes!("py-3", "pr-3", "whitespace-nowrap", "font-mono", "text-xs")}>
451462
{ format!("{}/{}", event.client_ip, event.ip_region) }

0 commit comments

Comments
 (0)