feat(frontend): color-code first-token & total latency in usage views (#43)

acking-you · claude · web-flow · commit 484ae9a57909 · 2026-06-14T04:03:42.000+08:00
Map latency magnitude to color across the usage surfaces, on two scales:

- 首字延迟 (first-token / TTFT): tight 3-step scale (<3s green / <10s amber
  / >=10s red) — the pre-first-byte window stays small even when upstreams
  are slow.
- 整体延迟 (total request latency): wider 5-step scale that keeps the original
  1:3:6:12 spacing (<8s emerald / <24s lime / <48s amber / <96s orange /
  >=96s red), since a streaming reasoning request can legitimately run well
  over a minute.

Changes:
- llm_access_shared.rs: add shared LatencyBadgeColor alias, format_latency_ms,
  first_token_latency_color and total_latency_color helpers + unit tests.
- admin_llm_gateway.rs: the Usage tab and Usage-journal-preview tab now use
  the shared helpers; first-token latency is rendered as a color-coded badge
  instead of plain muted text. The private format_latency_ms is dropped in
  favor of the shared one.
- llm_access_usage.rs (public /llm-access/usage): the latency cell reuses the
  colored-badge style and now shows first-token latency; the 账号 (account)
  column is removed.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
diff --git a/crates/frontend/src/pages/admin_llm_gateway.rs b/crates/frontend/src/pages/admin_llm_gateway.rs
@@ -61,8 +61,9 @@ use crate::{
         search_box::SearchBox, status_badge::StatusBadge, tab_bar::render_tab_bar,
     },
     pages::llm_access_shared::{
-        confirm_destructive, credit_usage_missing_label, format_ms, format_number_i64,
-        format_number_u64, token_usage_missing_label, MaskedSecretCode,
+        confirm_destructive, credit_usage_missing_label, first_token_latency_color,
+        format_latency_ms, format_ms, format_number_i64, format_number_u64, total_latency_color,
+        token_usage_missing_label, MaskedSecretCode,
     },
     router::Route,
 };
@@ -297,10 +298,6 @@ enum KeySortMode {
     UsageDesc,
 }
 
-fn format_latency_ms(latency_ms: i32) -> String {
-    format!("{} ms", latency_ms.max(0))
-}
-
 fn format_optional_latency_ms(latency_ms: Option<i32>) -> String {
     latency_ms
         .map(format_latency_ms)
@@ -6540,13 +6537,11 @@ pub fn admin_llm_gateway_page() -> Html {
                                                             })
                                                         };
                                                         let latency_ms_val = event.latency_ms.unwrap_or(0) as i32;
-                                                        let latency_color = if latency_ms_val < 3000 {
-                                                            ("border-emerald-500/20", "bg-emerald-500/10", "text-emerald-700", "dark:text-emerald-200")
-                                                        } else if latency_ms_val < 10000 {
-                                                            ("border-amber-500/20", "bg-amber-500/10", "text-amber-700", "dark:text-amber-200")
-                                                        } else {
-                                                            ("border-red-500/20", "bg-red-500/10", "text-red-700", "dark:text-red-200")
-                                                        };
+                                                        let latency_color = total_latency_color(latency_ms_val);
+                                                        let first_token = event.first_sse_write_ms.map(|first_ms| {
+                                                            let first_ms = first_ms.clamp(0, i32::MAX as i64) as i32;
+                                                            (first_ms, first_token_latency_color(first_ms))
+                                                        });
                                                         let status_ok = event.status_code >= 200 && event.status_code < 300;
                                                         html! {
                                                             <tr class={classes!("border-t", "border-[var(--border)]", "align-top")}>
@@ -6594,12 +6589,14 @@ pub fn admin_llm_gateway_page() -> Html {
                                                                         <span class={classes!("inline-flex", "rounded-full", "border", "px-2", "py-0.5", "text-[11px]", "font-semibold", latency_color.0, latency_color.1, latency_color.2, latency_color.3)}>
                                                                             { format_latency_ms(latency_ms_val) }
                                                                         </span>
-                                                                        <div class={classes!("mt-0.5", "text-[10px]", "text-[var(--muted)]")}>
-                                                                            { if let Some(first_ms) = event.first_sse_write_ms {
-                                                                                format!("首字 {}ms", first_ms.max(0))
+                                                                        <div class={classes!("mt-0.5")}>
+                                                                            if let Some((first_ms, first_color)) = first_token {
+                                                                                <span class={classes!("inline-flex", "rounded-full", "border", "px-1.5", "py-0.5", "text-[10px]", "font-semibold", first_color.0, first_color.1, first_color.2, first_color.3)}>
+                                                                                    { format!("首字 {}", format_latency_ms(first_ms)) }
+                                                                                </span>
                                                                             } else {
-                                                                                "-".to_string()
-                                                                            }}
+                                                                                <span class={classes!("text-[10px]", "text-[var(--muted)]")}>{ "首字 -" }</span>
+                                                                            }
                                                                         </div>
                                                                     } else {
                                                                         <span class={classes!("text-xs", "text-[var(--muted)]")}>{ "-" }</span>
@@ -9174,14 +9171,11 @@ pub fn admin_llm_gateway_page() -> Html {
                                             &event.request_url,
                                             &event.endpoint,
                                         );
-                                        let latency_ms_val = event.latency_ms;
-                                        let latency_color = if latency_ms_val < 3000 {
-                                            ("border-emerald-500/20", "bg-emerald-500/10", "text-emerald-700", "dark:text-emerald-200")
-                                        } else if latency_ms_val < 10000 {
-                                            ("border-amber-500/20", "bg-amber-500/10", "text-amber-700", "dark:text-amber-200")
-                                        } else {
-                                            ("border-red-500/20", "bg-red-500/10", "text-red-700", "dark:text-red-200")
-                                        };
+                                        let latency_color = total_latency_color(event.latency_ms);
+                                        let first_token = event.first_sse_write_ms.map(|first_ms| {
+                                            let first_ms = first_ms.max(0);
+                                            (first_ms, first_token_latency_color(first_ms))
+                                        });
                                         let status_ok = event.status_code == 200;
                                         html! {
                                             <tr class={classes!("border-t", "border-[var(--border)]", "align-top")}>
@@ -9228,12 +9222,14 @@ pub fn admin_llm_gateway_page() -> Html {
                                                     <span class={classes!("inline-flex", "rounded-full", "border", "px-2", "py-0.5", "text-[11px]", "font-semibold", latency_color.0, latency_color.1, latency_color.2, latency_color.3)}>
                                                         { format_latency_ms(event.latency_ms) }
                                                     </span>
-                                                    <div class={classes!("mt-0.5", "text-[10px]", "text-[var(--muted)]")}>
-                                                        { if let Some(first_ms) = event.first_sse_write_ms {
-                                                            format!("首字 {}ms", first_ms.max(0))
+                                                    <div class={classes!("mt-0.5")}>
+                                                        if let Some((first_ms, first_color)) = first_token {
+                                                            <span class={classes!("inline-flex", "rounded-full", "border", "px-1.5", "py-0.5", "text-[10px]", "font-semibold", first_color.0, first_color.1, first_color.2, first_color.3)}>
+                                                                { format!("首字 {}", format_latency_ms(first_ms)) }
+                                                            </span>
                                                         } else {
-                                                            "-".to_string()
-                                                        }}
+                                                            <span class={classes!("text-[10px]", "text-[var(--muted)]")}>{ "首字 -" }</span>
+                                                        }
                                                     </div>
                                                 </td>
                                                 <td class={classes!("py-2.5", "pr-3", "whitespace-nowrap", "font-mono", "text-[11px]")}>
diff --git a/crates/frontend/src/pages/llm_access_shared.rs b/crates/frontend/src/pages/llm_access_shared.rs
@@ -205,6 +205,49 @@ pub fn token_usage_missing_label() -> &'static str {
     "token usage unavailable"
 }
 
+/// Tailwind class tuple `(border, bg, text, dark:text)` for a latency badge — maps a
+/// latency value onto a green→red scale. Shared so the admin usage tables and the
+/// public usage lookup render latency identically.
+pub type LatencyBadgeColor = (&'static str, &'static str, &'static str, &'static str);
+
+/// Format a latency in milliseconds as `"{n} ms"`; negative inputs render as `"0 ms"`.
+pub fn format_latency_ms(latency_ms: i32) -> String {
+    format!("{} ms", latency_ms.max(0))
+}
+
+/// Color scale for **first-token latency (首字延迟 / TTFT)**, in milliseconds. The window
+/// up to the first streamed byte stays small even when the upstream is slow, so the bands
+/// are tight: `<3s` emerald · `<10s` amber · `>=10s` red (negative inputs are emerald).
+pub fn first_token_latency_color(latency_ms: i32) -> LatencyBadgeColor {
+    if latency_ms < 3_000 {
+        ("border-emerald-500/20", "bg-emerald-500/10", "text-emerald-700", "dark:text-emerald-200")
+    } else if latency_ms < 10_000 {
+        ("border-amber-500/20", "bg-amber-500/10", "text-amber-700", "dark:text-amber-200")
+    } else {
+        ("border-red-500/20", "bg-red-500/10", "text-red-700", "dark:text-red-200")
+    }
+}
+
+/// Color scale for **total request latency (整体延迟)**, in milliseconds. This spans the
+/// whole generation, so a streaming reasoning request can legitimately run well over a
+/// minute. The range is therefore much wider than the first-token scale and uses five
+/// bands whose upper bounds keep a `1 : 3 : 6 : 12` ratio (8s, 24s, 48s, 96s):
+/// `<8s` emerald · `<24s` lime · `<48s` amber · `<96s` orange · `>=96s` red.
+/// Each upper bound is exclusive; negative inputs fall into the emerald band.
+pub fn total_latency_color(latency_ms: i32) -> LatencyBadgeColor {
+    if latency_ms < 8_000 {
+        ("border-emerald-500/20", "bg-emerald-500/10", "text-emerald-700", "dark:text-emerald-200")
+    } else if latency_ms < 24_000 {
+        ("border-lime-500/20", "bg-lime-500/10", "text-lime-700", "dark:text-lime-200")
+    } else if latency_ms < 48_000 {
+        ("border-amber-500/20", "bg-amber-500/10", "text-amber-700", "dark:text-amber-200")
+    } else if latency_ms < 96_000 {
+        ("border-orange-500/20", "bg-orange-500/10", "text-orange-700", "dark:text-orange-200")
+    } else {
+        ("border-red-500/20", "bg-red-500/10", "text-red-700", "dark:text-red-200")
+    }
+}
+
 pub fn credit_usage_missing_label() -> &'static str {
     "credit usage unavailable"
 }
@@ -407,8 +450,8 @@ fn parse_gpt_model_rank(slug: &str) -> Option<(i32, i32, i32, i32)> {
 mod tests {
     use super::{
         codex_model_catalog_download_command, codex_provider_config, credit_usage_missing_label,
-        format_kiro_disabled_reason, preferred_model_slug_from_catalog_json,
-        token_usage_missing_label,
+        first_token_latency_color, format_kiro_disabled_reason, format_latency_ms,
+        preferred_model_slug_from_catalog_json, token_usage_missing_label, total_latency_color,
     };
 
     #[test]
@@ -479,4 +522,29 @@ mod tests {
         assert_eq!(token_usage_missing_label(), "token usage unavailable");
         assert_eq!(credit_usage_missing_label(), "credit usage unavailable");
     }
+
+    #[test]
+    fn format_latency_ms_clamps_negatives() {
+        assert_eq!(format_latency_ms(1_234), "1234 ms");
+        assert_eq!(format_latency_ms(0), "0 ms");
+        assert_eq!(format_latency_ms(-5), "0 ms");
+    }
+
+    #[test]
+    fn first_token_latency_color_uses_tight_thresholds() {
+        // emerald < 3s, amber < 10s, red otherwise — each upper bound is exclusive.
+        assert_eq!(first_token_latency_color(2_999).0, "border-emerald-500/20");
+        assert_eq!(first_token_latency_color(3_000).0, "border-amber-500/20");
+        assert_eq!(first_token_latency_color(9_999).0, "border-amber-500/20");
+        assert_eq!(first_token_latency_color(10_000).0, "border-red-500/20");
+    }
+
+    #[test]
+    fn total_latency_color_spans_a_wider_five_step_range() {
+        assert_eq!(total_latency_color(7_999).0, "border-emerald-500/20");
+        assert_eq!(total_latency_color(8_000).0, "border-lime-500/20");
+        assert_eq!(total_latency_color(24_000).0, "border-amber-500/20");
+        assert_eq!(total_latency_color(48_000).0, "border-orange-500/20");
+        assert_eq!(total_latency_color(96_000).0, "border-red-500/20");
+    }
 }
diff --git a/crates/frontend/src/pages/llm_access_usage.rs b/crates/frontend/src/pages/llm_access_usage.rs
@@ -10,7 +10,8 @@ use crate::{
     },
     components::{pagination::Pagination, token_usage_trend_chart::TokenUsageTrendChart},
     pages::llm_access_shared::{
-        format_ms, format_number_i64, format_number_u64, token_usage_missing_label,
+        first_token_latency_color, format_latency_ms, format_ms, format_number_i64,
+        format_number_u64, token_usage_missing_label, total_latency_color,
     },
     router::Route,
 };
@@ -397,7 +398,6 @@ pub fn llm_access_usage_page() -> Html {
                                 <thead class={classes!("border-b", "border-[var(--border)]", "font-mono", "text-[11px]", "uppercase", "tracking-[0.12em]", "text-[var(--muted)]")}>
                                     <tr>
                                         <th class={classes!("py-2", "pr-3")}>{ "时间 / Event ID" }</th>
-                                        <th class={classes!("py-2", "pr-3")}>{ "账号" }</th>
                                         <th class={classes!("py-2", "pr-3")}>{ "请求" }</th>
                                         <th class={classes!("py-2", "pr-3")}>{ "模型 / 状态" }</th>
                                         <th class={classes!("py-2", "pr-3")}>{ "延迟" }</th>
@@ -407,6 +407,11 @@ pub fn llm_access_usage_page() -> Html {
                                 </thead>
                                 <tbody>
                                     { for response.events.iter().map(|event| {
+                                        let latency_color = total_latency_color(event.latency_ms);
+                                        let first_token = event.first_sse_write_ms.map(|first_ms| {
+                                            let first_ms = first_ms.max(0);
+                                            (first_ms, first_token_latency_color(first_ms))
+                                        });
                                         html! {
                                             <tr key={event.id.clone()} class={classes!("border-b", "border-[var(--border)]", "align-top")}>
                                                 <td class={classes!("py-3", "pr-3", "min-w-[13rem]", "whitespace-nowrap", "font-mono", "text-xs")}>
@@ -415,11 +420,6 @@ pub fn llm_access_usage_page() -> Html {
                                                         { event.id.clone() }
                                                     </div>
                                                 </td>
-                                                <td class={classes!("py-3", "pr-3", "min-w-[10rem]")}>
-                                                    <span class={classes!("inline-flex", "rounded-full", "border", "border-emerald-500/20", "bg-emerald-500/10", "px-2.5", "py-1", "text-xs", "font-semibold", "text-emerald-700", "dark:text-emerald-200")}>
-                                                        { event.account_name.clone().unwrap_or_else(|| "legacy auth".to_string()) }
-                                                    </span>
-                                                </td>
                                                 <td class={classes!("py-3", "pr-3", "min-w-[22rem]")}>
                                                     <div class={classes!("flex", "items-start", "gap-2")}>
                                                         <span class={classes!("inline-flex", "rounded-full", "border", "border-sky-500/20", "bg-sky-500/10", "px-2", "py-1", "text-[11px]", "font-semibold", "uppercase", "tracking-[0.12em]", "text-sky-700", "dark:text-sky-200")}>
@@ -444,8 +444,19 @@ pub fn llm_access_usage_page() -> Html {
                                                         </div>
                                                     }
                                                 </td>
-                                                <td class={classes!("py-3", "pr-3", "whitespace-nowrap", "font-mono", "text-xs")}>
-                                                    { format!("{} ms", event.latency_ms) }
+                                                <td class={classes!("py-3", "pr-3", "whitespace-nowrap")}>
+                                                    <span class={classes!("inline-flex", "rounded-full", "border", "px-2", "py-0.5", "font-mono", "text-[11px]", "font-semibold", latency_color.0, latency_color.1, latency_color.2, latency_color.3)}>
+                                                        { format_latency_ms(event.latency_ms) }
+                                                    </span>
+                                                    <div class={classes!("mt-1")}>
+                                                        if let Some((first_ms, first_color)) = first_token {
+                                                            <span class={classes!("inline-flex", "rounded-full", "border", "px-2", "py-0.5", "font-mono", "text-[11px]", "font-semibold", first_color.0, first_color.1, first_color.2, first_color.3)}>
+                                                                { format!("首字 {}", format_latency_ms(first_ms)) }
+                                                            </span>
+                                                        } else {
+                                                            <span class={classes!("font-mono", "text-[11px]", "text-[var(--muted)]")}>{ "首字 -" }</span>
+                                                        }
+                                                    </div>
                                                 </td>
                                                 <td class={classes!("py-3", "pr-3", "whitespace-nowrap", "font-mono", "text-xs")}>
                                                     { format!("{}/{}", event.client_ip, event.ip_region) }