refactor(routing): rename hint:reasoning-quick → hint:chat (tinyhumansai#1801)

senamakel · web-flow · commit 8dda038280fa · 2026-05-15T03:02:23.000-07:00
diff --git a/src/openhuman/agent/agents/loader.rs b/src/openhuman/agent/agents/loader.rs
@@ -301,9 +301,9 @@ mod tests {
     }
 
     #[test]
-    fn orchestrator_has_reasoning_hint_and_named_tools() {
+    fn orchestrator_has_chat_hint_and_named_tools() {
         let def = find("orchestrator");
-        assert!(matches!(def.model, ModelSpec::Hint(ref h) if h == "reasoning-quick"));
+        assert!(matches!(def.model, ModelSpec::Hint(ref h) if h == "chat"));
         match def.tools {
             ToolScope::Named(tools) => {
                 // spawn_subagent was removed in #1141; spawn_worker_thread is the replacement
diff --git a/src/openhuman/agent/agents/orchestrator/agent.toml b/src/openhuman/agent/agents/orchestrator/agent.toml
@@ -61,14 +61,15 @@ subagents = [
 ]
 
 [model]
-# `reasoning-quick` (Kimi K2.6 Turbo on Fireworks via backend PR #760)
-# is tuned for low time-to-first-token on conversational turns. The
-# orchestrator is a planner/router that mostly picks a delegate and
-# synthesises sub-agent output — workload that doesn't benefit from
-# the slower deep-reasoning tier. Sub-agents that need heavier
-# reasoning can still opt into `reasoning-v1` (DeepSeek V4 Pro) via
+# Front-line conversational agent: TTFT dominates UX. `hint:chat` resolves
+# to the fast chat tier (`reasoning-quick-v1` / Kimi K2.6 Turbo on
+# Fireworks via backend PR #760, 128k context, `supportsThinking: false`).
+# The orchestrator is a planner/router that picks a delegate and
+# synthesises sub-agent output — workload that doesn't benefit from the
+# slower deep-reasoning tier. Sub-agents that need heavier reasoning
+# can still opt into `reasoning-v1` (DeepSeek V4 Pro) via
 # `ModelSpec::Hint("reasoning")` in their own definitions.
-hint = "reasoning-quick"
+hint = "chat"
 
 [tools]
 # Direct tools — things the orchestrator calls itself rather than
diff --git a/src/openhuman/config/schema/types.rs b/src/openhuman/config/schema/types.rs
@@ -9,12 +9,13 @@ use std::path::PathBuf;
 /// Standard model identifiers matching the backend model registry.
 pub const MODEL_AGENTIC_V1: &str = "agentic-v1";
 pub const MODEL_REASONING_V1: &str = "reasoning-v1";
-/// Low-latency reasoning tier. Backend maps this to Kimi K2.6 Turbo on
+/// Low-latency chat tier. Backend maps this to Kimi K2.6 Turbo on
 /// Fireworks (128k context, `supportsThinking: false`) — tuned for
 /// time-to-first-token on conversational turns. See backend PR #760.
 /// The orchestrator (user-facing front-line agent) rides on this tier
-/// by default so chat responses feel snappy; reach for the slower
-/// `reasoning-v1` (DeepSeek V4 Pro) only when deep reasoning is needed.
+/// by default (via `hint:chat`) so chat responses feel snappy; reach
+/// for the slower `reasoning-v1` (DeepSeek V4 Pro) only when deep
+/// reasoning is needed.
 pub const MODEL_REASONING_QUICK_V1: &str = "reasoning-quick-v1";
 pub const MODEL_CODING_V1: &str = "coding-v1";
 /// Default model used when no explicit model is configured.
diff --git a/src/openhuman/providers/router.rs b/src/openhuman/providers/router.rs
@@ -10,7 +10,7 @@ use std::collections::HashMap;
 fn openhuman_tier_to_hint(model: &str) -> Option<&'static str> {
     match model {
         "reasoning-v1" => Some("reasoning"),
-        "reasoning-quick-v1" => Some("reasoning-quick"),
+        "reasoning-quick-v1" => Some("chat"),
         "agentic-v1" => Some("agentic"),
         "coding-v1" => Some("coding"),
         "summarization-v1" => Some("summarization"),
diff --git a/src/openhuman/routing/policy.rs b/src/openhuman/routing/policy.rs
@@ -91,7 +91,10 @@ impl RoutingTarget {
 /// - `hint:reaction`, `hint:classify`, `hint:format`, `hint:sentiment`,
 ///   `hint:lightweight` → [`TaskCategory::Lightweight`]
 /// - `hint:summarize`, `hint:medium`, `hint:tool_lite` → [`TaskCategory::Medium`]
-/// - All other `hint:*` values and exact model names → [`TaskCategory::Heavy`]
+/// - `hint:chat`, `hint:reasoning`, every other `hint:*` value, and any exact
+///   model name → [`TaskCategory::Heavy`]. `hint:chat` is the orchestrator's
+///   front-line conversational tier — it must always go remote because the
+///   local model is too slow for the TTFT budget that motivated the hint.
 pub fn classify(model: &str) -> TaskCategory {
     match model.strip_prefix("hint:") {
         Some("reaction" | "classify" | "format" | "sentiment" | "lightweight") => {
diff --git a/src/openhuman/routing/provider.rs b/src/openhuman/routing/provider.rs
@@ -95,7 +95,10 @@ impl IntelligentRoutingProvider {
         // Keep remote model naming aligned with backend modelRegistry.
         match requested_model.strip_prefix("hint:") {
             Some("reasoning") => MODEL_REASONING_V1.to_string(),
-            Some("reasoning-quick") => MODEL_REASONING_QUICK_V1.to_string(),
+            // Orchestrator's low-TTFT chat tier — Kimi K2.6 Turbo on the
+            // backend's `reasoning-quick-v1`. Backend support added in
+            // tinyhumansai/backend#760.
+            Some("chat") => MODEL_REASONING_QUICK_V1.to_string(),
             Some("agentic") => MODEL_AGENTIC_V1.to_string(),
             Some("coding") => MODEL_CODING_V1.to_string(),
             _ => requested_model.to_string(),
diff --git a/src/openhuman/routing/provider_tests.rs b/src/openhuman/routing/provider_tests.rs
@@ -387,6 +387,29 @@ async fn regression_reasoning_hint_routes_remote_with_backend_model_name() {
     assert_eq!(local.calls(), 0);
 }
 
+#[tokio::test]
+async fn regression_chat_hint_routes_remote_as_reasoning_quick_v1() {
+    let local = MockProvider::new("local", "l");
+    let remote = MockProvider::new("remote", "r");
+    let health = LocalHealthChecker::seeded(true);
+
+    let r = router(
+        Arc::clone(&local),
+        Arc::clone(&remote),
+        health,
+        RoutingHints::default(),
+    );
+    r.chat_with_system(None, "hi", "hint:chat", 0.7)
+        .await
+        .unwrap();
+
+    // hint:chat must be translated to the backend's reasoning-quick-v1 tier
+    // (Kimi K2.6 Turbo). Sending the literal "hint:chat" would be rejected
+    // with HTTP 400, since the backend's modelRegistry has no `hint:*` aliases.
+    assert_eq!(remote.last_model(), "reasoning-quick-v1");
+    assert_eq!(local.calls(), 0);
+}
+
 #[tokio::test]
 async fn remote_failure_propagates_without_local_fallback() {
     let local = MockProvider::new("local", "l");