Skip to content

Commit 8dda038

Browse files
authored
refactor(routing): rename hint:reasoning-quick → hint:chat (tinyhumansai#1801)
1 parent aa57a33 commit 8dda038

7 files changed

Lines changed: 46 additions & 15 deletions

File tree

src/openhuman/agent/agents/loader.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,9 +301,9 @@ mod tests {
301301
}
302302

303303
#[test]
304-
fn orchestrator_has_reasoning_hint_and_named_tools() {
304+
fn orchestrator_has_chat_hint_and_named_tools() {
305305
let def = find("orchestrator");
306-
assert!(matches!(def.model, ModelSpec::Hint(ref h) if h == "reasoning-quick"));
306+
assert!(matches!(def.model, ModelSpec::Hint(ref h) if h == "chat"));
307307
match def.tools {
308308
ToolScope::Named(tools) => {
309309
// spawn_subagent was removed in #1141; spawn_worker_thread is the replacement

src/openhuman/agent/agents/orchestrator/agent.toml

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,15 @@ subagents = [
6161
]
6262

6363
[model]
64-
# `reasoning-quick` (Kimi K2.6 Turbo on Fireworks via backend PR #760)
65-
# is tuned for low time-to-first-token on conversational turns. The
66-
# orchestrator is a planner/router that mostly picks a delegate and
67-
# synthesises sub-agent output — workload that doesn't benefit from
68-
# the slower deep-reasoning tier. Sub-agents that need heavier
69-
# reasoning can still opt into `reasoning-v1` (DeepSeek V4 Pro) via
64+
# Front-line conversational agent: time-to-first-token (TTFT) dominates UX. `hint:chat` resolves
65+
# to the fast chat tier (`reasoning-quick-v1` / Kimi K2.6 Turbo on
66+
# Fireworks via backend PR #760, 128k context, `supportsThinking: false`).
67+
# The orchestrator is a planner/router that picks a delegate and
68+
# synthesises sub-agent output — workload that doesn't benefit from the
69+
# slower deep-reasoning tier. Sub-agents that need heavier reasoning
70+
# can still opt into `reasoning-v1` (DeepSeek V4 Pro) via
7071
# `ModelSpec::Hint("reasoning")` in their own definitions.
71-
hint = "reasoning-quick"
72+
hint = "chat"
7273

7374
[tools]
7475
# Direct tools — things the orchestrator calls itself rather than

src/openhuman/config/schema/types.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,13 @@ use std::path::PathBuf;
99
/// Standard model identifiers matching the backend model registry.
1010
pub const MODEL_AGENTIC_V1: &str = "agentic-v1";
1111
pub const MODEL_REASONING_V1: &str = "reasoning-v1";
12-
/// Low-latency reasoning tier. Backend maps this to Kimi K2.6 Turbo on
12+
/// Low-latency chat tier. Backend maps this to Kimi K2.6 Turbo on
1313
/// Fireworks (128k context, `supportsThinking: false`) — tuned for
1414
/// time-to-first-token on conversational turns. See backend PR #760.
1515
/// The orchestrator (user-facing front-line agent) rides on this tier
16-
/// by default so chat responses feel snappy; reach for the slower
17-
/// `reasoning-v1` (DeepSeek V4 Pro) only when deep reasoning is needed.
16+
/// by default (via `hint:chat`) so chat responses feel snappy; reach
17+
/// for the slower `reasoning-v1` (DeepSeek V4 Pro) only when deep
18+
/// reasoning is needed.
1819
pub const MODEL_REASONING_QUICK_V1: &str = "reasoning-quick-v1";
1920
pub const MODEL_CODING_V1: &str = "coding-v1";
2021
/// Default model used when no explicit model is configured.

src/openhuman/providers/router.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use std::collections::HashMap;
1010
fn openhuman_tier_to_hint(model: &str) -> Option<&'static str> {
1111
match model {
1212
"reasoning-v1" => Some("reasoning"),
13-
"reasoning-quick-v1" => Some("reasoning-quick"),
13+
"reasoning-quick-v1" => Some("chat"),
1414
"agentic-v1" => Some("agentic"),
1515
"coding-v1" => Some("coding"),
1616
"summarization-v1" => Some("summarization"),

src/openhuman/routing/policy.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,10 @@ impl RoutingTarget {
9191
/// - `hint:reaction`, `hint:classify`, `hint:format`, `hint:sentiment`,
9292
/// `hint:lightweight` → [`TaskCategory::Lightweight`]
9393
/// - `hint:summarize`, `hint:medium`, `hint:tool_lite` → [`TaskCategory::Medium`]
94-
/// - All other `hint:*` values and exact model names → [`TaskCategory::Heavy`]
94+
/// - `hint:chat`, `hint:reasoning`, any other `hint:*` value, and exact
95+
/// model names → [`TaskCategory::Heavy`]. `hint:chat` is the orchestrator's
96+
/// front-line conversational tier — it must always go remote because the
97+
/// local model is too slow for the TTFT budget that motivated the hint.
9598
pub fn classify(model: &str) -> TaskCategory {
9699
match model.strip_prefix("hint:") {
97100
Some("reaction" | "classify" | "format" | "sentiment" | "lightweight") => {

src/openhuman/routing/provider.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,10 @@ impl IntelligentRoutingProvider {
9595
// Keep remote model naming aligned with backend modelRegistry.
9696
match requested_model.strip_prefix("hint:") {
9797
Some("reasoning") => MODEL_REASONING_V1.to_string(),
98-
Some("reasoning-quick") => MODEL_REASONING_QUICK_V1.to_string(),
98+
// Orchestrator's low-TTFT chat tier — Kimi K2.6 Turbo on the
99+
// backend's `reasoning-quick-v1`. Backend support added in
100+
// tinyhumansai/backend#760.
101+
Some("chat") => MODEL_REASONING_QUICK_V1.to_string(),
99102
Some("agentic") => MODEL_AGENTIC_V1.to_string(),
100103
Some("coding") => MODEL_CODING_V1.to_string(),
101104
_ => requested_model.to_string(),

src/openhuman/routing/provider_tests.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,29 @@ async fn regression_reasoning_hint_routes_remote_with_backend_model_name() {
387387
assert_eq!(local.calls(), 0);
388388
}
389389

390+
#[tokio::test]
391+
async fn regression_chat_hint_routes_remote_as_reasoning_quick_v1() {
392+
let local = MockProvider::new("local", "l");
393+
let remote = MockProvider::new("remote", "r");
394+
let health = LocalHealthChecker::seeded(true);
395+
396+
let r = router(
397+
Arc::clone(&local),
398+
Arc::clone(&remote),
399+
health,
400+
RoutingHints::default(),
401+
);
402+
r.chat_with_system(None, "hi", "hint:chat", 0.7)
403+
.await
404+
.unwrap();
405+
406+
// hint:chat must be translated to the backend's reasoning-quick-v1 tier
407+
// (Kimi K2.6 Turbo). Sending the literal "hint:chat" would get an HTTP 400 from the
408+
// backend since modelRegistry has no `hint:*` aliases.
409+
assert_eq!(remote.last_model(), "reasoning-quick-v1");
410+
assert_eq!(local.calls(), 0);
411+
}
412+
390413
#[tokio::test]
391414
async fn remote_failure_propagates_without_local_fallback() {
392415
let local = MockProvider::new("local", "l");

0 commit comments

Comments
 (0)