Skip to content

Commit 7ebd5f8

Browse files
authored
feat: [router] serve worker KV query over dynamo endpoint instead of nats (#5451)
1 parent 283b20c commit 7ebd5f8

6 files changed

Lines changed: 254 additions & 110 deletions

File tree

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/bin/bash
2+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
#
# Launches two KV-mode router frontends and two vLLM workers (one per GPU index).
# Everything except the final worker is started in the background; the final
# worker runs in the foreground and keeps the script alive.
4+
# Abort as soon as any foreground command fails.
set -e
5+
# On script exit, `kill 0` signals every process in this script's process group,
# which takes down the backgrounded routers and worker as well.
trap 'echo Cleaning up...; kill 0' EXIT
6+
7+
# Set deterministic hash for KV event IDs
8+
export PYTHONHASHSEED=0
9+
10+
# Common configuration
11+
MODEL="Qwen/Qwen3-0.6B"
12+
BLOCK_SIZE=64
13+
14+
# run two routers (different HTTP + system ports)
15+
# Note: use --router-reset-states only on one router to avoid wiping shared state twice.
# Ports are overridable through DYN_SYSTEM_PORT_R1 / DYN_HTTP_PORT_R1 (defaults 8091 / 8000).
16+
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT_R1:-8091} \
17+
python -m dynamo.frontend \
18+
--router-mode kv \
19+
--router-reset-states \
20+
--http-port ${DYN_HTTP_PORT_R1:-8000} &
21+
# Second router: same mode, no state reset; ports overridable through
# DYN_SYSTEM_PORT_R2 / DYN_HTTP_PORT_R2 (defaults 8092 / 8001).
22+
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT_R2:-8092} \
23+
python -m dynamo.frontend \
24+
--router-mode kv \
25+
--http-port ${DYN_HTTP_PORT_R2:-8001} &
26+
27+
# run workers (enable local indexer so routers can query on restart)
# Worker 1: GPU 0, system port from DYN_SYSTEM_PORT1 (default 8081), KV events on tcp://*:20080.
28+
DYN_LOCAL_INDEXER=true \
29+
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT1:-8081} \
30+
CUDA_VISIBLE_DEVICES=0 python3 -m dynamo.vllm \
31+
--model $MODEL \
32+
--block-size $BLOCK_SIZE \
33+
--enforce-eager \
34+
--connector none \
35+
--enable-local-indexer true \
36+
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20080","enable_kv_cache_events":true}' &
37+
# Worker 2: GPU 1, system port from DYN_SYSTEM_PORT2 (default 8082), KV events on tcp://*:20081.
# Runs in the foreground (no trailing &).
# NOTE(review): VLLM_NIXL_SIDE_CHANNEL_PORT is set only here, presumably so it does not
# collide with worker 1's default side-channel port — confirm.
38+
DYN_LOCAL_INDEXER=true \
39+
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT2:-8082} \
40+
VLLM_NIXL_SIDE_CHANNEL_PORT=20097 \
41+
CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.vllm \
42+
--model $MODEL \
43+
--block-size $BLOCK_SIZE \
44+
--enforce-eager \
45+
--connector none \
46+
--enable-local-indexer true \
47+
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20081","enable_kv_cache_events":true}'

lib/llm/src/kv_router.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ pub const RADIX_STATE_BUCKET: &str = "radix-bucket";
7979
pub const RADIX_STATE_FILE: &str = "radix-state";
8080

8181
// for worker-local kvindexer query
82-
pub const WORKER_KV_INDEXER_QUERY_SUBJECT: &str = "worker_kv_indexer_query";
82+
pub const WORKER_KV_INDEXER_QUERY_ENDPOINT: &str = "worker_kv_indexer_query";
8383
pub const WORKER_KV_INDEXER_BUFFER_SIZE: usize = 1024; // store 1024 most recent events in worker buffer
8484

8585
// for router discovery registration

lib/llm/src/kv_router/indexer.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ use async_trait::async_trait;
3535
use dynamo_runtime::{
3636
component::Component,
3737
metrics::{MetricsHierarchy, prometheus_names::kvrouter},
38+
protocols::maybe_error::MaybeError,
3839
};
3940
use prometheus::{IntCounterVec, Opts};
4041
use serde::{Deserialize, Serialize};
@@ -142,6 +143,21 @@ pub enum WorkerKvQueryResponse {
142143
},
143144
/// Invalid range: end_id < start_id
144145
InvalidRange { start_id: u64, end_id: u64 },
146+
/// Query failed on worker (serialized error)
147+
Error(String),
148+
}
149+
150+
/// `MaybeError` support for worker KV query responses: a failure is carried
/// in-band as the `Error` variant instead of through a separate result type.
impl MaybeError for WorkerKvQueryResponse {
151+
/// Builds an `Error` response from `err`.
///
/// Only the `to_string()` rendering of `err` is kept; the original error
/// value is dropped.
fn from_err(err: Box<dyn std::error::Error + Send + Sync>) -> Self {
152+
WorkerKvQueryResponse::Error(err.to_string())
153+
}
154+
155+
/// Returns the failure carried by this response, if any.
///
/// Yields `Some` with an `anyhow::Error` built from a clone of the stored
/// message when `self` is `Error`, and `None` for every other variant.
fn err(&self) -> Option<anyhow::Error> {
156+
match self {
157+
WorkerKvQueryResponse::Error(msg) => Some(anyhow::Error::msg(msg.clone())),
158+
// Catch-all: any variant other than `Error` is reported as "no error",
// including variants added to the enum later.
_ => None,
159+
}
160+
}
145161
}
146162

147163
/// A block in the Radix Tree.

lib/llm/src/kv_router/publisher.rs

Lines changed: 7 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,17 @@ use tokio_util::sync::CancellationToken;
1616
use zeromq::{Socket, SocketRecv, SubSocket};
1717

1818
use dynamo_runtime::metrics::{MetricsHierarchy, prometheus_names::kvstats};
19-
use dynamo_runtime::traits::{
20-
DistributedRuntimeProvider, events::EventPublisher, events::EventSubscriber,
21-
};
19+
use dynamo_runtime::traits::{DistributedRuntimeProvider, events::EventPublisher};
2220
use dynamo_runtime::{
2321
component::{Component, Namespace},
2422
transports::nats::{NatsQueue, Slug},
2523
};
26-
use futures::StreamExt;
2724

2825
use crate::kv_router::{
2926
KV_EVENT_SUBJECT, KV_METRICS_SUBJECT, WORKER_KV_INDEXER_BUFFER_SIZE,
30-
WORKER_KV_INDEXER_QUERY_SUBJECT,
31-
indexer::{KvIndexerMetrics, LocalKvIndexer, RouterEvent, WorkerKvQueryRequest},
27+
indexer::{KvIndexerMetrics, LocalKvIndexer, RouterEvent},
3228
protocols::*,
29+
worker_query::start_worker_kv_query_endpoint,
3330
};
3431
use dynamo_runtime::config::environment_names::nats as env_nats;
3532

@@ -173,11 +170,10 @@ impl KvEventPublisher {
173170
.drt()
174171
.runtime()
175172
.secondary()
176-
.spawn(start_worker_kv_query_service(
173+
.spawn(start_worker_kv_query_endpoint(
177174
component,
178175
worker_id,
179176
local_indexer,
180-
cancellation_token.clone(),
181177
))
182178
});
183179

@@ -311,80 +307,6 @@ async fn start_event_processor<P: EventPublisher + Send + Sync + 'static>(
311307
}
312308
}
313309

314-
// Processor for Router -> LocalKvIndexer query service
315-
async fn start_worker_kv_query_service(
316-
component: Component,
317-
worker_id: u64,
318-
local_indexer: Arc<LocalKvIndexer>,
319-
cancellation_token: CancellationToken,
320-
) {
321-
// Create NATS subscriber on a subject specific to worker's id
322-
let subject = format!("{}.{}", WORKER_KV_INDEXER_QUERY_SUBJECT, worker_id);
323-
let mut subscriber = match component.subscribe(&subject).await {
324-
Ok(sub) => sub,
325-
Err(e) => {
326-
tracing::error!(
327-
"Query service failed to subscribe for worker {worker_id} on subject {subject}: {e}"
328-
);
329-
return;
330-
}
331-
};
332-
tracing::info!("Query service listening on NATS for worker {worker_id} on subject {subject}");
333-
334-
// Receive query request from router, retrieve event(s) from LocalKvIndexer, return response
335-
loop {
336-
tokio::select! {
337-
_ = cancellation_token.cancelled() => {
338-
tracing::info!("Query service received cancellation signal for worker {worker_id}");
339-
break;
340-
}
341-
342-
msg = subscriber.next() => {
343-
let Some(msg) = msg else {
344-
tracing::warn!("Query service NATS stream ended for worker {worker_id}");
345-
break;
346-
};
347-
348-
// deserialize from msg (async_nats::Message)
349-
let request: WorkerKvQueryRequest = match serde_json::from_slice(&msg.payload) {
350-
Ok(request) => request,
351-
Err(e) => {
352-
tracing::error!("Failed to deserialize WorkerKvQueryRequest for worker {worker_id}: {e}");
353-
continue;
354-
}
355-
};
356-
357-
tracing::debug!("Received query request for worker {worker_id}: {request:?}");
358-
359-
// Query events based on optional start/end ids
360-
let response = local_indexer
361-
.get_events_in_id_range(request.start_event_id, request.end_event_id)
362-
.await;
363-
364-
// Send reply back (if reply subject exists)
365-
if let Some(reply_subject) = msg.reply {
366-
let payload = match serde_json::to_vec(&response) {
367-
Ok(p) => p,
368-
Err(e) => {
369-
tracing::error!("Failed to serialize response for worker {worker_id}: {e}");
370-
continue;
371-
}
372-
};
373-
374-
// Publish through DRT/NATS directly instead of namespace (adds a prefix)
375-
if let Err(e) = component
376-
.drt()
377-
.kv_router_nats_publish(reply_subject.to_string(), payload.into())
378-
.await
379-
{
380-
tracing::error!("Failed to send reply for worker {worker_id}: {e}");
381-
}
382-
}
383-
}
384-
}
385-
}
386-
}
387-
388310
/// Calculate exponential backoff duration based on consecutive error count
389311
fn calculate_backoff_ms(consecutive_errors: u32) -> u64 {
390312
std::cmp::min(
@@ -1864,6 +1786,9 @@ mod tests_startup_helpers {
18641786
let missed_events = match response {
18651787
crate::kv_router::indexer::WorkerKvQueryResponse::Events(e) => e,
18661788
crate::kv_router::indexer::WorkerKvQueryResponse::TreeDump(e) => e,
1789+
crate::kv_router::indexer::WorkerKvQueryResponse::Error(message) => {
1790+
panic!("Unexpected error response: {message}")
1791+
}
18671792
other => panic!("Unexpected response: {:?}", other),
18681793
};
18691794
assert_eq!(

lib/llm/src/kv_router/subscriber.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,9 @@ pub async fn recover_from_worker(
232232
WorkerKvQueryResponse::InvalidRange { start_id, end_id } => {
233233
anyhow::bail!("Invalid range: end_id ({end_id}) < start_id ({start_id})");
234234
}
235+
WorkerKvQueryResponse::Error(message) => {
236+
anyhow::bail!("Worker {worker_id} query failed: {message}");
237+
}
235238
};
236239

237240
let events_count = events.len();

0 commit comments

Comments
 (0)