Skip to content

Commit 5056432

Browse files
authored
fix: disabling sse keep-alive (NVIDIA#408)
1 parent 411f07e commit 5056432

2 files changed

Lines changed: 17 additions & 6 deletions

File tree

lib/llm/src/http/service.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ use crate::types::openai::{
5252
use std::{
5353
collections::HashMap,
5454
sync::{Arc, Mutex},
55+
time::Duration,
5556
};
5657

5758
#[derive(Clone)]
@@ -191,6 +192,7 @@ pub struct DeploymentState {
191192
completion_engines: Arc<Mutex<ModelEngines<OpenAICompletionsStreamingEngine>>>,
192193
chat_completion_engines: Arc<Mutex<ModelEngines<OpenAIChatCompletionsStreamingEngine>>>,
193194
metrics: Arc<Metrics>,
195+
sse_keep_alive: Option<Duration>,
194196
}
195197

196198
impl DeploymentState {
@@ -199,6 +201,7 @@ impl DeploymentState {
199201
completion_engines: Arc::new(Mutex::new(ModelEngines::default())),
200202
chat_completion_engines: Arc::new(Mutex::new(ModelEngines::default())),
201203
metrics: Arc::new(Metrics::default()),
204+
sse_keep_alive: None,
202205
}
203206
}
204207

lib/llm/src/http/service/openai.rs

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -182,9 +182,13 @@ async fn completions(
182182
let stream = stream.map(|response| Event::try_from(EventConverter::from(response)));
183183
let stream = monitor_for_disconnects(stream.boxed(), ctx, inflight).await;
184184

185-
Ok(Sse::new(stream)
186-
.keep_alive(KeepAlive::default())
187-
.into_response())
185+
let mut sse_stream = Sse::new(stream);
186+
187+
if let Some(keep_alive) = state.sse_keep_alive {
188+
sse_stream = sse_stream.keep_alive(KeepAlive::default().interval(keep_alive));
189+
}
190+
191+
Ok(sse_stream.into_response())
188192
} else {
189193
let response = CompletionResponse::from_annotated_stream(stream.into())
190194
.await
@@ -270,9 +274,13 @@ async fn chat_completions(
270274
let stream = stream.map(|response| Event::try_from(EventConverter::from(response)));
271275
let stream = monitor_for_disconnects(stream.boxed(), ctx, inflight).await;
272276

273-
Ok(Sse::new(stream)
274-
.keep_alive(KeepAlive::default())
275-
.into_response())
277+
let mut sse_stream = Sse::new(stream);
278+
279+
if let Some(keep_alive) = state.sse_keep_alive {
280+
sse_stream = sse_stream.keep_alive(KeepAlive::default().interval(keep_alive));
281+
}
282+
283+
Ok(sse_stream.into_response())
276284
} else {
277285
let response = NvCreateChatCompletionResponse::from_annotated_stream(stream.into())
278286
.await

0 commit comments

Comments
 (0)