Skip to content

Commit 2a85f72

Browse files
authored
server : handle closed connection for tasks (ggml-org#18459)
1 parent 7cbec34 commit 2a85f72

1 file changed

Lines changed: 51 additions & 12 deletions

File tree

tools/server/server-context.cpp

Lines changed: 51 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2960,19 +2960,22 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
29602960
// in streaming mode, the first error must be treated as non-stream response
29612961
// this is to match the OAI API behavior
29622962
// ref: https://github.com/ggml-org/llama.cpp/pull/16486#discussion_r2419657309
2963-
server_task_result_ptr first_result = rd.next(req.should_stop);
2963+
auto first_result = rd.next(req.should_stop);
29642964
if (first_result == nullptr) {
2965+
GGML_ASSERT(req.should_stop());
29652966
return res; // connection is closed
2966-
} else if (first_result->is_error()) {
2967+
}
2968+
2969+
if (first_result->is_error()) {
29672970
res->error(first_result->to_json());
29682971
return res;
2969-
} else {
2970-
GGML_ASSERT(
2971-
dynamic_cast<server_task_result_cmpl_partial*>(first_result.get()) != nullptr
2972-
|| dynamic_cast<server_task_result_cmpl_final*>(first_result.get()) != nullptr
2973-
);
29742972
}
29752973

2974+
GGML_ASSERT(
2975+
dynamic_cast<server_task_result_cmpl_partial*>(first_result.get()) != nullptr ||
2976+
dynamic_cast<server_task_result_cmpl_final*> (first_result.get()) != nullptr
2977+
);
2978+
29762979
// next responses are streamed
29772980
// to be sent immediately
29782981
json first_result_json = first_result->to_json();
@@ -3028,6 +3031,7 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
30283031
auto result = rd.next(req.should_stop);
30293032
if (result == nullptr) {
30303033
SRV_DBG("%s", "stopping streaming due to should_stop condition\n");
3034+
GGML_ASSERT(req.should_stop());
30313035
return false; // should_stop condition met
30323036
}
30333037

@@ -3111,6 +3115,11 @@ void server_routes::init_routes() {
31113115

31123116
// get the result
31133117
auto result = res->rd.next(req.should_stop);
3118+
if (!result) {
3119+
// connection was closed
3120+
GGML_ASSERT(req.should_stop());
3121+
return res;
3122+
}
31143123

31153124
if (result->is_error()) {
31163125
res->error(result->to_json());
@@ -3211,6 +3220,11 @@ void server_routes::init_routes() {
32113220

32123221
// get the result
32133222
auto result = res->rd.next(req.should_stop);
3223+
if (!result) {
3224+
// connection was closed
3225+
GGML_ASSERT(req.should_stop());
3226+
return res;
3227+
}
32143228

32153229
if (result->is_error()) {
32163230
res->error(result->to_json());
@@ -3717,7 +3731,12 @@ void server_routes::init_routes() {
37173731
}
37183732

37193733
// get the result
3720-
server_task_result_ptr result = rd.next(req.should_stop);
3734+
auto result = rd.next(req.should_stop);
3735+
if (!result) {
3736+
// connection was closed
3737+
GGML_ASSERT(req.should_stop());
3738+
return res;
3739+
}
37213740

37223741
if (result->is_error()) {
37233742
res->error(result->to_json());
@@ -3746,7 +3765,12 @@ void server_routes::init_routes() {
37463765
}
37473766

37483767
// get the result
3749-
server_task_result_ptr result = rd.next(req.should_stop);
3768+
auto result = rd.next(req.should_stop);
3769+
if (!result) {
3770+
// connection was closed
3771+
GGML_ASSERT(req.should_stop());
3772+
return res;
3773+
}
37503774

37513775
if (result->is_error()) {
37523776
res->error(result->to_json());
@@ -3779,7 +3803,12 @@ std::unique_ptr<server_res_generator> server_routes::handle_slots_save(const ser
37793803
rd.post_task(std::move(task));
37803804
}
37813805

3782-
server_task_result_ptr result = rd.next(req.should_stop);
3806+
auto result = rd.next(req.should_stop);
3807+
if (!result) {
3808+
// connection was closed
3809+
GGML_ASSERT(req.should_stop());
3810+
return res;
3811+
}
37833812

37843813
if (result->is_error()) {
37853814
res->error(result->to_json());
@@ -3810,7 +3839,12 @@ std::unique_ptr<server_res_generator> server_routes::handle_slots_restore(const
38103839
rd.post_task(std::move(task));
38113840
}
38123841

3813-
server_task_result_ptr result = rd.next(req.should_stop);
3842+
auto result = rd.next(req.should_stop);
3843+
if (!result) {
3844+
// connection was closed
3845+
GGML_ASSERT(req.should_stop());
3846+
return res;
3847+
}
38143848

38153849
if (result->is_error()) {
38163850
res->error(result->to_json());
@@ -3832,7 +3866,12 @@ std::unique_ptr<server_res_generator> server_routes::handle_slots_erase(const se
38323866
rd.post_task(std::move(task));
38333867
}
38343868

3835-
server_task_result_ptr result = rd.next(req.should_stop);
3869+
auto result = rd.next(req.should_stop);
3870+
if (!result) {
3871+
// connection was closed
3872+
GGML_ASSERT(req.should_stop());
3873+
return res;
3874+
}
38363875

38373876
if (result->is_error()) {
38383877
res->error(result->to_json());

0 commit comments

Comments
 (0)