@@ -2960,19 +2960,22 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
29602960 // in streaming mode, the first error must be treated as non-stream response
29612961 // this is to match the OAI API behavior
29622962 // ref: https://github.com/ggml-org/llama.cpp/pull/16486#discussion_r2419657309
2963- server_task_result_ptr first_result = rd.next (req.should_stop );
2963+ auto first_result = rd.next (req.should_stop );
29642964 if (first_result == nullptr ) {
2965+ GGML_ASSERT (req.should_stop ());
29652966 return res; // connection is closed
2966- } else if (first_result->is_error ()) {
2967+ }
2968+
2969+ if (first_result->is_error ()) {
29672970 res->error (first_result->to_json ());
29682971 return res;
2969- } else {
2970- GGML_ASSERT (
2971- dynamic_cast <server_task_result_cmpl_partial*>(first_result.get ()) != nullptr
2972- || dynamic_cast <server_task_result_cmpl_final*>(first_result.get ()) != nullptr
2973- );
29742972 }
29752973
2974+ GGML_ASSERT (
2975+ dynamic_cast <server_task_result_cmpl_partial*>(first_result.get ()) != nullptr ||
2976+ dynamic_cast <server_task_result_cmpl_final*> (first_result.get ()) != nullptr
2977+ );
2978+
29762979 // next responses are streamed
29772980 // to be sent immediately
29782981 json first_result_json = first_result->to_json ();
@@ -3028,6 +3031,7 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
30283031 auto result = rd.next (req.should_stop );
30293032 if (result == nullptr ) {
30303033 SRV_DBG (" %s" , " stopping streaming due to should_stop condition\n " );
3034+ GGML_ASSERT (req.should_stop ());
30313035 return false ; // should_stop condition met
30323036 }
30333037
@@ -3111,6 +3115,11 @@ void server_routes::init_routes() {
31113115
31123116 // get the result
31133117 auto result = res->rd .next (req.should_stop );
3118+ if (!result) {
3119+ // connection was closed
3120+ GGML_ASSERT (req.should_stop ());
3121+ return res;
3122+ }
31143123
31153124 if (result->is_error ()) {
31163125 res->error (result->to_json ());
@@ -3211,6 +3220,11 @@ void server_routes::init_routes() {
32113220
32123221 // get the result
32133222 auto result = res->rd .next (req.should_stop );
3223+ if (!result) {
3224+ // connection was closed
3225+ GGML_ASSERT (req.should_stop ());
3226+ return res;
3227+ }
32143228
32153229 if (result->is_error ()) {
32163230 res->error (result->to_json ());
@@ -3717,7 +3731,12 @@ void server_routes::init_routes() {
37173731 }
37183732
37193733 // get the result
3720- server_task_result_ptr result = rd.next (req.should_stop );
3734+ auto result = rd.next (req.should_stop );
3735+ if (!result) {
3736+ // connection was closed
3737+ GGML_ASSERT (req.should_stop ());
3738+ return res;
3739+ }
37213740
37223741 if (result->is_error ()) {
37233742 res->error (result->to_json ());
@@ -3746,7 +3765,12 @@ void server_routes::init_routes() {
37463765 }
37473766
37483767 // get the result
3749- server_task_result_ptr result = rd.next (req.should_stop );
3768+ auto result = rd.next (req.should_stop );
3769+ if (!result) {
3770+ // connection was closed
3771+ GGML_ASSERT (req.should_stop ());
3772+ return res;
3773+ }
37503774
37513775 if (result->is_error ()) {
37523776 res->error (result->to_json ());
@@ -3779,7 +3803,12 @@ std::unique_ptr<server_res_generator> server_routes::handle_slots_save(const ser
37793803 rd.post_task (std::move (task));
37803804 }
37813805
3782- server_task_result_ptr result = rd.next (req.should_stop );
3806+ auto result = rd.next (req.should_stop );
3807+ if (!result) {
3808+ // connection was closed
3809+ GGML_ASSERT (req.should_stop ());
3810+ return res;
3811+ }
37833812
37843813 if (result->is_error ()) {
37853814 res->error (result->to_json ());
@@ -3810,7 +3839,12 @@ std::unique_ptr<server_res_generator> server_routes::handle_slots_restore(const
38103839 rd.post_task (std::move (task));
38113840 }
38123841
3813- server_task_result_ptr result = rd.next (req.should_stop );
3842+ auto result = rd.next (req.should_stop );
3843+ if (!result) {
3844+ // connection was closed
3845+ GGML_ASSERT (req.should_stop ());
3846+ return res;
3847+ }
38143848
38153849 if (result->is_error ()) {
38163850 res->error (result->to_json ());
@@ -3832,7 +3866,12 @@ std::unique_ptr<server_res_generator> server_routes::handle_slots_erase(const se
38323866 rd.post_task (std::move (task));
38333867 }
38343868
3835- server_task_result_ptr result = rd.next (req.should_stop );
3869+ auto result = rd.next (req.should_stop );
3870+ if (!result) {
3871+ // connection was closed
3872+ GGML_ASSERT (req.should_stop ());
3873+ return res;
3874+ }
38363875
38373876 if (result->is_error ()) {
38383877 res->error (result->to_json ());
0 commit comments