@@ -3037,6 +3037,84 @@ void server_context::on_sleeping_changed(std::function<void(bool)> callback) {
30373037 impl->queue_tasks .on_sleeping_state (std::move (callback));
30383038}
30393039
3040+ static json completion_results_to_response_json (json arr, task_response_type res_type) {
3041+ GGML_ASSERT (!arr.empty () && " empty completion results" );
3042+
3043+ if (arr.size () == 1 ) {
3044+ return arr[0 ];
3045+ }
3046+
3047+ if (res_type == TASK_RESPONSE_TYPE_OAI_CHAT || res_type == TASK_RESPONSE_TYPE_OAI_CMPL) {
3048+ json & choices = arr[0 ][" choices" ];
3049+ for (size_t i = 1 ; i < arr.size (); ++i) {
3050+ choices.push_back (std::move (arr[i][" choices" ][0 ]));
3051+ }
3052+ return arr[0 ];
3053+ }
3054+
3055+ return arr;
3056+ }
3057+
3058+ static json completion_final_result_to_non_stream_json (server_task_result_cmpl_final & result) {
3059+ switch (result.res_type ) {
3060+ case TASK_RESPONSE_TYPE_NONE:
3061+ return result.to_json_non_oaicompat ();
3062+ case TASK_RESPONSE_TYPE_OAI_CMPL:
3063+ return result.to_json_oaicompat ();
3064+ case TASK_RESPONSE_TYPE_OAI_CHAT:
3065+ return result.to_json_oaicompat_chat ();
3066+ case TASK_RESPONSE_TYPE_OAI_RESP:
3067+ return result.to_json_oaicompat_resp ();
3068+ case TASK_RESPONSE_TYPE_ANTHROPIC:
3069+ return result.to_json_anthropic ();
3070+ default :
3071+ throw std::logic_error (" Invalid task_response_type" );
3072+ }
3073+ }
3074+
3075+ static void maybe_save_request_artifact (
3076+ const common_params & params,
3077+ const server_http_req & req,
3078+ bool stream,
3079+ const json & request_data,
3080+ const json & prompt_data,
3081+ int status,
3082+ const json & response_data) {
3083+ if (params.request_save_path .empty ()) {
3084+ return ;
3085+ }
3086+
3087+ const std::string filename_base = server_timestamp_utc_filename ();
3088+ std::string timestamp = filename_base;
3089+ int hyphen_count = 0 ;
3090+ for (char & ch : timestamp) {
3091+ if (ch == ' -' ) {
3092+ ++hyphen_count;
3093+ if (hyphen_count > 2 ) {
3094+ ch = ' :' ;
3095+ }
3096+ }
3097+ }
3098+
3099+ json artifact = {
3100+ {" timestamp" , timestamp},
3101+ {" endpoint" , req.path },
3102+ {" stream" , stream},
3103+ {" status" , status},
3104+ {" request" , request_data},
3105+ {" prompt" , prompt_data},
3106+ {" response" , response_data},
3107+ };
3108+
3109+ std::string filepath;
3110+ if (!server_save_json_artifact (params.request_save_path , artifact, filename_base, &filepath)) {
3111+ SRV_WRN (" failed to save request artifact in %s\n " , params.request_save_path .c_str ());
3112+ return ;
3113+ }
3114+
3115+ SRV_DBG (" saved request artifact: %s\n " , filepath.c_str ());
3116+ }
3117+
30403118
30413119//
30423120// server_routes
@@ -3053,6 +3131,7 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
30533131 auto res = create_response ();
30543132 auto completion_id = gen_chatcmplid ();
30553133 auto & rd = res->rd ;
3134+ const json prompt_data = data.contains (" prompt" ) ? data.at (" prompt" ) : json (nullptr );
30563135
30573136 try {
30583137 std::vector<server_task> tasks;
@@ -3105,7 +3184,10 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
31053184
31063185 rd.post_tasks (std::move (tasks));
31073186 } catch (const std::exception & e) {
3108- res->error (format_error_response (e.what (), ERROR_TYPE_INVALID_REQUEST));
3187+ json error_json = format_error_response (e.what (), ERROR_TYPE_INVALID_REQUEST);
3188+ json response_json = {{" error" , error_json}};
3189+ res->error (error_json);
3190+ maybe_save_request_artifact (params, req, json_value (data, " stream" , false ), data, prompt_data, res->status , response_json);
31093191 return res;
31103192 }
31113193
@@ -3117,29 +3199,20 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
31173199 if (all_results.is_terminated ) {
31183200 return res; // connection is closed
31193201 } else if (all_results.error ) {
3120- res->error (all_results.error ->to_json ());
3202+ json error_json = all_results.error ->to_json ();
3203+ json response_json = {{" error" , error_json}};
3204+ res->error (error_json);
3205+ maybe_save_request_artifact (params, req, stream, data, prompt_data, res->status , response_json);
31213206 return res;
31223207 } else {
31233208 json arr = json::array ();
31243209 for (auto & res : all_results.results ) {
31253210 GGML_ASSERT (dynamic_cast <server_task_result_cmpl_final*>(res.get ()) != nullptr );
31263211 arr.push_back (res->to_json ());
31273212 }
3128- GGML_ASSERT (!arr.empty () && " empty results" );
3129- if (arr.size () == 1 ) {
3130- // if single request, return single object instead of array
3131- res->ok (arr[0 ]);
3132- } else if (res_type == TASK_RESPONSE_TYPE_OAI_CHAT || res_type == TASK_RESPONSE_TYPE_OAI_CMPL) {
3133- // if multiple results in OAI format, we need to re-format them
3134- json & choices = arr[0 ][" choices" ];
3135- for (size_t i = 1 ; i < arr.size (); i++) {
3136- choices.push_back (std::move (arr[i][" choices" ][0 ]));
3137- }
3138- res->ok (arr[0 ]);
3139- } else {
3140- // multi-results, non-OAI compat
3141- res->ok (arr);
3142- }
3213+ json response_json = completion_results_to_response_json (std::move (arr), res_type);
3214+ res->ok (response_json);
3215+ maybe_save_request_artifact (params, req, stream, data, prompt_data, res->status , response_json);
31433216 }
31443217 } else {
31453218 // in streaming mode, the first error must be treated as non-stream response
@@ -3152,7 +3225,10 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
31523225 }
31533226
31543227 if (first_result->is_error ()) {
3155- res->error (first_result->to_json ());
3228+ json error_json = first_result->to_json ();
3229+ json response_json = {{" error" , error_json}};
3230+ res->error (error_json);
3231+ maybe_save_request_artifact (params, req, stream, data, prompt_data, res->status , response_json);
31563232 return res;
31573233 }
31583234
@@ -3161,6 +3237,12 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
31613237 dynamic_cast <server_task_result_cmpl_final*> (first_result.get ()) != nullptr
31623238 );
31633239
3240+ auto final_results = std::make_shared<json>(json::array ());
3241+ if (auto * final_result = dynamic_cast <server_task_result_cmpl_final *>(first_result.get ())) {
3242+ final_results->push_back (completion_final_result_to_non_stream_json (*final_result));
3243+ }
3244+ auto is_saved = std::make_shared<bool >(false );
3245+
31643246 // next responses are streamed
31653247 // to be sent immediately
31663248 json first_result_json = first_result->to_json ();
@@ -3173,7 +3255,7 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
31733255 }
31743256 res->status = 200 ;
31753257 res->content_type = " text/event-stream" ;
3176- res->next = [res_this = res.get (), res_type, &req](std::string & output) -> bool {
3258+ res->next = [res_this = res.get (), res_type, &req, request_data = data, prompt_data, final_results, is_saved, this ](std::string & output) -> bool {
31773259 static auto format_error = [](task_response_type res_type, const json & res_json) {
31783260 if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
31793261 return format_anthropic_sse ({
@@ -3202,6 +3284,12 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
32023284
32033285 // check if there is more data
32043286 if (!rd.has_next ()) {
3287+ if (!*is_saved && !final_results->empty ()) {
3288+ json response_json = completion_results_to_response_json (*final_results, res_type);
3289+ maybe_save_request_artifact (params, req, true , request_data, prompt_data, 200 , response_json);
3290+ *is_saved = true ;
3291+ }
3292+
32053293 switch (res_type) {
32063294 case TASK_RESPONSE_TYPE_NONE:
32073295 case TASK_RESPONSE_TYPE_OAI_RESP:
@@ -3228,6 +3316,10 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
32283316 // send the results
32293317 if (result->is_error ()) {
32303318 json res_json = result->to_json ();
3319+ if (!*is_saved) {
3320+ maybe_save_request_artifact (params, req, true , request_data, prompt_data, 500 , json{{" error" , res_json}});
3321+ *is_saved = true ;
3322+ }
32313323 output = format_error (res_type, res_json);
32323324 SRV_DBG (" %s" , " error received during streaming, terminating stream\n " );
32333325 return false ; // terminate on error
@@ -3236,6 +3328,9 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
32363328 dynamic_cast <server_task_result_cmpl_partial*>(result.get ()) != nullptr
32373329 || dynamic_cast <server_task_result_cmpl_final*>(result.get ()) != nullptr
32383330 );
3331+ if (auto * final_result = dynamic_cast <server_task_result_cmpl_final *>(result.get ())) {
3332+ final_results->push_back (completion_final_result_to_non_stream_json (*final_result));
3333+ }
32393334 json res_json = result->to_json ();
32403335 if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
32413336 output = format_anthropic_sse (res_json);
@@ -3251,6 +3346,10 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
32513346
32523347 } catch (const std::exception & e) {
32533348 json error_json = format_error_response (e.what (), ERROR_TYPE_SERVER);
3349+ if (!*is_saved) {
3350+ maybe_save_request_artifact (params, req, true , request_data, prompt_data, 500 , json{{" error" , error_json}});
3351+ *is_saved = true ;
3352+ }
32543353 output = format_error (res_type, error_json);
32553354
32563355 // terminate on exception
0 commit comments