@@ -86,18 +86,19 @@ export const sakanaProvider: ProviderConfig = {
8686 if ( request . temperature !== undefined ) payload . temperature = request . temperature
8787 if ( request . maxTokens != null ) payload . max_completion_tokens = request . maxTokens
8888
89- if ( request . responseFormat ) {
90- payload . response_format = {
91- type : 'json_schema' ,
92- json_schema : {
93- name : request . responseFormat . name || 'response_schema' ,
94- schema : request . responseFormat . schema || request . responseFormat ,
95- strict : request . responseFormat . strict !== false ,
96- } ,
97- }
98- }
89+ const responseFormatPayload = request . responseFormat
90+ ? {
91+ type : 'json_schema' as const ,
92+ json_schema : {
93+ name : request . responseFormat . name || 'response_schema' ,
94+ schema : request . responseFormat . schema || request . responseFormat ,
95+ strict : request . responseFormat . strict !== false ,
96+ } ,
97+ }
98+ : undefined
9999
100100 let preparedTools : ReturnType < typeof prepareToolsWithUsageControl > | null = null
101+ let hasActiveTools = false
101102
102103 if ( tools ?. length ) {
103104 preparedTools = prepareToolsWithUsageControl ( tools , request . tools , logger , 'openai' )
@@ -106,6 +107,7 @@ export const sakanaProvider: ProviderConfig = {
106107 if ( filteredTools ?. length && toolChoice ) {
107108 payload . tools = filteredTools
108109 payload . tool_choice = toolChoice
110+ hasActiveTools = true
109111
110112 logger . info ( 'Sakana request configuration:' , {
111113 toolCount : filteredTools . length ,
@@ -120,6 +122,14 @@ export const sakanaProvider: ProviderConfig = {
120122 }
121123 }
122124
125+ // Structured output and tool calling cannot be sent together — OpenAI-compatible
126+ // backends reject a request that carries both `response_format` and active
127+ // `tools`/`tool_choice`. Defer the schema until after the tool loop completes.
128+ const deferResponseFormat = ! ! responseFormatPayload && hasActiveTools
129+ if ( responseFormatPayload && ! deferResponseFormat ) {
130+ payload . response_format = responseFormatPayload
131+ }
132+
123133 if ( request . stream && ( ! tools || tools . length === 0 ) ) {
124134 logger . info ( 'Using streaming response for Sakana request (no tools)' )
125135
@@ -430,19 +440,20 @@ export const sakanaProvider: ProviderConfig = {
430440 logger . error ( 'Error in Sakana request:' , { error } )
431441 }
432442
433- const providerEndTime = Date . now ( )
434- const providerEndTimeISO = new Date ( providerEndTime ) . toISOString ( )
435- const totalDuration = providerEndTime - providerStartTime
436-
437443 if ( request . stream ) {
438444 logger . info ( 'Using streaming for final Sakana response after tool processing' )
439445
440- const streamingPayload = {
446+ const streamingPayload : any = {
441447 ...payload ,
442448 messages : currentMessages ,
443449 tool_choice : 'auto' ,
444450 stream : true ,
445451 }
452+ if ( deferResponseFormat && responseFormatPayload ) {
453+ streamingPayload . response_format = responseFormatPayload
454+ streamingPayload . tool_choice = 'none'
455+ streamingPayload . parallel_tool_calls = false
456+ }
446457
447458 const streamResponse = await sakana . chat . completions . create (
448459 streamingPayload ,
@@ -509,6 +520,58 @@ export const sakanaProvider: ProviderConfig = {
509520 return streamingResult
510521 }
511522
523+ // Tools were active, so `response_format` was withheld from the loop. Make one final
524+ // tool-free call to obtain the structured response now that the tool work is done.
525+ if ( deferResponseFormat && responseFormatPayload ) {
526+ logger . info ( 'Applying deferred JSON schema response format after tool processing' )
527+
528+ const finalFormatStartTime = Date . now ( )
529+ const finalPayload : any = {
530+ ...payload ,
531+ messages : currentMessages ,
532+ response_format : responseFormatPayload ,
533+ tool_choice : 'none' ,
534+ parallel_tool_calls : false ,
535+ }
536+
537+ currentResponse = await sakana . chat . completions . create (
538+ finalPayload ,
539+ request . abortSignal ? { signal : request . abortSignal } : undefined
540+ )
541+
542+ const finalFormatEndTime = Date . now ( )
543+ timeSegments . push ( {
544+ type : 'model' ,
545+ name : request . model ,
546+ startTime : finalFormatStartTime ,
547+ endTime : finalFormatEndTime ,
548+ duration : finalFormatEndTime - finalFormatStartTime ,
549+ } )
550+ modelTime += finalFormatEndTime - finalFormatStartTime
551+
552+ const formattedContent = currentResponse . choices [ 0 ] ?. message ?. content
553+ if ( formattedContent ) {
554+ content = formattedContent
555+ }
556+
557+ if ( currentResponse . usage ) {
558+ tokens . input += currentResponse . usage . prompt_tokens || 0
559+ tokens . output += currentResponse . usage . completion_tokens || 0
560+ tokens . total += currentResponse . usage . total_tokens || 0
561+ }
562+
563+ enrichLastModelSegmentFromChatCompletions (
564+ timeSegments ,
565+ currentResponse ,
566+ currentResponse . choices [ 0 ] ?. message ?. tool_calls ,
567+ { model : request . model , provider : 'sakana' }
568+ )
569+ }
570+
571+ const providerEndTime = Date . now ( )
572+ const providerEndTimeISO = new Date ( providerEndTime ) . toISOString ( )
573+ const totalDuration = providerEndTime - providerStartTime
574+
512575 return {
513576 content,
514577 model : request . model ,
0 commit comments