@@ -23,6 +23,7 @@ import {
2323 type AssistantContentExt ,
2424 type CoreContent ,
2525 type FilePartExt ,
26+ GenerateJsonOptions ,
2627 type GenerateTextOptions ,
2728 type GenerationStats ,
2829 type ImagePartExt ,
@@ -170,57 +171,62 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
170171 } ) ;
171172 }
172173
/**
 * Adjusts generation options in place before they are handed to the provider call.
 *
 * - Caps `topK` at 40.
 * - Ensures `opts.providerOptions` is set, replacing it with a shallow copy so the
 *   object supplied by the caller (or shared via the default options) is never mutated.
 * - When `opts.thinking` is set, translates it into the provider-specific reasoning
 *   settings for OpenAI, Anthropic or Gemini 2.5, merging with any options already
 *   present for that provider.
 *
 * @param opts the per-call options; `topK`, `temperature` and `providerOptions` may be reassigned.
 */
configureOptions(opts: GenerateTextOptions) {
  // topK is capped at 40 (original note: the Gemini Flash 2.0 thinking max is about 42)
  if (opts.topK && opts.topK > 40) opts.topK = 40;

  // Callers build `opts` with a shallow spread, so the nested providerOptions object may be
  // shared with the default options or the caller's own object. Copy it before writing to it,
  // otherwise thinking settings from one call would leak into later calls.
  // Typed loosely because the provider-specific shapes are not declared in this block.
  const providerOptions: Record<string, any> = { ...opts.providerOptions };
  opts.providerOptions = providerOptions;

  const thinking = opts.thinking;
  if (!thinking) return;

  // Maps the thinking level to a token budget; undefined for any other level.
  const budgetFor = (low: number, medium: number, high: number): number | undefined => {
    if (thinking === 'low') return low;
    if (thinking === 'medium') return medium;
    if (thinking === 'high') return high;
    return undefined;
  };

  // if (this.getService() === 'groq') {
  //   providerOptions.groq = { reasoningFormat: 'parsed' };
  // }

  // https://sdk.vercel.ai/docs/guides/o3#refining-reasoning-effort
  // NOTE(review): this matches 'gpt5' without a hyphen - confirm against the model ids in use.
  if (this.getService() === 'openai' && this.getModel().includes('gpt5')) {
    providerOptions.openai = { ...providerOptions.openai, reasoningEffort: thinking };
  }

  const model = this.getModel();
  // https://sdk.vercel.ai/docs/guides/sonnet-3-7#reasoning-ability
  // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
  if (model.includes('claude-3-7') || model.includes('opus-4') || model.includes('sonnet-4')) {
    // 21_333 is the maximum without streaming
    const budgetTokens = budgetFor(3000, 8192, 21_333);
    if (budgetTokens) {
      providerOptions.anthropic = {
        ...providerOptions.anthropic,
        thinking: { type: 'enabled', budgetTokens },
      };
      opts.temperature = undefined; // temperature is not supported when thinking is enabled
    }
    // maxOutputTokens += budgetTokens;
    // Streaming is required when max_tokens is greater than 21,333
  }
  // https://cloud.google.com/vertex-ai/generative-ai/docs/thinking#budget
  else if (this.getId().includes('gemini-2.5')) {
    // 8192 is the default thinking budget for Gemini
    const thinkingBudget = budgetFor(3000, 8192, 24_576);
    if (thinkingBudget) {
      providerOptions.google = {
        ...providerOptions.google,
        thinkingConfig: {
          includeThoughts: true,
          thinkingBudget,
        },
      };
    }
  }
}
220+
173221 @quotaRetry ( { retries : 5 , initialBackoffMs : 5000 } )
174- override async _generateMessage ( llmMessages : LlmMessage [ ] , opts ? : GenerateTextOptions ) : Promise < LlmMessage > {
222+ override async _generateMessage ( llmMessages : LlmMessage [ ] , opts : GenerateTextOptions | GenerateJsonOptions = { } ) : Promise < LlmMessage > {
175223 const combinedOpts = { ...this . defaultOptions , ...opts } ;
176224 const description = combinedOpts . id ?? '' ;
177225 return await withActiveSpan ( `generateTextFromMessages ${ description } ` , async ( span ) => {
178226 // The processMessages method now correctly returns CoreMessage[] and strips out reasoning parts
179227 const messages : CoreMessage [ ] = this . processMessages ( llmMessages ) ;
180228
181- // Gemini Flash 2.0 thinking max is about 42
182- if ( combinedOpts . topK && combinedOpts . topK > 40 ) combinedOpts . topK = 40 ;
183-
184- combinedOpts . providerOptions ??= { } ;
185- const providerOptions : any = combinedOpts . providerOptions ;
186- if ( combinedOpts . thinking ) {
187- // if (this.getService() === 'groq') {
188- // providerOptions.groq = { reasoningFormat: 'parsed' };
189- // }
190-
191- // https://sdk.vercel.ai/docs/guides/o3#refining-reasoning-effort
192- if ( this . getService ( ) === 'openai' && this . getModel ( ) . includes ( 'gpt5' ) ) providerOptions . openai = { reasoningEffort : combinedOpts . thinking } ;
193- let thinkingBudget : number | undefined ;
194- // https://sdk.vercel.ai/docs/guides/sonnet-3-7#reasoning-ability
195- // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
196- if ( this . getModel ( ) . includes ( 'claude-3-7' ) || this . getModel ( ) . includes ( 'opus-4' ) || this . getModel ( ) . includes ( 'sonnet-4' ) ) {
197- if ( combinedOpts . thinking === 'low' ) thinkingBudget = 3000 ;
198- if ( combinedOpts . thinking === 'medium' ) thinkingBudget = 8192 ;
199- else if ( combinedOpts . thinking === 'high' ) thinkingBudget = 21_333 ; // maximum without streaming
200- if ( thinkingBudget ) {
201- providerOptions . anthropic = {
202- thinking : { type : 'enabled' , budgetTokens : thinkingBudget } ,
203- } ;
204- }
205- // maxOutputTokens += budgetTokens;
206- // Streaming is required when max_tokens is greater than 21,333
207- }
208- // https://cloud.google.com/vertex-ai/generative-ai/docs/thinking#budget
209- else if ( this . getId ( ) . includes ( 'gemini-2.5' ) ) {
210- if ( combinedOpts . thinking === 'low' ) thinkingBudget = 3000 ;
211- else if ( combinedOpts . thinking === 'medium' )
212- thinkingBudget = 8192 ; // default thinking budget for Gemini
213- else if ( combinedOpts . thinking === 'high' ) thinkingBudget = 24_576 ;
214- if ( thinkingBudget ) {
215- providerOptions . google = {
216- thinkingConfig : {
217- includeThoughts : true ,
218- thinkingBudget,
219- } ,
220- } ;
221- }
222- }
223- }
229+ this . configureOptions ( combinedOpts ) ;
224230
225231 const prompt = messages . map ( ( m ) => m . content ) . join ( '\n' ) ;
226232 span . setAttributes ( {
@@ -249,17 +255,7 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
249255 description,
250256 settings : combinedOpts ,
251257 } ;
252- let llmCall : LlmCall ;
253- try {
254- llmCall = await appContext ( ) . llmCallService . saveRequest ( createLlmCallRequest ) ;
255- } catch ( e ) {
256- // If the initial save fails then we'll just save it later with the response
257- llmCall = {
258- ...createLlmCallRequest ,
259- id : randomUUID ( ) ,
260- requestTime : Date . now ( ) ,
261- } ;
262- }
258+ const llmCall : LlmCall = await this . saveLlmCallRequest ( createLlmCallRequest ) ;
263259
264260 const requestTime = Date . now ( ) ;
265261 try {
@@ -275,7 +271,7 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
275271 stopSequences : combinedOpts . stopSequences ,
276272 maxRetries : combinedOpts . maxRetries ,
277273 maxOutputTokens : combinedOpts . maxOutputTokens ,
278- providerOptions,
274+ providerOptions : combinedOpts . providerOptions ,
279275 // abortSignal: combinedOpts.abortSignal,
280276 } ;
281277 // Messages can be large, and model property with schemas, so just log the reference to the LlmCall its saved in
@@ -384,20 +380,12 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
384380 cost,
385381 } ) ;
386382
387- try {
388- await appContext ( ) . llmCallService . saveResponse ( llmCall ) ;
389- } catch ( e ) {
390- logger . warn ( e , `Error saving LlmCall response ${ e . message } ` ) ;
391- }
383+ this . saveLlmCallResponse ( llmCall ) ;
392384
393385 return message ;
394386 } catch ( error ) {
395387 llmCall . error = errorToString ( error ) ;
396- try {
397- await appContext ( ) . llmCallService . saveResponse ( llmCall ) ;
398- } catch ( e ) {
399- logger . warn ( e , `Error saving LlmCall response with error ${ e . message } ` ) ;
400- }
388+ this . saveLlmCallResponse ( llmCall ) ;
401389
402390 span . recordException ( error ) ;
403391 throw error ;
@@ -409,13 +397,15 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
409397 override async streamText (
410398 llmMessages : LlmMessage [ ] ,
411399 onChunkCallback : ( chunk : TextStreamPart < any > ) => void ,
412- opts ? : GenerateTextOptions ,
400+ opts : GenerateTextOptions | GenerateJsonOptions = { } ,
413401 ) : Promise < GenerationStats > {
414402 const combinedOpts = { ...this . defaultOptions , ...opts } ;
415403 return withActiveSpan ( `streamText ${ combinedOpts ?. id ?? '' } ` , async ( span ) => {
416404 // The processMessages method now correctly returns CoreMessage[]
417405 const messages : CoreMessage [ ] = this . processMessages ( llmMessages ) ;
418406
407+ this . configureOptions ( combinedOpts ) ;
408+
419409 const prompt = messages . map ( ( m ) => ( typeof m . content === 'string' ? m . content : m . content . map ( ( p ) => ( 'text' in p ? p . text : '' ) ) . join ( '' ) ) ) . join ( '\n' ) ;
420410 span . setAttributes ( {
421411 inputChars : prompt . length ,
@@ -431,17 +421,7 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
431421 callStack : callStack ( ) ,
432422 settings : combinedOpts ,
433423 } ;
434- let llmCall : LlmCall ;
435- try {
436- llmCall = await appContext ( ) . llmCallService . saveRequest ( createLlmCallRequest ) ;
437- } catch ( e ) {
438- // If the initial save fails then we'll just save it later with the response
439- llmCall = {
440- ...createLlmCallRequest ,
441- id : randomUUID ( ) ,
442- requestTime : Date . now ( ) ,
443- } ;
444- }
424+ const llmCall : LlmCall = await this . saveLlmCallRequest ( createLlmCallRequest ) ;
445425
446426 const requestTime = Date . now ( ) ;
447427
@@ -548,15 +528,32 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
548528 totalCost,
549529 } ) ;
550530
551- try {
552- await appContext ( ) . llmCallService . saveResponse ( llmCall ) ;
553- } catch ( e ) {
554- logger . error ( e ) ;
555- }
531+ this . saveLlmCallResponse ( llmCall ) ;
556532
557533 if ( finishReason !== 'stop' ) throw new Error ( `Unexpected finish reason: ${ finishReason } ` ) ;
558534
559535 return stats ;
560536 } ) ;
561537 }
538+
/**
 * Persists the request side of an LLM call, on a best-effort basis.
 *
 * If saving fails the error is logged and a locally built LlmCall (fresh UUID and the
 * current time as `requestTime`) is returned instead, so the call can still be saved
 * later together with its response.
 *
 * @param llmCall the request details to persist
 * @returns the saved LlmCall, or the locally built fallback when the save failed
 */
async saveLlmCallRequest(llmCall: CreateLlmRequest): Promise<LlmCall> {
  try {
    return await appContext().llmCallService.saveRequest(llmCall);
  } catch (e) {
    // Don't fail the generation because persistence is unavailable, but don't hide the failure either
    logger.warn(e, 'Error saving LlmCall request, it will be saved with the response');
    // If the initial save fails then we'll just save it later with the response
    return {
      ...llmCall,
      id: randomUUID(),
      requestTime: Date.now(),
    };
  }
}
551+
/**
 * Persists the response (or error) recorded on an LLM call, on a best-effort basis.
 *
 * A failure from `saveResponse` is caught and logged rather than propagated, so a
 * persistence problem does not fail the generation that produced the response.
 *
 * @param llmCall the call record, already populated with its response or error details
 */
async saveLlmCallResponse(llmCall: LlmCall): Promise<void> {
  try {
    await appContext().llmCallService.saveResponse(llmCall);
  } catch (e) {
    // Include the call id so the failed save can be correlated with the request
    logger.error(e, `Error saving LlmCall response ${llmCall.id}`);
  }
}
562559}
0 commit comments