Skip to content

Commit f59245d

Browse files
Extract common functions in ai-llm.ts
1 parent 4732c49 commit f59245d

File tree

1 file changed

+80
-83
lines changed

1 file changed

+80
-83
lines changed

src/llm/services/ai-llm.ts

Lines changed: 80 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import {
2323
type AssistantContentExt,
2424
type CoreContent,
2525
type FilePartExt,
26+
GenerateJsonOptions,
2627
type GenerateTextOptions,
2728
type GenerationStats,
2829
type ImagePartExt,
@@ -170,57 +171,62 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
170171
});
171172
}
172173

174+
/**
 * Applies provider-specific tuning to the generation options in place:
 * clamps `topK` to the provider maximum and translates the generic
 * `thinking` level ('low' | 'medium' | 'high') into each provider's
 * reasoning/thinking configuration under `opts.providerOptions`.
 *
 * Mutates `opts` (creates `providerOptions` if absent); returns nothing.
 * @param opts Generation options to adjust before the SDK call.
 */
configureOptions(opts: GenerateTextOptions) {
	// Gemini Flash 2.0 thinking max is about 42
	if (opts.topK && opts.topK > 40) opts.topK = 40;

	opts.providerOptions ??= {};
	const providerOptions: any = opts.providerOptions;
	if (opts.thinking) {
		// if (this.getService() === 'groq') {
		// 	providerOptions.groq = { reasoningFormat: 'parsed' };
		// }

		// https://sdk.vercel.ai/docs/guides/o3#refining-reasoning-effort
		// NOTE(review): OpenAI model ids are normally hyphenated ('gpt-5') — confirm 'gpt5' ever matches this.getModel()
		if (this.getService() === 'openai' && this.getModel().includes('gpt5')) providerOptions.openai = { reasoningEffort: opts.thinking };
		let thinkingBudget: number | undefined;
		// https://sdk.vercel.ai/docs/guides/sonnet-3-7#reasoning-ability
		// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
		if (this.getModel().includes('claude-3-7') || this.getModel().includes('opus-4') || this.getModel().includes('sonnet-4')) {
			// Single else-if chain (consistent with the Gemini branch below); the
			// original mixed a bare `if` for 'medium', which worked only because
			// the three values are mutually exclusive.
			if (opts.thinking === 'low') thinkingBudget = 3000;
			else if (opts.thinking === 'medium') thinkingBudget = 8192;
			else if (opts.thinking === 'high') thinkingBudget = 21_333; // maximum without streaming
			if (thinkingBudget) {
				providerOptions.anthropic = {
					thinking: { type: 'enabled', budgetTokens: thinkingBudget },
				};
				opts.temperature = undefined; // temperature is not supported when thinking is enabled
			}
			// maxOutputTokens += budgetTokens;
			// Streaming is required when max_tokens is greater than 21,333
		}
		// https://cloud.google.com/vertex-ai/generative-ai/docs/thinking#budget
		else if (this.getId().includes('gemini-2.5')) {
			if (opts.thinking === 'low') thinkingBudget = 3000;
			else if (opts.thinking === 'medium') thinkingBudget = 8192; // default thinking budget for Gemini
			else if (opts.thinking === 'high') thinkingBudget = 24_576;
			if (thinkingBudget) {
				providerOptions.google = {
					thinkingConfig: {
						includeThoughts: true,
						thinkingBudget,
					},
				};
			}
		}
	}
}
220+
173221
@quotaRetry({ retries: 5, initialBackoffMs: 5000 })
174-
override async _generateMessage(llmMessages: LlmMessage[], opts?: GenerateTextOptions): Promise<LlmMessage> {
222+
override async _generateMessage(llmMessages: LlmMessage[], opts: GenerateTextOptions | GenerateJsonOptions = {}): Promise<LlmMessage> {
175223
const combinedOpts = { ...this.defaultOptions, ...opts };
176224
const description = combinedOpts.id ?? '';
177225
return await withActiveSpan(`generateTextFromMessages ${description}`, async (span) => {
178226
// The processMessages method now correctly returns CoreMessage[] and strips out reasoning parts
179227
const messages: CoreMessage[] = this.processMessages(llmMessages);
180228

181-
// Gemini Flash 2.0 thinking max is about 42
182-
if (combinedOpts.topK && combinedOpts.topK > 40) combinedOpts.topK = 40;
183-
184-
combinedOpts.providerOptions ??= {};
185-
const providerOptions: any = combinedOpts.providerOptions;
186-
if (combinedOpts.thinking) {
187-
// if (this.getService() === 'groq') {
188-
// providerOptions.groq = { reasoningFormat: 'parsed' };
189-
// }
190-
191-
// https://sdk.vercel.ai/docs/guides/o3#refining-reasoning-effort
192-
if (this.getService() === 'openai' && this.getModel().includes('gpt5')) providerOptions.openai = { reasoningEffort: combinedOpts.thinking };
193-
let thinkingBudget: number | undefined;
194-
// https://sdk.vercel.ai/docs/guides/sonnet-3-7#reasoning-ability
195-
// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
196-
if (this.getModel().includes('claude-3-7') || this.getModel().includes('opus-4') || this.getModel().includes('sonnet-4')) {
197-
if (combinedOpts.thinking === 'low') thinkingBudget = 3000;
198-
if (combinedOpts.thinking === 'medium') thinkingBudget = 8192;
199-
else if (combinedOpts.thinking === 'high') thinkingBudget = 21_333; // maximum without streaming
200-
if (thinkingBudget) {
201-
providerOptions.anthropic = {
202-
thinking: { type: 'enabled', budgetTokens: thinkingBudget },
203-
};
204-
}
205-
// maxOutputTokens += budgetTokens;
206-
// Streaming is required when max_tokens is greater than 21,333
207-
}
208-
// https://cloud.google.com/vertex-ai/generative-ai/docs/thinking#budget
209-
else if (this.getId().includes('gemini-2.5')) {
210-
if (combinedOpts.thinking === 'low') thinkingBudget = 3000;
211-
else if (combinedOpts.thinking === 'medium')
212-
thinkingBudget = 8192; // default thinking budget for Gemini
213-
else if (combinedOpts.thinking === 'high') thinkingBudget = 24_576;
214-
if (thinkingBudget) {
215-
providerOptions.google = {
216-
thinkingConfig: {
217-
includeThoughts: true,
218-
thinkingBudget,
219-
},
220-
};
221-
}
222-
}
223-
}
229+
this.configureOptions(combinedOpts);
224230

225231
const prompt = messages.map((m) => m.content).join('\n');
226232
span.setAttributes({
@@ -249,17 +255,7 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
249255
description,
250256
settings: combinedOpts,
251257
};
252-
let llmCall: LlmCall;
253-
try {
254-
llmCall = await appContext().llmCallService.saveRequest(createLlmCallRequest);
255-
} catch (e) {
256-
// If the initial save fails then we'll just save it later with the response
257-
llmCall = {
258-
...createLlmCallRequest,
259-
id: randomUUID(),
260-
requestTime: Date.now(),
261-
};
262-
}
258+
const llmCall: LlmCall = await this.saveLlmCallRequest(createLlmCallRequest);
263259

264260
const requestTime = Date.now();
265261
try {
@@ -275,7 +271,7 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
275271
stopSequences: combinedOpts.stopSequences,
276272
maxRetries: combinedOpts.maxRetries,
277273
maxOutputTokens: combinedOpts.maxOutputTokens,
278-
providerOptions,
274+
providerOptions: combinedOpts.providerOptions,
279275
// abortSignal: combinedOpts.abortSignal,
280276
};
281277
// Messages can be large, and model property with schemas, so just log the reference to the LlmCall its saved in
@@ -384,20 +380,12 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
384380
cost,
385381
});
386382

387-
try {
388-
await appContext().llmCallService.saveResponse(llmCall);
389-
} catch (e) {
390-
logger.warn(e, `Error saving LlmCall response ${e.message}`);
391-
}
383+
this.saveLlmCallResponse(llmCall);
392384

393385
return message;
394386
} catch (error) {
395387
llmCall.error = errorToString(error);
396-
try {
397-
await appContext().llmCallService.saveResponse(llmCall);
398-
} catch (e) {
399-
logger.warn(e, `Error saving LlmCall response with error ${e.message}`);
400-
}
388+
this.saveLlmCallResponse(llmCall);
401389

402390
span.recordException(error);
403391
throw error;
@@ -409,13 +397,15 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
409397
override async streamText(
410398
llmMessages: LlmMessage[],
411399
onChunkCallback: (chunk: TextStreamPart<any>) => void,
412-
opts?: GenerateTextOptions,
400+
opts: GenerateTextOptions | GenerateJsonOptions = {},
413401
): Promise<GenerationStats> {
414402
const combinedOpts = { ...this.defaultOptions, ...opts };
415403
return withActiveSpan(`streamText ${combinedOpts?.id ?? ''}`, async (span) => {
416404
// The processMessages method now correctly returns CoreMessage[]
417405
const messages: CoreMessage[] = this.processMessages(llmMessages);
418406

407+
this.configureOptions(combinedOpts);
408+
419409
const prompt = messages.map((m) => (typeof m.content === 'string' ? m.content : m.content.map((p) => ('text' in p ? p.text : '')).join(''))).join('\n');
420410
span.setAttributes({
421411
inputChars: prompt.length,
@@ -431,17 +421,7 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
431421
callStack: callStack(),
432422
settings: combinedOpts,
433423
};
434-
let llmCall: LlmCall;
435-
try {
436-
llmCall = await appContext().llmCallService.saveRequest(createLlmCallRequest);
437-
} catch (e) {
438-
// If the initial save fails then we'll just save it later with the response
439-
llmCall = {
440-
...createLlmCallRequest,
441-
id: randomUUID(),
442-
requestTime: Date.now(),
443-
};
444-
}
424+
const llmCall: LlmCall = await this.saveLlmCallRequest(createLlmCallRequest);
445425

446426
const requestTime = Date.now();
447427

@@ -548,15 +528,32 @@ export abstract class AiLLM<Provider extends ProviderV2> extends BaseLLM {
548528
totalCost,
549529
});
550530

551-
try {
552-
await appContext().llmCallService.saveResponse(llmCall);
553-
} catch (e) {
554-
logger.error(e);
555-
}
531+
this.saveLlmCallResponse(llmCall);
556532

557533
if (finishReason !== 'stop') throw new Error(`Unexpected finish reason: ${finishReason}`);
558534

559535
return stats;
560536
});
561537
}
538+
539+
/**
 * Persists the LLM call request via the LlmCallService.
 *
 * Best-effort: if the initial save fails, the failure is logged and a
 * synthetic in-memory {@link LlmCall} (fresh id, current requestTime) is
 * returned so the call can still be saved later with the response.
 * @param llmCall The request details to persist.
 * @returns The saved LlmCall, or the in-memory fallback on save failure.
 */
async saveLlmCallRequest(llmCall: CreateLlmRequest): Promise<LlmCall> {
	try {
		return await appContext().llmCallService.saveRequest(llmCall);
	} catch (e) {
		// If the initial save fails then we'll just save it later with the response.
		// Log rather than swallow silently so persistence problems are visible.
		logger.warn(e, `Error saving LlmCall request ${e.message}`);
		return {
			...llmCall,
			id: randomUUID(),
			requestTime: Date.now(),
		};
	}
}
551+
552+
/**
 * Persists the LLM call response via the LlmCallService.
 *
 * Never throws: a save failure is logged and suppressed so callers can
 * invoke this fire-and-forget (as _generateMessage/streamText do, without
 * awaiting) without risking the main generation flow.
 * @param llmCall The completed call (including response/error fields) to persist.
 */
async saveLlmCallResponse(llmCall: LlmCall) {
	try {
		await appContext().llmCallService.saveResponse(llmCall);
	} catch (e) {
		// Keep the contextual message the pre-refactor call sites logged,
		// rather than a bare logger.error(e) with no indication of where it came from.
		logger.warn(e, `Error saving LlmCall response ${e.message}`);
	}
}
562559
}

0 commit comments

Comments
 (0)