11import { cerebrasQwen3_235b_Thinking } from '#llm/services/cerebras' ;
2+ import { openaiGPT5mini } from '#llm/services/openai' ;
23import { vertexGemini_2_5_Flash } from '#llm/services/vertexai' ;
34import { countTokens } from '#llm/tokens' ;
45import { logger } from '#o11y/logger' ;
@@ -12,17 +13,19 @@ import { BaseLLM } from '../base-llm';
1213export class FastMediumLLM extends BaseLLM {
1314 private readonly providers : LLM [ ] ;
1415 private readonly cerebras : LLM ;
16+ private readonly openai : LLM ;
1517 private readonly gemini : LLM ;
1618
/**
 * Creates the composite "fast-medium" LLM backed by three providers:
 * Cerebras Qwen3 235b (thinking), OpenAI GPT-5 Mini and Vertex Gemini 2.5 Flash
 * (configured with `thinking: 'high'`).
 *
 * The cost callback handed to the base class always reports zero for input,
 * output and total cost. The advertised input-token limit of this LLM is the
 * largest limit reported by any of the underlying providers.
 */
constructor() {
	super('Fast Medium (Qwen3 235b (Cerebras) - GPT-5 Mini - Gemini 2.5 Flash)', 'multi', 'fast-medium', 0, () => {
		return { inputCost: 0, outputCost: 0, totalCost: 0 };
	});

	// Instantiate each provider by name so the fields never depend on array positions.
	const cerebrasProvider = cerebrasQwen3_235b_Thinking();
	const openaiProvider = openaiGPT5mini();
	const geminiProvider = vertexGemini_2_5_Flash({ thinking: 'high' });

	this.providers = [cerebrasProvider, openaiProvider, geminiProvider];
	this.cerebras = cerebrasProvider;
	this.openai = openaiProvider;
	this.gemini = geminiProvider;

	// Expose the most permissive input limit among the providers.
	let largestInputLimit = Number.NEGATIVE_INFINITY;
	for (const provider of this.providers) {
		largestInputLimit = Math.max(largestInputLimit, provider.getMaxInputTokens());
	}
	this.maxInputTokens = largestInputLimit;
}
@@ -62,8 +65,10 @@ export class FastMediumLLM extends BaseLLM {
6265 if ( tokens && this . cerebras . isConfigured ( ) && tokens < this . cerebras . getMaxInputTokens ( ) * 0.4 )
6366 return await this . cerebras . generateMessage ( messages , opts ) ;
6467 } catch ( e ) {
65- logger . warn ( e , `Error calling fast medium LLM with ${ tokens } tokens: ${ e . message } ` ) ;
68+ logger . warn ( e , `Error calling ${ this . cerebras . getId ( ) } with ${ tokens } tokens: ${ e . message } ` ) ;
6669 }
70+ if ( this . openai . isConfigured ( ) ) return await this . openai . generateMessage ( messages , opts ) ;
71+
6772 return await this . gemini . generateMessage ( messages , opts ) ;
6873 }
6974}
0 commit comments