@@ -48,12 +48,86 @@ export type KiloExclusiveModel = {
4848 inference_provider_restriction : ReadonlyArray < OpenRouterInferenceProviderId > ;
4949} ;
5050
/** How an over-cap token field was rewritten: deleted from the body, or lowered to the model cap. */
type TokenLimitMutation = 'removed' | 'clamped';

/**
 * Emits a warning describing a token-limit field that exceeded the model cap
 * and was rewritten on the outgoing request.
 *
 * @param requestToMutate - Request being rewritten; only its `kind` is logged.
 * @param kiloExclusiveModel - Model whose `public_id` and `max_completion_tokens` are logged.
 * @param field - Name of the body field that carried the oversized value.
 * @param requestedValue - The value originally present on the request.
 * @param mutation - Whether the field was removed or clamped.
 */
function logMaxTokenMutation(
  requestToMutate: GatewayRequest,
  kiloExclusiveModel: KiloExclusiveModel,
  field: 'max_completion_tokens' | 'max_tokens' | 'max_output_tokens',
  requestedValue: number,
  mutation: TokenLimitMutation
): void {
  console.warn('[removeNonSensicalMaxTokens] mutated request with token limit above model cap', {
    model: kiloExclusiveModel.public_id,
    requestKind: requestToMutate.kind,
    field,
    requestedValue,
    modelMaxCompletionTokens: kiloExclusiveModel.max_completion_tokens,
    mutation,
  });
}
69+
/**
 * Rewrites output-token limits on the request that exceed the model's
 * `max_completion_tokens`, logging every rewrite via `logMaxTokenMutation`.
 *
 * - `chat_completions`: `max_completion_tokens` and `max_tokens` are checked
 *   independently; each one that is above the cap is deleted from the body.
 * - `responses`: `max_output_tokens` is deleted when above the cap.
 * - `messages`: `max_tokens` is lowered to the cap instead of being deleted.
 *
 * Values at or below the cap, and absent values, are left untouched.
 * Request kinds other than the three above are not modified.
 *
 * @param requestToMutate - Request whose body is edited in place.
 * @param kiloExclusiveModel - Model supplying the `max_completion_tokens` cap.
 */
function removeNonSensicalMaxTokens(
  requestToMutate: GatewayRequest,
  kiloExclusiveModel: KiloExclusiveModel
): void {
  // OpenClaw sometimes puts numbers in that are too large and some providers will reject the request.
  const cap = kiloExclusiveModel.max_completion_tokens;

  switch (requestToMutate.kind) {
    case 'chat_completions': {
      const body = requestToMutate.body;

      const requestedCompletionTokens = body.max_completion_tokens;
      if (
        requestedCompletionTokens !== undefined &&
        requestedCompletionTokens !== null &&
        requestedCompletionTokens > cap
      ) {
        logMaxTokenMutation(
          requestToMutate,
          kiloExclusiveModel,
          'max_completion_tokens',
          requestedCompletionTokens,
          'removed'
        );
        delete body.max_completion_tokens;
      }

      // Checked separately: a request may carry this field as well as (or instead of) the one above.
      const requestedMaxTokens = body.max_tokens;
      if (requestedMaxTokens !== undefined && requestedMaxTokens > cap) {
        logMaxTokenMutation(
          requestToMutate,
          kiloExclusiveModel,
          'max_tokens',
          requestedMaxTokens,
          'removed'
        );
        delete body.max_tokens;
      }
      break;
    }

    case 'responses': {
      const body = requestToMutate.body;
      const requestedOutputTokens = body.max_output_tokens;
      if (
        requestedOutputTokens !== undefined &&
        requestedOutputTokens !== null &&
        requestedOutputTokens > cap
      ) {
        logMaxTokenMutation(
          requestToMutate,
          kiloExclusiveModel,
          'max_output_tokens',
          requestedOutputTokens,
          'removed'
        );
        delete body.max_output_tokens;
      }
      break;
    }

    case 'messages': {
      const body = requestToMutate.body;
      const requestedMaxTokens = body.max_tokens;
      if (requestedMaxTokens !== undefined && requestedMaxTokens > cap) {
        logMaxTokenMutation(
          requestToMutate,
          kiloExclusiveModel,
          'max_tokens',
          requestedMaxTokens,
          'clamped'
        );
        // Clamped rather than deleted — presumably because this request kind
        // requires `max_tokens` to be present; confirm against the upstream API.
        body.max_tokens = cap;
      }
      break;
    }

    default:
      // Other request kinds carry no token-limit field handled here.
      break;
  }
}
123+
51124/** Rewrites a gateway request to target a Kilo-exclusive model. */
52125export function applyKiloExclusiveModelSettings (
53126 requestToMutate : GatewayRequest ,
54127 kiloExclusiveModel : KiloExclusiveModel
55128) {
56129 requestToMutate . body . model = kiloExclusiveModel . internal_id ;
130+ removeNonSensicalMaxTokens ( requestToMutate , kiloExclusiveModel ) ;
57131 const restriction = kiloExclusiveModel . inference_provider_restriction ;
58132 if ( restriction . length === 0 ) {
59133 return ;
0 commit comments