Skip to content

Commit edef8a3

Browse files
Remove max tokens property when too great for kilo-exclusive model (#3668)
* Remove max tokens property when too great for kilo-exclusive model * fix(ai-gateway): log over-limit token request mutations Co-authored-by: kiloconnect[bot] <240665456+kiloconnect[bot]@users.noreply.github.com> * fix(ai-gateway): handle nullable token caps in mutation logging --------- Co-authored-by: chrarnoldus <12196001+chrarnoldus@users.noreply.github.com> Co-authored-by: kiloconnect[bot] <240665456+kiloconnect[bot]@users.noreply.github.com>
1 parent b14f97c commit edef8a3

1 file changed

Lines changed: 74 additions & 0 deletions

File tree

apps/web/src/lib/ai-gateway/providers/kilo-exclusive-model.ts

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,86 @@ export type KiloExclusiveModel = {
4848
inference_provider_restriction: ReadonlyArray<OpenRouterInferenceProviderId>;
4949
};
5050

51+
/**
 * How an over-limit token field was altered: `'removed'` means the field was
 * deleted from the request body, `'clamped'` means it was lowered to the
 * model's completion-token cap.
 */
type TokenLimitMutation = 'removed' | 'clamped';
52+
53+
/**
 * Emits a warning describing a token-limit field that was altered because the
 * requested value was above the model's completion-token cap.
 *
 * This function only logs; it does not modify the request itself.
 *
 * @param requestToMutate - Request being rewritten; only its `kind` is logged.
 * @param kiloExclusiveModel - Target model; its `public_id` and
 *   `max_completion_tokens` are included in the log payload.
 * @param field - Name of the request-body field that was altered.
 * @param requestedValue - Value the caller originally supplied for `field`.
 * @param mutation - Whether the field was removed or clamped.
 */
function logMaxTokenMutation(
  requestToMutate: GatewayRequest,
  kiloExclusiveModel: KiloExclusiveModel,
  field: 'max_completion_tokens' | 'max_tokens' | 'max_output_tokens',
  requestedValue: number,
  mutation: TokenLimitMutation
) {
  const { public_id: model, max_completion_tokens: modelMaxCompletionTokens } =
    kiloExclusiveModel;

  // Structured payload so the warning can be filtered by model, request kind or field.
  const details = {
    model,
    requestKind: requestToMutate.kind,
    field,
    requestedValue,
    modelMaxCompletionTokens,
    mutation,
  };

  console.warn(
    '[removeNonSensicalMaxTokens] mutated request with token limit above model cap',
    details
  );
}
69+
70+
/**
 * Strips or lowers output-token limits on a request when they exceed the
 * model's `max_completion_tokens`.
 *
 * - `chat_completions`: `max_completion_tokens` and `max_tokens` are each
 *   deleted from the body when above the cap.
 * - `responses`: `max_output_tokens` is deleted from the body when above the cap.
 * - `messages`: `max_tokens` is overwritten with the cap instead of deleted.
 *
 * Every change is reported through `logMaxTokenMutation` before the body is
 * modified. Requests of any other kind, and limits at or below the cap, are
 * left untouched.
 *
 * @param requestToMutate - Request whose body is modified in place.
 * @param kiloExclusiveModel - Model supplying the `max_completion_tokens` cap.
 */
function removeNonSensicalMaxTokens(
  requestToMutate: GatewayRequest,
  kiloExclusiveModel: KiloExclusiveModel
) {
  // OpenClaw sometimes sends limits that are too large, and some providers will reject the request.
  const cap = kiloExclusiveModel.max_completion_tokens;

  switch (requestToMutate.kind) {
    case 'chat_completions': {
      const { body } = requestToMutate;

      const requestedCompletionTokens = body.max_completion_tokens;
      if (
        requestedCompletionTokens !== null &&
        requestedCompletionTokens !== undefined &&
        requestedCompletionTokens > cap
      ) {
        logMaxTokenMutation(
          requestToMutate,
          kiloExclusiveModel,
          'max_completion_tokens',
          requestedCompletionTokens,
          'removed'
        );
        delete body.max_completion_tokens;
      }

      // Checked independently of max_completion_tokens: both fields may be present at once.
      const requestedMaxTokens = body.max_tokens;
      if (requestedMaxTokens !== undefined && requestedMaxTokens > cap) {
        logMaxTokenMutation(
          requestToMutate,
          kiloExclusiveModel,
          'max_tokens',
          requestedMaxTokens,
          'removed'
        );
        delete body.max_tokens;
      }
      break;
    }

    case 'responses': {
      const { body } = requestToMutate;

      const requestedOutputTokens = body.max_output_tokens;
      if (
        requestedOutputTokens !== null &&
        requestedOutputTokens !== undefined &&
        requestedOutputTokens > cap
      ) {
        logMaxTokenMutation(
          requestToMutate,
          kiloExclusiveModel,
          'max_output_tokens',
          requestedOutputTokens,
          'removed'
        );
        delete body.max_output_tokens;
      }
      break;
    }

    case 'messages': {
      const { body } = requestToMutate;

      const requestedMaxTokens = body.max_tokens;
      if (requestedMaxTokens !== undefined && requestedMaxTokens > cap) {
        logMaxTokenMutation(
          requestToMutate,
          kiloExclusiveModel,
          'max_tokens',
          requestedMaxTokens,
          'clamped'
        );
        // Unlike the other request kinds, the field is kept and lowered to the cap.
        body.max_tokens = cap;
      }
      break;
    }
  }
}
123+
51124
/** Rewrites a gateway request to target a Kilo-exclusive model. */
52125
export function applyKiloExclusiveModelSettings(
53126
requestToMutate: GatewayRequest,
54127
kiloExclusiveModel: KiloExclusiveModel
55128
) {
56129
requestToMutate.body.model = kiloExclusiveModel.internal_id;
130+
removeNonSensicalMaxTokens(requestToMutate, kiloExclusiveModel);
57131
const restriction = kiloExclusiveModel.inference_provider_restriction;
58132
if (restriction.length === 0) {
59133
return;

0 commit comments

Comments
 (0)