Skip to content

Commit 15618ea

Browse files
authored
Merge branch 'main' into feature/openrouter-usage
2 parents 204ccc2 + 64f8dc3 commit 15618ea

15 files changed

Lines changed: 276 additions & 74 deletions

File tree

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@portkey-ai/gateway",
3-
"version": "1.9.15",
3+
"version": "1.9.17",
44
"description": "A fast AI gateway by Portkey",
55
"repository": {
66
"type": "git",

plugins/azure/contentSafety.ts

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -116,9 +116,7 @@ export const handler: PluginHandler<{
116116
);
117117

118118
// Check if any blocklist items were hit
119-
const hasBlocklistHit = response.blocklistsMatch?.some((match: any) => {
120-
return match.matchResults.length > 0;
121-
});
119+
const hasBlocklistHit = response.blocklistsMatch?.length > 0;
122120

123121
verdict = !(hasHarmfulContent || hasBlocklistHit);
124122
}

src/providers/anthropic/chatComplete.ts

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,8 @@ import {
33
Params,
44
Message,
55
ContentType,
6-
AnthropicPromptCache,
76
SYSTEM_MESSAGE_ROLES,
7+
PromptCache,
88
} from '../../types/requestBody';
99
import {
1010
ChatCompletionResponse,
@@ -19,7 +19,7 @@ import { AnthropicStreamState } from './types';
1919

2020
// TODO: this configuration does not enforce the maximum token limit for the input parameter. If you want to enforce this, you might need to add a custom validation function or a max property to the ParameterConfig interface, and then use it in the input configuration. However, this might be complex because the token count is not a simple length check, but depends on the specific tokenization method used by the model.
2121

22-
interface AnthropicTool extends AnthropicPromptCache {
22+
interface AnthropicTool extends PromptCache {
2323
name: string;
2424
description: string;
2525
input_schema: {
@@ -69,7 +69,7 @@ type AnthropicMessageContentItem =
6969
| AnthropicUrlImageContentItem
7070
| AnthropicTextContentItem;
7171

72-
interface AnthropicMessage extends Message, AnthropicPromptCache {
72+
interface AnthropicMessage extends Message, PromptCache {
7373
content: AnthropicMessageContentItem[];
7474
}
7575

@@ -180,7 +180,7 @@ export const AnthropicChatCompleteConfig: ProviderConfig = {
180180
let messages: AnthropicMessage[] = [];
181181
// Transform the chat messages into a simple prompt
182182
if (!!params.messages) {
183-
params.messages.forEach((msg: Message & AnthropicPromptCache) => {
183+
params.messages.forEach((msg: Message & PromptCache) => {
184184
if (SYSTEM_MESSAGE_ROLES.includes(msg.role)) return;
185185

186186
if (msg.role === 'assistant') {
@@ -230,7 +230,7 @@ export const AnthropicChatCompleteConfig: ProviderConfig = {
230230
let systemMessages: AnthropicMessageContentItem[] = [];
231231
// Transform the chat messages into a simple prompt
232232
if (!!params.messages) {
233-
params.messages.forEach((msg: Message & AnthropicPromptCache) => {
233+
params.messages.forEach((msg: Message & PromptCache) => {
234234
if (
235235
SYSTEM_MESSAGE_ROLES.includes(msg.role) &&
236236
msg.content &&

src/providers/azure-openai/api.ts

Lines changed: 3 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -76,9 +76,8 @@ const AzureOpenAIAPIConfig: ProviderAPIConfig = {
7676
}
7777
}
7878

79-
const path = gatewayRequestURL.split('/v1')?.[1];
8079
const urlObj = new URL(gatewayRequestURL);
81-
const pathname = urlObj.pathname;
80+
const pathname = urlObj.pathname.replace('/v1', '');
8281
const searchParams = urlObj.searchParams;
8382
if (apiVersion) {
8483
searchParams.set('api-version', apiVersion);
@@ -110,7 +109,7 @@ const AzureOpenAIAPIConfig: ProviderAPIConfig = {
110109
return `/realtime?api-version=${apiVersion}&deployment=${deploymentId}`;
111110
}
112111
case 'createModelResponse': {
113-
return `/responses?${searchParams.toString()}`;
112+
return `${pathname}?${searchParams.toString()}`;
114113
}
115114
case 'getModelResponse': {
116115
return `${pathname}?${searchParams.toString()}`;
@@ -122,31 +121,19 @@ const AzureOpenAIAPIConfig: ProviderAPIConfig = {
122121
return `${pathname}?${searchParams.toString()}`;
123122
}
124123
case 'uploadFile':
125-
return `${path}?api-version=${apiVersion}`;
126124
case 'retrieveFile':
127-
return `${path}?api-version=${apiVersion}`;
128125
case 'listFiles':
129-
return `${path}?api-version=${apiVersion}`;
130126
case 'deleteFile':
131-
return `${path}?api-version=${apiVersion}`;
132127
case 'retrieveFileContent':
133-
return `${path}?api-version=${apiVersion}`;
134128
case 'createFinetune':
135-
return `${path}?api-version=${apiVersion}`;
136129
case 'retrieveFinetune':
137-
return `${path}?api-version=${apiVersion}`;
138130
case 'listFinetunes':
139-
return `${path}?api-version=${apiVersion}`;
140131
case 'cancelFinetune':
141-
return `${path}?api-version=${apiVersion}`;
142132
case 'createBatch':
143-
return `${path}?api-version=${apiVersion}`;
144133
case 'retrieveBatch':
145-
return `${path}?api-version=${apiVersion}`;
146134
case 'cancelBatch':
147-
return `${path}?api-version=${apiVersion}`;
148135
case 'listBatches':
149-
return `${path}?api-version=${apiVersion}`;
136+
return `${pathname}?api-version=${apiVersion}`;
150137
default:
151138
return '';
152139
}

src/providers/bedrock/chatComplete.ts

Lines changed: 92 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -69,15 +69,30 @@ export interface BedrockConverseAI21ChatCompletionsParams
6969
countPenalty?: number;
7070
}
7171

72-
const getMessageTextContentArray = (message: Message): { text: string }[] => {
72+
const getMessageTextContentArray = (
73+
message: Message
74+
): Array<{ text: string } | { cachePoint: { type: string } }> => {
7375
if (message.content && typeof message.content === 'object') {
74-
return message.content
75-
.filter((item) => item.type === 'text')
76-
.map((item) => {
77-
return {
78-
text: item.text || '',
79-
};
76+
const filteredContentMessages = message.content.filter(
77+
(item) => item.type === 'text'
78+
);
79+
const finalContent: Array<
80+
{ text: string } | { cachePoint: { type: string } }
81+
> = [];
82+
filteredContentMessages.forEach((item) => {
83+
finalContent.push({
84+
text: item.text || '',
8085
});
86+
// push a cache point.
87+
if (item.cache_control) {
88+
finalContent.push({
89+
cachePoint: {
90+
type: 'default',
91+
},
92+
});
93+
}
94+
});
95+
return finalContent;
8196
}
8297
return [
8398
{
@@ -162,6 +177,15 @@ const getMessageContent = (message: Message) => {
162177
});
163178
}
164179
}
180+
181+
if (item.cache_control) {
182+
// if content item has `cache_control`, push the cache point to the out array
183+
out.push({
184+
cachePoint: {
185+
type: 'default',
186+
},
187+
});
188+
}
165189
});
166190
}
167191

@@ -219,7 +243,10 @@ export const BedrockConverseChatCompleteConfig: ProviderConfig = {
219243
transform: (params: BedrockChatCompletionsParams) => {
220244
if (!params.messages) return;
221245
const systemMessages = params.messages.reduce(
222-
(acc: { text: string }[], msg) => {
246+
(
247+
acc: Array<{ text: string } | { cachePoint: { type: string } }>,
248+
msg
249+
) => {
223250
if (SYSTEM_MESSAGE_ROLES.includes(msg.role))
224251
return acc.concat(...getMessageTextContentArray(msg));
225252
return acc;
@@ -234,17 +261,29 @@ export const BedrockConverseChatCompleteConfig: ProviderConfig = {
234261
tools: {
235262
param: 'toolConfig',
236263
transform: (params: BedrockChatCompletionsParams) => {
237-
const toolConfig = {
238-
tools: params.tools?.map((tool) => {
239-
if (!tool.function) return;
240-
return {
241-
toolSpec: {
242-
name: tool.function.name,
243-
description: tool.function.description,
244-
inputSchema: { json: tool.function.parameters },
264+
const canBeAmazonModel = params.model?.includes('amazon');
265+
const tools: Array<
266+
| { toolSpec: { name: string; description?: string; inputSchema: any } }
267+
| { cachePoint: { type: string } }
268+
> = [];
269+
params.tools?.forEach((tool) => {
270+
tools.push({
271+
toolSpec: {
272+
name: tool.function.name,
273+
description: tool.function.description,
274+
inputSchema: { json: tool.function.parameters },
275+
},
276+
});
277+
if (tool.cache_control && !canBeAmazonModel) {
278+
tools.push({
279+
cachePoint: {
280+
type: 'default',
245281
},
246-
};
247-
}),
282+
});
283+
}
284+
});
285+
const toolConfig = {
286+
tools: tools,
248287
};
249288
let toolChoice = undefined;
250289
if (params.tool_choice) {
@@ -312,6 +351,11 @@ export const BedrockConverseChatCompleteConfig: ProviderConfig = {
312351
transform: (params: BedrockChatCompletionsParams) =>
313352
transformAdditionalModelRequestFields(params),
314353
},
354+
response_format: {
355+
param: 'additionalModelRequestFields',
356+
transform: (params: BedrockChatCompletionsParams) =>
357+
transformAdditionalModelRequestFields(params),
358+
},
315359
};
316360

317361
type BedrockContentItem = {
@@ -341,6 +385,9 @@ type BedrockContentItem = {
341385
bytes: string;
342386
};
343387
};
388+
cachePoint?: {
389+
type: string;
390+
};
344391
};
345392

346393
interface BedrockChatCompletionResponse {
@@ -358,6 +405,10 @@ interface BedrockChatCompletionResponse {
358405
inputTokens: number;
359406
outputTokens: number;
360407
totalTokens: number;
408+
cacheReadInputTokenCount?: number;
409+
cacheReadInputTokens?: number;
410+
cacheWriteInputTokenCount?: number;
411+
cacheWriteInputTokens?: number;
361412
};
362413
}
363414

@@ -421,6 +472,10 @@ export const BedrockChatCompleteResponseTransform: (
421472
}
422473

423474
if ('output' in response) {
475+
const shouldSendCacheUsage =
476+
response.usage.cacheWriteInputTokens ||
477+
response.usage.cacheReadInputTokens;
478+
424479
let content: string = '';
425480
content = response.output.message.content
426481
.filter((item) => item.text)
@@ -453,6 +508,10 @@ export const BedrockChatCompleteResponseTransform: (
453508
prompt_tokens: response.usage.inputTokens,
454509
completion_tokens: response.usage.outputTokens,
455510
total_tokens: response.usage.totalTokens,
511+
...(shouldSendCacheUsage && {
512+
cache_read_input_tokens: response.usage.cacheReadInputTokens,
513+
cache_creation_input_tokens: response.usage.cacheWriteInputTokens,
514+
}),
456515
},
457516
};
458517
const toolCalls = response.output.message.content
@@ -503,6 +562,10 @@ export interface BedrockChatCompleteStreamChunk {
503562
inputTokens: number;
504563
outputTokens: number;
505564
totalTokens: number;
565+
cacheReadInputTokenCount?: number;
566+
cacheReadInputTokens?: number;
567+
cacheWriteInputTokenCount?: number;
568+
cacheWriteInputTokens?: number;
506569
};
507570
}
508571

@@ -534,6 +597,9 @@ export const BedrockChatCompleteStreamChunkTransform: (
534597
}
535598

536599
if (parsedChunk.usage) {
600+
const shouldSendCacheUsage =
601+
parsedChunk.usage.cacheWriteInputTokens ||
602+
parsedChunk.usage.cacheReadInputTokens;
537603
return [
538604
`data: ${JSON.stringify({
539605
id: fallbackId,
@@ -552,6 +618,11 @@ export const BedrockChatCompleteStreamChunkTransform: (
552618
prompt_tokens: parsedChunk.usage.inputTokens,
553619
completion_tokens: parsedChunk.usage.outputTokens,
554620
total_tokens: parsedChunk.usage.totalTokens,
621+
...(shouldSendCacheUsage && {
622+
cache_read_input_tokens: parsedChunk.usage.cacheReadInputTokens,
623+
cache_creation_input_tokens:
624+
parsedChunk.usage.cacheWriteInputTokens,
625+
}),
555626
},
556627
})}\n\n`,
557628
`data: [DONE]\n\n`,
@@ -650,6 +721,9 @@ export const BedrockConverseAnthropicChatCompleteConfig: ProviderConfig = {
650721
transform: (params: BedrockConverseAnthropicChatCompletionsParams) =>
651722
transformAnthropicAdditionalModelRequestFields(params),
652723
},
724+
anthropic_beta: {
725+
param: 'anthropic_beta',
726+
},
653727
};
654728

655729
export const BedrockConverseCohereChatCompleteConfig: ProviderConfig = {

src/providers/bedrock/utils.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,9 @@ export const transformAdditionalModelRequestFields = (
9797
if (params['top_k']) {
9898
additionalModelRequestFields['top_k'] = params['top_k'];
9999
}
100+
if (params['response_format']) {
101+
additionalModelRequestFields['response_format'] = params['response_format'];
102+
}
100103
return additionalModelRequestFields;
101104
};
102105

src/providers/google-vertex-ai/chatComplete.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -337,6 +337,10 @@ export const VertexGoogleChatCompleteConfig: ProviderConfig = {
337337
labels: {
338338
param: 'labels',
339339
},
340+
thinking: {
341+
param: 'generationConfig',
342+
transform: (params: Params) => transformGenerationConfig(params),
343+
},
340344
};
341345

342346
interface AnthorpicTextContentItem {

src/providers/google-vertex-ai/transformGenerationConfig.ts

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,5 +50,12 @@ export function transformGenerationConfig(params: Params) {
5050
generationConfig['responseSchema'] = schema;
5151
}
5252

53+
if (params?.thinking) {
54+
const thinkingConfig: Record<string, any> = {};
55+
thinkingConfig['include_thoughts'] = true;
56+
thinkingConfig['thinking_budget'] = params.thinking.budget_tokens;
57+
generationConfig['thinking_config'] = thinkingConfig;
58+
}
59+
5360
return generationConfig;
5461
}

src/providers/google/chatComplete.ts

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,12 @@ const transformGenerationConfig = (params: Params) => {
7171
}
7272
generationConfig['responseSchema'] = schema;
7373
}
74+
if (params?.thinking) {
75+
const thinkingConfig: Record<string, any> = {};
76+
thinkingConfig['include_thoughts'] = true;
77+
thinkingConfig['thinking_budget'] = params.thinking.budget_tokens;
78+
generationConfig['thinking_config'] = thinkingConfig;
79+
}
7480
return generationConfig;
7581
};
7682

@@ -405,6 +411,10 @@ export const GoogleChatCompleteConfig: ProviderConfig = {
405411
}
406412
},
407413
},
414+
thinking: {
415+
param: 'generationConfig',
416+
transform: (params: Params) => transformGenerationConfig(params),
417+
},
408418
};
409419

410420
export interface GoogleErrorResponse {

0 commit comments

Comments (0)