Portkey-AI
diff --git a/package-lock.json b/package-lock.json
Lines changed: 2 additions & 2 deletions
diff --git a/package.json b/package.json
Lines changed: 1 addition & 1 deletion
diff --git a/plugins/azure/contentSafety.ts b/plugins/azure/contentSafety.ts
Lines changed: 1 addition & 3 deletions
diff --git a/src/providers/anthropic/chatComplete.ts b/src/providers/anthropic/chatComplete.ts
Lines changed: 5 additions & 5 deletions
diff --git a/src/providers/azure-openai/api.ts b/src/providers/azure-openai/api.ts
Lines changed: 3 additions & 16 deletions
diff --git a/src/providers/bedrock/chatComplete.ts b/src/providers/bedrock/chatComplete.ts
Lines changed: 92 additions & 18 deletions
diff --git a/src/providers/bedrock/utils.ts b/src/providers/bedrock/utils.ts
Lines changed: 3 additions & 0 deletions
diff --git a/src/providers/google-vertex-ai/chatComplete.ts b/src/providers/google-vertex-ai/chatComplete.ts
Lines changed: 4 additions & 0 deletions
diff --git a/src/providers/google-vertex-ai/transformGenerationConfig.ts b/src/providers/google-vertex-ai/transformGenerationConfig.ts
Lines changed: 7 additions & 0 deletions
diff --git a/src/providers/google/chatComplete.ts b/src/providers/google/chatComplete.ts
Lines changed: 10 additions & 0 deletions
@@ -1,6 +1,6 @@
 {
   "name": "@portkey-ai/gateway",
-  "version": "1.9.15",
+  "version": "1.9.17",
   "description": "A fast AI gateway by Portkey",
   "repository": {
     "type": "git",
 
@@ -116,9 +116,7 @@ export const handler: PluginHandler<{
     );
 
     // Check if any blocklist items were hit
-    const hasBlocklistHit = response.blocklistsMatch?.some((match: any) => {
-      return match.matchResults.length > 0;
-    });
+    const hasBlocklistHit = response.blocklistsMatch?.length > 0;
 
     verdict = !(hasHarmfulContent || hasBlocklistHit);
   }
 
@@ -3,8 +3,8 @@ import {
   Params,
   Message,
   ContentType,
-  AnthropicPromptCache,
   SYSTEM_MESSAGE_ROLES,
+  PromptCache,
 } from '../../types/requestBody';
 import {
   ChatCompletionResponse,
@@ -19,7 +19,7 @@ import { AnthropicStreamState } from './types';
 
 // TODO: this configuration does not enforce the maximum token limit for the input parameter. If you want to enforce this, you might need to add a custom validation function or a max property to the ParameterConfig interface, and then use it in the input configuration. However, this might be complex because the token count is not a simple length check, but depends on the specific tokenization method used by the model.
 
-interface AnthropicTool extends AnthropicPromptCache {
+interface AnthropicTool extends PromptCache {
   name: string;
   description: string;
   input_schema: {
@@ -69,7 +69,7 @@ type AnthropicMessageContentItem =
   | AnthropicUrlImageContentItem
   | AnthropicTextContentItem;
 
-interface AnthropicMessage extends Message, AnthropicPromptCache {
+interface AnthropicMessage extends Message, PromptCache {
   content: AnthropicMessageContentItem[];
 }
 
@@ -180,7 +180,7 @@ export const AnthropicChatCompleteConfig: ProviderConfig = {
         let messages: AnthropicMessage[] = [];
         // Transform the chat messages into a simple prompt
         if (!!params.messages) {
-          params.messages.forEach((msg: Message & AnthropicPromptCache) => {
+          params.messages.forEach((msg: Message & PromptCache) => {
             if (SYSTEM_MESSAGE_ROLES.includes(msg.role)) return;
 
             if (msg.role === 'assistant') {
@@ -230,7 +230,7 @@ export const AnthropicChatCompleteConfig: ProviderConfig = {
         let systemMessages: AnthropicMessageContentItem[] = [];
         // Transform the chat messages into a simple prompt
         if (!!params.messages) {
-          params.messages.forEach((msg: Message & AnthropicPromptCache) => {
+          params.messages.forEach((msg: Message & PromptCache) => {
             if (
               SYSTEM_MESSAGE_ROLES.includes(msg.role) &&
               msg.content &&
 
@@ -76,9 +76,8 @@ const AzureOpenAIAPIConfig: ProviderAPIConfig = {
       }
     }
 
-    const path = gatewayRequestURL.split('/v1')?.[1];
     const urlObj = new URL(gatewayRequestURL);
-    const pathname = urlObj.pathname;
+    const pathname = urlObj.pathname.replace('/v1', '');
     const searchParams = urlObj.searchParams;
     if (apiVersion) {
       searchParams.set('api-version', apiVersion);
@@ -110,7 +109,7 @@ const AzureOpenAIAPIConfig: ProviderAPIConfig = {
         return `/realtime?api-version=${apiVersion}&deployment=${deploymentId}`;
       }
       case 'createModelResponse': {
-        return `/responses?${searchParams.toString()}`;
+        return `${pathname}?${searchParams.toString()}`;
       }
       case 'getModelResponse': {
         return `${pathname}?${searchParams.toString()}`;
@@ -122,31 +121,19 @@ const AzureOpenAIAPIConfig: ProviderAPIConfig = {
         return `${pathname}?${searchParams.toString()}`;
       }
       case 'uploadFile':
-        return `${path}?api-version=${apiVersion}`;
       case 'retrieveFile':
-        return `${path}?api-version=${apiVersion}`;
       case 'listFiles':
-        return `${path}?api-version=${apiVersion}`;
       case 'deleteFile':
-        return `${path}?api-version=${apiVersion}`;
       case 'retrieveFileContent':
-        return `${path}?api-version=${apiVersion}`;
       case 'createFinetune':
-        return `${path}?api-version=${apiVersion}`;
       case 'retrieveFinetune':
-        return `${path}?api-version=${apiVersion}`;
       case 'listFinetunes':
-        return `${path}?api-version=${apiVersion}`;
       case 'cancelFinetune':
-        return `${path}?api-version=${apiVersion}`;
       case 'createBatch':
-        return `${path}?api-version=${apiVersion}`;
       case 'retrieveBatch':
-        return `${path}?api-version=${apiVersion}`;
       case 'cancelBatch':
-        return `${path}?api-version=${apiVersion}`;
       case 'listBatches':
-        return `${path}?api-version=${apiVersion}`;
+        return `${pathname}?api-version=${apiVersion}`;
       default:
         return '';
     }
 
@@ -69,15 +69,30 @@ export interface BedrockConverseAI21ChatCompletionsParams
   countPenalty?: number;
 }
 
-const getMessageTextContentArray = (message: Message): { text: string }[] => {
+const getMessageTextContentArray = (
+  message: Message
+): Array<{ text: string } | { cachePoint: { type: string } }> => {
   if (message.content && typeof message.content === 'object') {
-    return message.content
-      .filter((item) => item.type === 'text')
-      .map((item) => {
-        return {
-          text: item.text || '',
-        };
+    const filteredContentMessages = message.content.filter(
+      (item) => item.type === 'text'
+    );
+    const finalContent: Array<
+      { text: string } | { cachePoint: { type: string } }
+    > = [];
+    filteredContentMessages.forEach((item) => {
+      finalContent.push({
+        text: item.text || '',
       });
+      // push a cache point.
+      if (item.cache_control) {
+        finalContent.push({
+          cachePoint: {
+            type: 'default',
+          },
+        });
+      }
+    });
+    return finalContent;
   }
   return [
     {
@@ -162,6 +177,15 @@ const getMessageContent = (message: Message) => {
           });
         }
       }
+
+      if (item.cache_control) {
+        // if content item has `cache_control`, push the cache point to the out array
+        out.push({
+          cachePoint: {
+            type: 'default',
+          },
+        });
+      }
     });
   }
 
@@ -219,7 +243,10 @@ export const BedrockConverseChatCompleteConfig: ProviderConfig = {
       transform: (params: BedrockChatCompletionsParams) => {
         if (!params.messages) return;
         const systemMessages = params.messages.reduce(
-          (acc: { text: string }[], msg) => {
+          (
+            acc: Array<{ text: string } | { cachePoint: { type: string } }>,
+            msg
+          ) => {
             if (SYSTEM_MESSAGE_ROLES.includes(msg.role))
               return acc.concat(...getMessageTextContentArray(msg));
             return acc;
@@ -234,17 +261,29 @@ export const BedrockConverseChatCompleteConfig: ProviderConfig = {
   tools: {
     param: 'toolConfig',
     transform: (params: BedrockChatCompletionsParams) => {
-      const toolConfig = {
-        tools: params.tools?.map((tool) => {
-          if (!tool.function) return;
-          return {
-            toolSpec: {
-              name: tool.function.name,
-              description: tool.function.description,
-              inputSchema: { json: tool.function.parameters },
+      const canBeAmazonModel = params.model?.includes('amazon');
+      const tools: Array<
+        | { toolSpec: { name: string; description?: string; inputSchema: any } }
+        | { cachePoint: { type: string } }
+      > = [];
+      params.tools?.forEach((tool) => {
+        tools.push({
+          toolSpec: {
+            name: tool.function.name,
+            description: tool.function.description,
+            inputSchema: { json: tool.function.parameters },
+          },
+        });
+        if (tool.cache_control && !canBeAmazonModel) {
+          tools.push({
+            cachePoint: {
+              type: 'default',
             },
-          };
-        }),
+          });
+        }
+      });
+      const toolConfig = {
+        tools: tools,
       };
       let toolChoice = undefined;
       if (params.tool_choice) {
@@ -312,6 +351,11 @@ export const BedrockConverseChatCompleteConfig: ProviderConfig = {
     transform: (params: BedrockChatCompletionsParams) =>
       transformAdditionalModelRequestFields(params),
   },
+  response_format: {
+    param: 'additionalModelRequestFields',
+    transform: (params: BedrockChatCompletionsParams) =>
+      transformAdditionalModelRequestFields(params),
+  },
 };
 
 type BedrockContentItem = {
@@ -341,6 +385,9 @@ type BedrockContentItem = {
       bytes: string;
     };
   };
+  cachePoint?: {
+    type: string;
+  };
 };
 
 interface BedrockChatCompletionResponse {
@@ -358,6 +405,10 @@ interface BedrockChatCompletionResponse {
     inputTokens: number;
     outputTokens: number;
     totalTokens: number;
+    cacheReadInputTokenCount?: number;
+    cacheReadInputTokens?: number;
+    cacheWriteInputTokenCount?: number;
+    cacheWriteInputTokens?: number;
   };
 }
 
@@ -421,6 +472,10 @@ export const BedrockChatCompleteResponseTransform: (
   }
 
   if ('output' in response) {
+    const shouldSendCacheUsage =
+      response.usage.cacheWriteInputTokens ||
+      response.usage.cacheReadInputTokens;
+
     let content: string = '';
     content = response.output.message.content
       .filter((item) => item.text)
@@ -453,6 +508,10 @@ export const BedrockChatCompleteResponseTransform: (
         prompt_tokens: response.usage.inputTokens,
         completion_tokens: response.usage.outputTokens,
         total_tokens: response.usage.totalTokens,
+        ...(shouldSendCacheUsage && {
+          cache_read_input_tokens: response.usage.cacheReadInputTokens,
+          cache_creation_input_tokens: response.usage.cacheWriteInputTokens,
+        }),
       },
     };
     const toolCalls = response.output.message.content
@@ -503,6 +562,10 @@ export interface BedrockChatCompleteStreamChunk {
     inputTokens: number;
     outputTokens: number;
     totalTokens: number;
+    cacheReadInputTokenCount?: number;
+    cacheReadInputTokens?: number;
+    cacheWriteInputTokenCount?: number;
+    cacheWriteInputTokens?: number;
   };
 }
 
@@ -534,6 +597,9 @@ export const BedrockChatCompleteStreamChunkTransform: (
   }
 
   if (parsedChunk.usage) {
+    const shouldSendCacheUsage =
+      parsedChunk.usage.cacheWriteInputTokens ||
+      parsedChunk.usage.cacheReadInputTokens;
     return [
       `data: ${JSON.stringify({
         id: fallbackId,
@@ -552,6 +618,11 @@ export const BedrockChatCompleteStreamChunkTransform: (
           prompt_tokens: parsedChunk.usage.inputTokens,
           completion_tokens: parsedChunk.usage.outputTokens,
           total_tokens: parsedChunk.usage.totalTokens,
+          ...(shouldSendCacheUsage && {
+            cache_read_input_tokens: parsedChunk.usage.cacheReadInputTokens,
+            cache_creation_input_tokens:
+              parsedChunk.usage.cacheWriteInputTokens,
+          }),
         },
       })}\n\n`,
       `data: [DONE]\n\n`,
@@ -650,6 +721,9 @@ export const BedrockConverseAnthropicChatCompleteConfig: ProviderConfig = {
     transform: (params: BedrockConverseAnthropicChatCompletionsParams) =>
       transformAnthropicAdditionalModelRequestFields(params),
   },
+  anthropic_beta: {
+    param: 'anthropic_beta',
+  },
 };
 
 export const BedrockConverseCohereChatCompleteConfig: ProviderConfig = {
 
@@ -97,6 +97,9 @@ export const transformAdditionalModelRequestFields = (
   if (params['top_k']) {
     additionalModelRequestFields['top_k'] = params['top_k'];
   }
+  if (params['response_format']) {
+    additionalModelRequestFields['response_format'] = params['response_format'];
+  }
   return additionalModelRequestFields;
 };
 
 
@@ -337,6 +337,10 @@ export const VertexGoogleChatCompleteConfig: ProviderConfig = {
   labels: {
     param: 'labels',
   },
+  thinking: {
+    param: 'generationConfig',
+    transform: (params: Params) => transformGenerationConfig(params),
+  },
 };
 
 interface AnthorpicTextContentItem {
 
@@ -50,5 +50,12 @@ export function transformGenerationConfig(params: Params) {
     generationConfig['responseSchema'] = schema;
   }
 
+  if (params?.thinking) {
+    const thinkingConfig: Record<string, any> = {};
+    thinkingConfig['include_thoughts'] = true;
+    thinkingConfig['thinking_budget'] = params.thinking.budget_tokens;
+    generationConfig['thinking_config'] = thinkingConfig;
+  }
+
   return generationConfig;
 }
@@ -71,6 +71,12 @@ const transformGenerationConfig = (params: Params) => {
     }
     generationConfig['responseSchema'] = schema;
   }
+  if (params?.thinking) {
+    const thinkingConfig: Record<string, any> = {};
+    thinkingConfig['include_thoughts'] = true;
+    thinkingConfig['thinking_budget'] = params.thinking.budget_tokens;
+    generationConfig['thinking_config'] = thinkingConfig;
+  }
   return generationConfig;
 };
 
@@ -405,6 +411,10 @@ export const GoogleChatCompleteConfig: ProviderConfig = {
       }
     },
   },
+  thinking: {
+    param: 'generationConfig',
+    transform: (params: Params) => transformGenerationConfig(params),
+  },
 };
 
 export interface GoogleErrorResponse {
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "@portkey-ai/gateway",`
`3`		`- "version": "1.9.15",`
	`3`	`+ "version": "1.9.17",`
`4`	`4`	`"description": "A fast AI gateway by Portkey",`
`5`	`5`	`"repository": {`
`6`	`6`	`"type": "git",`
Original file line number	Diff line number	Diff line change
`@@ -116,9 +116,7 @@ export const handler: PluginHandler<{`
`116`	`116`	`);`
`117`	`117`
`118`	`118`	`// Check if any blocklist items were hit`
`119`		`- const hasBlocklistHit = response.blocklistsMatch?.some((match: any) => {`
`120`		`- return match.matchResults.length > 0;`
`121`		`- });`
	`119`	`+ const hasBlocklistHit = response.blocklistsMatch?.length > 0;`
`122`	`120`
`123`	`121`	`verdict = !(hasHarmfulContent \|\| hasBlocklistHit);`
`124`	`122`	`}`
Original file line number	Diff line number	Diff line change
`@@ -97,6 +97,9 @@ export const transformAdditionalModelRequestFields = (`
`97`	`97`	`if (params['top_k']) {`
`98`	`98`	`additionalModelRequestFields['top_k'] = params['top_k'];`
`99`	`99`	`}`
	`100`	`+ if (params['response_format']) {`
	`101`	`+ additionalModelRequestFields['response_format'] = params['response_format'];`
	`102`	`+ }`
`100`	`103`	`return additionalModelRequestFields;`
`101`	`104`	`};`
`102`	`105`
Original file line number	Diff line number	Diff line change
`@@ -50,5 +50,12 @@ export function transformGenerationConfig(params: Params) {`
`50`	`50`	`generationConfig['responseSchema'] = schema;`
`51`	`51`	`}`
`52`	`52`
	`53`	`+ if (params?.thinking) {`
	`54`	`+ const thinkingConfig: Record<string, any> = {};`
	`55`	`+ thinkingConfig['include_thoughts'] = true;`
	`56`	`+ thinkingConfig['thinking_budget'] = params.thinking.budget_tokens;`
	`57`	`+ generationConfig['thinking_config'] = thinkingConfig;`
	`58`	`+ }`
	`59`	`+`
`53`	`60`	`return generationConfig;`
`54`	`61`	`}`