Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/providers/anthropic/chatComplete.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import {
Params,
Message,
ContentType,
AnthropicPromptCache,
SYSTEM_MESSAGE_ROLES,
PromptCache,
} from '../../types/requestBody';
import {
ChatCompletionResponse,
Expand All @@ -19,7 +19,7 @@ import { AnthropicStreamState } from './types';

// TODO: This configuration does not enforce the maximum token limit for the input parameter. Enforcing it would require a custom validation function, or a `max` property on the ParameterConfig interface that the input configuration then uses. This may be complex, because a token count is not a simple length check: it depends on the specific tokenization method used by the model.

interface AnthropicTool extends AnthropicPromptCache {
interface AnthropicTool extends PromptCache {
name: string;
description: string;
input_schema: {
Expand Down Expand Up @@ -69,7 +69,7 @@ type AnthropicMessageContentItem =
| AnthropicUrlImageContentItem
| AnthropicTextContentItem;

interface AnthropicMessage extends Message, AnthropicPromptCache {
// Message shape sent to Anthropic: a `Message` that may also carry the
// optional `cache_control` marker contributed by `PromptCache`.
interface AnthropicMessage extends Message, PromptCache {
// Declared as an array of Anthropic content items (image-URL or text items,
// among the other members of the AnthropicMessageContentItem union).
content: AnthropicMessageContentItem[];
}

Expand Down Expand Up @@ -180,7 +180,7 @@ export const AnthropicChatCompleteConfig: ProviderConfig = {
let messages: AnthropicMessage[] = [];
// Transform the chat messages into a simple prompt
if (!!params.messages) {
params.messages.forEach((msg: Message & AnthropicPromptCache) => {
params.messages.forEach((msg: Message & PromptCache) => {
if (SYSTEM_MESSAGE_ROLES.includes(msg.role)) return;

if (msg.role === 'assistant') {
Expand Down Expand Up @@ -230,7 +230,7 @@ export const AnthropicChatCompleteConfig: ProviderConfig = {
let systemMessages: AnthropicMessageContentItem[] = [];
// Transform the chat messages into a simple prompt
if (!!params.messages) {
params.messages.forEach((msg: Message & AnthropicPromptCache) => {
params.messages.forEach((msg: Message & PromptCache) => {
if (
SYSTEM_MESSAGE_ROLES.includes(msg.role) &&
msg.content &&
Expand Down
102 changes: 84 additions & 18 deletions src/providers/bedrock/chatComplete.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,30 @@ export interface BedrockConverseAI21ChatCompletionsParams
countPenalty?: number;
}

const getMessageTextContentArray = (message: Message): { text: string }[] => {
const getMessageTextContentArray = (
message: Message
): Array<{ text: string } | { cachePoint: { type: string } }> => {
if (message.content && typeof message.content === 'object') {
return message.content
.filter((item) => item.type === 'text')
.map((item) => {
return {
text: item.text || '',
};
const filteredContentMessages = message.content.filter(
(item) => item.type === 'text'
);
const finalContent: Array<
{ text: string } | { cachePoint: { type: string } }
> = [];
filteredContentMessages.forEach((item) => {
finalContent.push({
text: item.text || '',
});
// push a cache point.
if (item.cache_control) {
finalContent.push({
cachePoint: {
type: 'default',
},
});
}
});
return finalContent;
}
return [
{
Expand Down Expand Up @@ -162,6 +177,15 @@ const getMessageContent = (message: Message) => {
});
}
}

if (item.cache_control) {
// if content item has `cache_control`, push the cache point to the out array
out.push({
cachePoint: {
type: 'default',
},
});
}
});
}

Expand Down Expand Up @@ -219,7 +243,10 @@ export const BedrockConverseChatCompleteConfig: ProviderConfig = {
transform: (params: BedrockChatCompletionsParams) => {
if (!params.messages) return;
const systemMessages = params.messages.reduce(
(acc: { text: string }[], msg) => {
(
acc: Array<{ text: string } | { cachePoint: { type: string } }>,
msg
) => {
if (SYSTEM_MESSAGE_ROLES.includes(msg.role))
return acc.concat(...getMessageTextContentArray(msg));
return acc;
Expand All @@ -234,17 +261,29 @@ export const BedrockConverseChatCompleteConfig: ProviderConfig = {
tools: {
param: 'toolConfig',
transform: (params: BedrockChatCompletionsParams) => {
const toolConfig = {
tools: params.tools?.map((tool) => {
if (!tool.function) return;
return {
toolSpec: {
name: tool.function.name,
description: tool.function.description,
inputSchema: { json: tool.function.parameters },
const canBeAmazonModel = params.model?.includes('amazon');
const tools: Array<
| { toolSpec: { name: string; description?: string; inputSchema: any } }
| { cachePoint: { type: string } }
> = [];
params.tools?.forEach((tool) => {
tools.push({
toolSpec: {
name: tool.function.name,
description: tool.function.description,
inputSchema: { json: tool.function.parameters },
},
});
if (tool.cache_control && !canBeAmazonModel) {
tools.push({
cachePoint: {
type: 'default',
},
};
}),
});
}
});
const toolConfig = {
tools: tools,
};
let toolChoice = undefined;
if (params.tool_choice) {
Expand Down Expand Up @@ -341,6 +380,9 @@ type BedrockContentItem = {
bytes: string;
};
};
cachePoint?: {
type: string;
};
};

interface BedrockChatCompletionResponse {
Expand All @@ -358,6 +400,10 @@ interface BedrockChatCompletionResponse {
inputTokens: number;
outputTokens: number;
totalTokens: number;
cacheReadInputTokenCount?: number;
cacheReadInputTokens?: number;
cacheWriteInputTokenCount?: number;
cacheWriteInputTokens?: number;
};
}

Expand Down Expand Up @@ -421,6 +467,10 @@ export const BedrockChatCompleteResponseTransform: (
}

if ('output' in response) {
const shouldSendCacheUsage =
response.usage.cacheWriteInputTokens ||
response.usage.cacheReadInputTokens;

let content: string = '';
content = response.output.message.content
.filter((item) => item.text)
Expand Down Expand Up @@ -453,6 +503,10 @@ export const BedrockChatCompleteResponseTransform: (
prompt_tokens: response.usage.inputTokens,
completion_tokens: response.usage.outputTokens,
total_tokens: response.usage.totalTokens,
...(shouldSendCacheUsage && {
cache_read_input_tokens: response.usage.cacheReadInputTokens,
cache_creation_input_tokens: response.usage.cacheWriteInputTokens,
}),
},
};
const toolCalls = response.output.message.content
Expand Down Expand Up @@ -503,6 +557,10 @@ export interface BedrockChatCompleteStreamChunk {
inputTokens: number;
outputTokens: number;
totalTokens: number;
cacheReadInputTokenCount?: number;
cacheReadInputTokens?: number;
cacheWriteInputTokenCount?: number;
cacheWriteInputTokens?: number;
};
}

Expand Down Expand Up @@ -534,6 +592,9 @@ export const BedrockChatCompleteStreamChunkTransform: (
}

if (parsedChunk.usage) {
const shouldSendCacheUsage =
parsedChunk.usage.cacheWriteInputTokens ||
parsedChunk.usage.cacheReadInputTokens;
return [
`data: ${JSON.stringify({
id: fallbackId,
Expand All @@ -552,6 +613,11 @@ export const BedrockChatCompleteStreamChunkTransform: (
prompt_tokens: parsedChunk.usage.inputTokens,
completion_tokens: parsedChunk.usage.outputTokens,
total_tokens: parsedChunk.usage.totalTokens,
...(shouldSendCacheUsage && {
cache_read_input_tokens: parsedChunk.usage.cacheReadInputTokens,
cache_creation_input_tokens:
parsedChunk.usage.cacheWriteInputTokens,
}),
},
})}\n\n`,
`data: [DONE]\n\n`,
Expand Down
6 changes: 3 additions & 3 deletions src/types/requestBody.ts
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ export interface Config {
* A message content type.
* @interface
*/
export interface ContentType {
export interface ContentType extends PromptCache {
type: string;
text?: string;
thinking?: string;
Expand Down Expand Up @@ -285,7 +285,7 @@ export interface Message {
citationMetadata?: CitationMetadata;
}

export interface AnthropicPromptCache {
/**
 * Optional prompt-cache marker mixed into other request types.
 * In this change set it is extended by `ContentType`, `Tool`, `AnthropicTool`
 * and `AnthropicMessage`, and the Bedrock transforms emit a
 * `cachePoint` of type `'default'` after a text item, content item or tool
 * whose `cache_control` is set.
 * @interface
 */
export interface PromptCache {
// The only marker shape declared here is `{ type: 'ephemeral' }`.
cache_control?: { type: 'ephemeral' };
}

Expand Down Expand Up @@ -340,7 +340,7 @@ export type ToolChoice = ToolChoiceObject | 'none' | 'auto' | 'required';
*
* @interface
*/
export interface Tool extends AnthropicPromptCache {
export interface Tool extends PromptCache {
/** The name of the function. */
type: string;
/** A description of the function. */
Expand Down