Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions containers/api-proxy/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ COPY server.js logging.js metrics.js rate-limiter.js \
oidc-refresh-utils.js body-transform.js rate-limit.js websocket-proxy.js ./
COPY guards/ ./guards/
COPY providers/ ./providers/
COPY transforms/ ./transforms/

# Create non-root user
RUN addgroup -S apiproxy && adduser -S apiproxy -G apiproxy
Expand Down
318 changes: 13 additions & 305 deletions containers/api-proxy/anthropic-transforms.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,315 +15,23 @@
*
* All transforms are pure functions (no I/O, no side-effects) and are
* idempotent: applying them twice yields the same result as applying once.
*/

const path = require('path');

/**
 * Maximum number of cache breakpoints Anthropic allows per request.
 * injectCacheBreakpoints() compares its running slot count against this
 * limit before tagging each candidate position.
 */
const MAX_CACHE_BREAKPOINTS = 4;

/**
 * The Anthropic beta-feature header value required to use 1-hour TTL caching.
 * Must be added to the `anthropic-beta` request header when AWF_ANTHROPIC_AUTO_CACHE=1.
 *
 * NOTE(review): nothing in this part of the file reads the constant;
 * presumably the request-forwarding code sets the header — confirm there.
 */
const EXTENDED_CACHE_BETA = 'extended-cache-ttl-2025-04-11';

// ── Utility helpers ──────────────────────────────────────────────────────────

/**
 * Remove ANSI SGR (Select Graphic Rendition) sequences — the colour and
 * text-attribute codes shaped like `ESC [ <params> m` — from a string.
 * Escape sequences with any other final byte are left untouched.
 *
 * @param {string} text - Text that may contain SGR escape codes
 * @returns {string} The text with every SGR sequence removed
 */
function stripAnsi(text) {
  // ESC, '[', any run of digits/semicolons, then the terminating 'm'.
  const sgrSequence = /\x1B\[[\d;]*m/g;
  return text.replace(sgrSequence, '');
}

/**
 * Produce a shallow copy of a content block whose `cache_control` is the
 * supplied value. The input block is never mutated; a `cache_control`
 * already present on it is overwritten in the copy.
 *
 * @param {object} block - Anthropic content block
 * @param {{ type: string, ttl: string }} cacheControl - Value to attach
 * @returns {object} The tagged copy
 */
function withCacheControl(block, cacheControl) {
  const tagged = { ...block };
  tagged.cache_control = cacheControl;
  return tagged;
}

/**
 * Build a regex that matches any of the given tool names as whole words
 * (not as substrings of longer identifiers).
 *
 * Alternatives are emitted longest-first. Regex alternation takes the first
 * branch that succeeds, so when one name is a prefix of another and is
 * followed by a non-word character (e.g. `web` and `web-search`), listing
 * the shorter name first would match only `web` and leave `-search` behind.
 *
 * Note: JavaScript's `g`-flag RegExp objects track `lastIndex` but
 * `String.prototype.replace` resets it to 0 before each use, so the
 * same compiled pattern is safe to reuse across multiple calls.
 *
 * @param {string[]} toolNames - Tool names to match (not mutated)
 * @returns {RegExp} Global pattern matching any name as a whole word
 */
function buildToolScrubPattern(toolNames) {
  const alternatives = [...toolNames]
    // Longest first; the sort is stable, so equal-length names keep caller order.
    .sort((a, b) => b.length - a.length)
    // Escape regex metacharacters so names are matched literally.
    .map(name => name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
  return new RegExp(`(?<![\\w])(?:${alternatives.join('|')})(?![\\w])`, 'g');
}

// ── Feature 4: Strip ANSI from tool_result blocks ────────────────────────────

/**
 * Return a copy of a /v1/messages body in which every `tool_result` content
 * block has had ANSI SGR escape sequences removed from its text.
 *
 * A `tool_result` payload is handled in both of its shapes: a plain string,
 * or an array of sub-blocks (only `text` sub-blocks with a string `text`
 * are cleaned). Everything else — non-array message content, other block
 * types, other payload shapes — is passed through by reference.
 *
 * @param {object} body - Parsed /v1/messages request body
 * @returns {object} `body` itself when it has no `messages` array, otherwise
 *   a new body object with ANSI stripped from tool_result blocks
 */
function applyAnsiStrip(body) {
  if (!Array.isArray(body.messages)) return body;

  // Clean one sub-block of an array-valued tool_result payload.
  const cleanPart = part =>
    (part.type === 'text' && typeof part.text === 'string'
      ? { ...part, text: stripAnsi(part.text) }
      : part);

  // Clean one top-level content block; only tool_result blocks are touched.
  const cleanBlock = block => {
    if (block.type !== 'tool_result') return block;

    const payload = block.content;
    if (typeof payload === 'string') {
      return { ...block, content: stripAnsi(payload) };
    }
    if (Array.isArray(payload)) {
      return { ...block, content: payload.map(part => cleanPart(part)) };
    }
    return block;
  };

  // Messages whose content is not an array (e.g. a bare string) are kept as-is.
  const cleanMessage = msg =>
    (Array.isArray(msg.content)
      ? { ...msg, content: msg.content.map(block => cleanBlock(block)) }
      : msg);

  return { ...body, messages: body.messages.map(msg => cleanMessage(msg)) };
}

// ── Feature 3: Drop unused tools ─────────────────────────────────────────────

/**
 * Drop the named tools from a request body: filter them out of the `tools`
 * array and blank their names out of the text blocks of an array-valued
 * `system` prompt.
 *
 * If every tool is dropped, the `tools` key is removed entirely rather than
 * left as an empty array. System blocks whose text is unaffected are
 * returned by reference. The input body is not mutated.
 *
 * @param {object} body - Parsed /v1/messages request body
 * @param {string[]} toolNames - Tool names to drop (exact string match)
 * @param {RegExp} [scrubPattern] - Pre-compiled regex for system-prompt
 *   scrubbing. When omitted, one is built from `toolNames` on each call via
 *   buildToolScrubPattern(); passing one avoids per-request compilation.
 * @returns {object} `body` itself when `toolNames` is empty/missing,
 *   otherwise a new body object with the specified tools removed
 */
function applyToolDrop(body, toolNames, scrubPattern = null) {
  if (!toolNames || toolNames.length === 0) return body;

  const unwanted = new Set(toolNames);
  const out = { ...body };

  // Filter the tools array; only touch the copy when something was removed.
  if (Array.isArray(out.tools)) {
    const kept = out.tools.filter(tool => !unwanted.has(tool.name));
    const removedAny = kept.length < out.tools.length;
    if (removedAny && kept.length === 0) {
      delete out.tools;
    } else if (removedAny) {
      out.tools = kept;
    }
  }

  // Blank out bare mentions of the dropped names in system text blocks.
  // Only the name itself is removed; neighbouring punctuation and spaces
  // stay put so sentence structure is not disturbed further.
  if (Array.isArray(out.system)) {
    const matcher = scrubPattern || buildToolScrubPattern([...unwanted]);
    out.system = out.system.map(entry => {
      const isTextBlock = entry.type === 'text' && typeof entry.text === 'string';
      if (!isTextBlock) return entry;

      const cleaned = entry.text.replace(matcher, '');
      return cleaned === entry.text ? entry : { ...entry, text: cleaned };
    });
  }

  return out;
}

// ── Feature 1: Inject cache breakpoints ──────────────────────────────────────

/**
* Inject up to {@link MAX_CACHE_BREAKPOINTS} prompt-cache breakpoints into a
* /v1/messages request body.
*
* Slot allocation (high-value → low-value, in priority order):
*
* Slot 1 — last entry in `tools` → 1h TTL (~24 k tokens / turn)
* Slot 2 — last block in `system` → 1h TTL (~8 k tokens / turn)
* Slot 3 — last block of `messages[0]` → 1h TTL (~5 k tokens / turn)
* Slot 4 — last block of last message → tailTtl (~15 k tokens / turn)
* (rolling tail; skipped when same position as slot 3)
*
* Running this function twice on the same body produces the same result as
* running it once (idempotent).
*
* @param {object} body - Parsed /v1/messages request body
* @param {string} tailTtl - TTL for the rolling-tail slot ('5m' | '1h')
* @returns {object} New body with cache_control injected at the chosen slots
* Implementation is split across focused sub-modules:
* - transforms/ansi-strip.js — ANSI escape-code stripping
* - transforms/cache-control.js — cache breakpoint injection and TTL upgrading
* - transforms/tool-drop.js — tool removal and system-prompt scrubbing
*/
function injectCacheBreakpoints(body, tailTtl = '5m') {
  // Copy `list` and tag its final element with an ephemeral breakpoint.
  const tagLast = (list, ttl) => {
    const copy = Array.from(list);
    const lastIdx = copy.length - 1;
    copy[lastIdx] = withCacheControl(copy[lastIdx], { type: 'ephemeral', ttl });
    return copy;
  };
  const isFilledArray = value => Array.isArray(value) && value.length > 0;

  const out = { ...body };
  let used = 0;
  const hasBudget = () => used < MAX_CACHE_BREAKPOINTS;

  // Slot 1 — final tool definition.
  if (hasBudget() && isFilledArray(out.tools)) {
    out.tools = tagLast(out.tools, '1h');
    used += 1;
  }

  // Slot 2 — final system block.
  if (hasBudget() && isFilledArray(out.system)) {
    out.system = tagLast(out.system, '1h');
    used += 1;
  }

  // Slot 3 — final block of the first message (only when its content is a
  // non-empty array).
  const original = out.messages;
  if (hasBudget() && isFilledArray(original) && isFilledArray(original[0].content)) {
    const patched = Array.from(original);
    patched[0] = { ...original[0], content: tagLast(original[0].content, '1h') };
    out.messages = patched;
    used += 1;
  }

  // Slot 4 — rolling tail: final block of the final message. Requires at
  // least two messages so it can never land on the slot-3 position.
  if (hasBudget() && Array.isArray(out.messages) && out.messages.length > 1) {
    const current = out.messages;
    const tailMsg = current[current.length - 1];
    if (isFilledArray(tailMsg.content)) {
      const patched = Array.from(current);
      patched[patched.length - 1] = { ...tailMsg, content: tagLast(tailMsg.content, tailTtl) };
      out.messages = patched;
      used += 1;
    }
  }

  return out;
}

// ── Feature 2: Upgrade existing ephemeral TTLs ────────────────────────────────

/**
 * Upgrade any existing `{type: "ephemeral"}` cache breakpoints that lack a
 * `ttl` field to use a 1-hour TTL — except for the rolling tail.
 *
 * The "rolling tail" is the last ephemeral cache_control block found in the
 * `messages` array (scanning backwards, whether or not it already has a
 * `ttl`). Because this breakpoint moves every turn it receives `tailTtl`
 * instead of 1h, to avoid paying the cache-write surcharge on a breakpoint
 * that never stabilises.
 *
 * Blocks that already have a `ttl` set are left unchanged, as are entries
 * with no ephemeral cache_control (including null/undefined entries).
 * The input body is not mutated.
 *
 * @param {object} body - Parsed /v1/messages request body
 * @param {string} tailTtl - TTL for the rolling tail ('5m' | '1h')
 * @returns {object} New body with upgraded ephemeral TTLs
 */
function upgradeEphemeralTtl(body, tailTtl = '5m') {
  // True only for entries carrying an ephemeral breakpoint with no ttl yet —
  // the sole kind of entry this function rewrites.
  const lacksTtl = entry =>
    Boolean(entry &&
      entry.cache_control &&
      entry.cache_control.type === 'ephemeral' &&
      !entry.cache_control.ttl);

  // Locate the rolling-tail position: last ephemeral cache_control in messages[]
  let tailMsgIdx = -1;
  let tailBlockIdx = -1;
  if (Array.isArray(body.messages)) {
    outer: for (let i = body.messages.length - 1; i >= 0; i--) {
      const msg = body.messages[i];
      if (!Array.isArray(msg.content)) continue;
      for (let j = msg.content.length - 1; j >= 0; j--) {
        const b = msg.content[j];
        if (b && b.cache_control && b.cache_control.type === 'ephemeral') {
          tailMsgIdx = i;
          tailBlockIdx = j;
          break outer;
        }
      }
    }
  }

  const result = { ...body };

  // Tools and system blocks are static across turns, so they always get 1h.
  const toOneHour = entry =>
    (lacksTtl(entry) ? withCacheControl(entry, { type: 'ephemeral', ttl: '1h' }) : entry);

  if (Array.isArray(result.tools)) {
    result.tools = result.tools.map(tool => toOneHour(tool));
  }

  if (Array.isArray(result.system)) {
    result.system = result.system.map(block => toOneHour(block));
  }

  // Messages — the tail keeps tailTtl; every other ttl-less breakpoint gets 1h.
  if (Array.isArray(result.messages)) {
    result.messages = result.messages.map((msg, mi) => {
      if (!Array.isArray(msg.content)) return msg;
      const content = msg.content.map((block, bi) => {
        if (!lacksTtl(block)) return block;
        const isTail = (mi === tailMsgIdx && bi === tailBlockIdx);
        return withCacheControl(block, { type: 'ephemeral', ttl: isTail ? tailTtl : '1h' });
      });
      return { ...msg, content };
    });
  }

  return result;
}
const { stripAnsi, applyAnsiStrip } = require('./transforms/ansi-strip');
const {
injectCacheBreakpoints,
upgradeEphemeralTtl,
MAX_CACHE_BREAKPOINTS,
EXTENDED_CACHE_BETA,
} = require('./transforms/cache-control');
const { buildToolScrubPattern, applyToolDrop } = require('./transforms/tool-drop');

Comment on lines +27 to 35
// ── Feature 5: Custom transform hook ─────────────────────────────────────────

Expand Down
Loading
Loading