PredictabilityAtScale · PredictabilityAtScale · Apr 24, 2026 · Apr 24, 2026 · chatgpt-codex-connector · Apr 24, 2026
diff --git a/README.md b/README.md
@@ -136,6 +136,7 @@ Supported values for `warnings.contextSize` are `auto`, `off`, `result-only`, `c
 - **Folder defaults** — `defaults.md` inheritance for shared provider, model, metadata, and system instructions
 - **Overrides** — Environment and tier-based overrides (base → env → tier → runtime)
 - **4 provider adapters** — OpenAI, Anthropic, Gemini, OpenRouter — body-only output
+- **Provider-aware input caching controls** — optional `cache` front matter maps to OpenAI prompt cache hints, Anthropic `cache_control`, and Gemini `cachedContent`
 - **Validation** — Zod schema validation, Levenshtein-based "did you mean?" for typos, variable usage checks
 - **Context hardening** — structured regexes with flags, `/pattern/i` convenience syntax, and built-in `non_empty` / `reject_secrets` validators
 - **Optional short-circuit messages** — validators can return a structured `returnMessage` instead of throwing when configured

diff --git a/docs/prompt-format.md b/docs/prompt-format.md
@@ -54,6 +54,7 @@ Supported default fields:
 
 - `provider` (front matter) — default provider for the folder
 - `model` (front matter) — default model for the folder
+- `cache` (front matter) — default provider-specific caching hints
 - `metadata` (front matter) — merged with prompt-local metadata
 - `# System instructions` (body section) — used when the prompt has none
 
@@ -75,6 +76,10 @@ prompts/
 ---
 provider: openai
 model: gpt-5.4
+cache:
+  openai:
+    prompt_cache_key: support-v1
+    retention: in_memory
 metadata:
   owner: platform
   review_required: true
@@ -101,10 +106,32 @@ Use support tone and escalation policy.
 `prompts/support/reply.md` (no local `metadata.owner` and no local system section) will use:
 - `provider: openai` (inherited from root defaults)
 - `model: gpt-5.4` (inherited from root defaults)
+- `cache.openai.prompt_cache_key: support-v1` (inherited from root defaults)
 - `metadata.owner: support` (nearest override)
 - `metadata.review_required: true` (inherited from parent defaults)
 - system instructions from `support/defaults.md`
 
+## Caching configuration
+
+Use the optional `cache` front matter block to pass vendor-specific caching hints:
+
+```yaml
+cache:
+  openai:
+    prompt_cache_key: support-v2
+    retention: 24h
+  anthropic:
+    mode: automatic
+    ttl: 5m
+  gemini:
+    cached_content: cachedContents/1234567890
+```
+
+- `openai.prompt_cache_key` and `openai.retention` map to OpenAI prompt caching fields.
+- `anthropic.mode: automatic` sets top-level `cache_control`; `explicit` applies block-level cache controls to configured sections/tools.
+- `gemini.cached_content` (or `google.cached_content`) maps to `cachedContent` for requests that reuse a previously created Gemini cache. If both blocks are present, the `gemini` block takes precedence.
+- You can safely include multiple provider blocks in the same prompt. Each adapter only reads its own block (`openai`, `anthropic`, or `gemini`/`google`) and ignores the others.
+
 ## Sections
 
 The Markdown body is split on **H1 headings** into named sections. Three section names are recognized (case-insensitive):

diff --git a/docs/providers.md b/docs/providers.md
@@ -36,6 +36,7 @@ const { request } = result;
 ```
 
 The provider passed to `renderPrompt` determines which adapter shapes the body. The `provider` field in front matter is informational — the render-time provider controls output.
+When a prompt includes multiple cache blocks (for example `cache.openai` + `cache.anthropic`), adapters ignore non-matching blocks so cross-provider settings never leak into the wrong payload.
 
 ## Direct adapter imports
 
@@ -208,10 +209,17 @@ Field mapping:
 | `reasoning.effort` | `reasoning_effort` |
 | `response.format: json` | `response_format: { type: "json_object" }` |
 | `response.stream` | `stream` |
+| `cache.openai.prompt_cache_key` | `prompt_cache_key` |
+| `cache.openai.retention` | `prompt_cache_retention` |
 
 Warnings:
 - `reasoning.budget_tokens` is ignored (OpenAI uses `reasoning_effort` instead)
 
+Caching notes:
+- Prompt caching is already automatic for eligible OpenAI requests.
+- `cache.openai.prompt_cache_key` helps route similar prefixes together.
+- `cache.openai.retention` can be `in_memory` (default) or `24h`.
+
 ## Anthropic
 
 Body shape: [Messages API](https://docs.anthropic.com/en/api/messages)
@@ -233,6 +241,9 @@ Key differences from OpenAI:
 - `max_tokens` is **required** — defaults to `4096` if `sampling.max_output_tokens` is not set.
 - `sampling.stop` maps to `stop_sequences`.
 - `reasoning.budget_tokens` maps to `thinking: { type: "enabled", budget_tokens }`.
+- `cache.anthropic.mode: automatic` maps to top-level `cache_control`.
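+- `cache.anthropic.mode: explicit` applies `cache_control` at block level to the system instructions, tool declarations, and prompt template, as selected by `cache_system_instructions`, `cache_tools`, and `cache_prompt_template`.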
+- `cache.anthropic.ttl` supports `5m` (default) or `1h`.
 
 Warnings:
 - `frequency_penalty` and `presence_penalty` are not supported — ignored with a warning.
@@ -266,6 +277,7 @@ Key differences:
 - `top_p` maps to `topP`, `max_output_tokens` maps to `maxOutputTokens`, `stop` maps to `stopSequences`.
 - `response.format: json` maps to `generationConfig.responseMimeType: "application/json"`.
 - `reasoning.effort` maps to `thinkingConfig.thinkingBudget` (high=8192, medium=4096, low=1024).
+- `cache.gemini.cached_content` (or `cache.google.cached_content`) maps to top-level `cachedContent`.
 
 Warnings:
 - `frequency_penalty` and `presence_penalty` are not supported — ignored with a warning.

diff --git a/docs/schema.md b/docs/schema.md
@@ -15,6 +15,7 @@ Prompt files use YAML front matter. This page documents every supported field.
 | `reasoning` | `object` | No | Reasoning/thinking configuration |
 | `sampling` | `object` | No | Sampling parameters |
 | `response` | `object` | No | Response format and streaming |
+| `cache` | `object` | No | Provider-specific prompt/context caching options |
 | `tools` | `array` | No | Tool references (strings or inline definitions) |
 | `mcp` | `object` | No | MCP server references |
 | `context` | `object` | No | Declare expected variables and history settings |
@@ -31,6 +32,7 @@ Prompt files use YAML front matter. This page documents every supported field.
 |-------|------|-------------|
 | `provider` | `enum` | Default provider (`openai`, `anthropic`, `google`, `gemini`, `openrouter`, `any`) |
 | `model` | `string` | Default model identifier |
+| `cache` | `object` | Same as prompt-level `cache` block |
 | `metadata` | `object` | Same as the prompt `metadata` block (`owner`, `tags`, `review_required`, `stable`) |
 | `# System instructions` | section | System instructions inherited by prompts in this folder |
 
@@ -114,6 +116,37 @@ Inline tool definition fields:
 | `description` | `string` | No | Tool description |
 | `input_schema` | `object` | No | JSON Schema for tool input |
 
+## `cache`
+
+```yaml
+cache:
+  openai:
+    prompt_cache_key: support-v1
+    retention: in_memory   # in_memory | 24h
+  anthropic:
+    mode: automatic        # automatic | explicit
+    ttl: 5m                # 5m | 1h
+    cache_system_instructions: true   # explicit mode only
+    cache_tools: true                 # explicit mode only
+    cache_prompt_template: false      # explicit mode only
+  gemini:
+    cached_content: cachedContents/1234567890
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `openai.prompt_cache_key` | `string` | Optional routing key to improve cache-hit locality on shared prefixes |
+| `openai.retention` | `'in_memory' \| '24h'` | Prompt cache retention policy |
+| `anthropic.mode` | `'automatic' \| 'explicit'` | Automatic top-level caching or explicit block-level cache breakpoints |
+| `anthropic.type` | `'ephemeral'` | Cache type (currently only `ephemeral`) |
+| `anthropic.ttl` | `'5m' \| '1h'` | Anthropic cache duration |
+| `anthropic.cache_system_instructions` | `boolean` | In explicit mode, cache the system instructions block (enabled unless set to `false`) |
+| `anthropic.cache_tools` | `boolean` | In explicit mode, cache tool declarations (only when set to `true`) |
+| `anthropic.cache_prompt_template` | `boolean` | In explicit mode, cache the prompt-template user block (only when set to `true`) |
+| `gemini.cached_content` / `google.cached_content` | `string` | Previously created Gemini cache resource name used as `cachedContent` |
+
+You can define multiple provider cache blocks in one prompt; each adapter reads only its own cache settings.
+
 ## `mcp`
 
 ```yaml
@@ -190,7 +223,7 @@ tiers:
     model: gpt-5.4
 ```
 
-Each environment/tier key maps to an overrides object. Overridable fields: `model`, `fallback_models`, `reasoning`, `sampling`, `response`, `tools`. See [Overrides](./overrides.md).
+Each environment/tier key maps to an overrides object. Overridable fields: `model`, `fallback_models`, `reasoning`, `sampling`, `response`, `cache`, `tools`. See [Overrides](./overrides.md).
 
 ## `metadata`
 

diff --git a/src/cli/commands/init.ts b/src/cli/commands/init.ts
@@ -22,6 +22,11 @@ context:
     - app_context
 includes:
   - ./shared/tone.md
+cache:
+  openai:
+    # Keep this stable across requests that share a long static prefix.
+    prompt_cache_key: hello-v1
+    retention: in_memory
 reasoning:
   effort: high
 environments:

diff --git a/src/providers/anthropic.ts b/src/providers/anthropic.ts
@@ -46,6 +46,15 @@ export const anthropicAdapter: ProviderAdapter = withPromptInputSupport({
     });
 
     const messages: Array<Record<string, unknown>> = [];
+    const anthropicCacheConfig = resolvedAsset.cache?.anthropic;
+    const cacheType = anthropicCacheConfig?.type ?? 'ephemeral';
+    const cacheControl = anthropicCacheConfig
+      ? {
+        type: cacheType,
+        ...(anthropicCacheConfig.ttl ? { ttl: anthropicCacheConfig.ttl } : {}),
+      }
+      : undefined;
+    const cacheMode = anthropicCacheConfig?.mode ?? 'automatic';
 
     // History
     if (runtime.history) {
@@ -56,7 +65,14 @@ export const anthropicAdapter: ProviderAdapter = withPromptInputSupport({
 
     // User message (prompt template)
     if (sections.prompt_template) {
-      messages.push({ role: 'user', content: sections.prompt_template });
+      if (cacheControl && cacheMode === 'explicit' && anthropicCacheConfig?.cache_prompt_template) {
+        messages.push({
+          role: 'user',
+          content: [{ type: 'text', text: sections.prompt_template, cache_control: cacheControl }],
+        });
+      } else {
+        messages.push({ role: 'user', content: sections.prompt_template });
+      }
     }
 
     const body: Record<string, unknown> = {
@@ -66,7 +82,11 @@ export const anthropicAdapter: ProviderAdapter = withPromptInputSupport({
 
     // System goes as top-level field in Anthropic
     if (sections.system_instructions) {
-      body.system = sections.system_instructions;
+      if (cacheControl && cacheMode === 'explicit' && anthropicCacheConfig?.cache_system_instructions !== false) {
+        body.system = [{ type: 'text', text: sections.system_instructions, cache_control: cacheControl }];
+      } else {
+        body.system = sections.system_instructions;
+      }
     }
 
     // Sampling params
@@ -93,18 +113,35 @@ export const anthropicAdapter: ProviderAdapter = withPromptInputSupport({
       body.stream = resolvedAsset.response.stream;
     }
 
+    if (cacheControl && cacheMode === 'automatic') {
+      body.cache_control = cacheControl;
+    }
+
     // Tools
     if (resolvedAsset.tools && resolvedAsset.tools.length > 0) {
       body.tools = resolvedAsset.tools.map((tool) => {
         if (typeof tool === 'string') {
           const def = runtime.toolRegistry?.[tool];
-          if (def) return def;
-          return { name: tool };
+          if (def) {
+            if (cacheControl && cacheMode === 'explicit' && anthropicCacheConfig?.cache_tools) {
+              return { ...(def as Record<string, unknown>), cache_control: cacheControl };
+            }
+            return def;
+          }
+          return {
+            name: tool,
+            ...(cacheControl && cacheMode === 'explicit' && anthropicCacheConfig?.cache_tools
+              ? { cache_control: cacheControl }
+              : {}),
+          };
         }
         return {
           name: tool.name,
           description: tool.description,
           input_schema: tool.input_schema ?? { type: 'object', properties: {} },
+          ...(cacheControl && cacheMode === 'explicit' && anthropicCacheConfig?.cache_tools
+            ? { cache_control: cacheControl }
+            : {}),
         };
       });
     }

diff --git a/src/providers/gemini.ts b/src/providers/gemini.ts
@@ -20,6 +20,8 @@ export const geminiAdapter: ProviderAdapter = withPromptInputSupport({
     const resolvedAsset = resolveAssetForProvider(asset, runtime);
     const errors: string[] = [];
     const warnings: string[] = [];
+    const geminiCache = resolvedAsset.cache?.gemini?.cached_content;
+    const googleCache = resolvedAsset.cache?.google?.cached_content;
 
     if (!resolvedAsset.model) {
       errors.push('Gemini adapter requires a model to be specified.');
@@ -31,6 +33,9 @@ export const geminiAdapter: ProviderAdapter = withPromptInputSupport({
     if (resolvedAsset.sampling?.presence_penalty !== undefined) {
       warnings.push('Gemini does not support presence_penalty. It will be ignored.');
     }
+    if (geminiCache && googleCache && geminiCache !== googleCache) {
+      warnings.push('Both cache.gemini.cached_content and cache.google.cached_content are set. Gemini uses cache.gemini.cached_content.');
+    }
 
     return { valid: errors.length === 0, errors, warnings };
   },
@@ -65,6 +70,7 @@ export const geminiAdapter: ProviderAdapter = withPromptInputSupport({
     const body: Record<string, unknown> = {
       contents,
     };
+    const geminiCacheConfig = resolvedAsset.cache?.gemini ?? resolvedAsset.cache?.google;
 
     // System instruction
     if (sections.system_instructions) {
@@ -96,6 +102,10 @@ export const geminiAdapter: ProviderAdapter = withPromptInputSupport({
       body.generationConfig = generationConfig;
     }
 
+    if (geminiCacheConfig?.cached_content) {
+      body.cachedContent = geminiCacheConfig.cached_content;
+    }
+
     // Tools
     if (resolvedAsset.tools && resolvedAsset.tools.length > 0) {
       const functionDeclarations = resolvedAsset.tools.map((tool) => {

diff --git a/src/providers/openai.ts b/src/providers/openai.ts
@@ -62,6 +62,7 @@ export const openaiAdapter: ProviderAdapter = withPromptInputSupport({
       model: resolvedAsset.model,
       messages,
     };
+    const openaiCacheConfig = resolvedAsset.cache?.openai;
 
     // Sampling params
     if (resolvedAsset.sampling?.temperature !== undefined) body.temperature = resolvedAsset.sampling.temperature;
@@ -86,6 +87,13 @@ export const openaiAdapter: ProviderAdapter = withPromptInputSupport({
       body.stream = resolvedAsset.response.stream;
     }
 
+    if (openaiCacheConfig?.prompt_cache_key) {
+      body.prompt_cache_key = openaiCacheConfig.prompt_cache_key;
+    }
+    if (openaiCacheConfig?.retention) {
+      body.prompt_cache_retention = openaiCacheConfig.retention;
+    }
+
     // Tools
     if (resolvedAsset.tools && resolvedAsset.tools.length > 0) {
       body.tools = resolvedAsset.tools.map((tool) => {

diff --git a/src/schema/index.ts b/src/schema/index.ts
@@ -4,6 +4,10 @@ export {
   ReasoningSchema,
   SamplingSchema,
   ResponseSchema,
+  CacheSchema,
+  OpenAICacheSchema,
+  AnthropicCacheSchema,
+  GeminiCacheSchema,
   ContextSchema,
   ContextInputDefinitionSchema,
   ContextInputDefinitionObjectSchema,

diff --git a/src/schema/schema.ts b/src/schema/schema.ts
@@ -49,6 +49,33 @@ export const ResponseSchema = z.object({
   stream: z.boolean().optional(),
 });
 
+// --- Cache controls ---
+
+export const OpenAICacheSchema = z.object({
+  prompt_cache_key: z.string().min(1).optional(),
+  retention: z.enum(['in_memory', '24h']).optional(),
+});
+
+export const AnthropicCacheSchema = z.object({
+  mode: z.enum(['automatic', 'explicit']).optional(),
+  type: z.literal('ephemeral').optional(),
+  ttl: z.enum(['5m', '1h']).optional(),
+  cache_system_instructions: z.boolean().optional(),
+  cache_tools: z.boolean().optional(),
+  cache_prompt_template: z.boolean().optional(),
+});
+
+export const GeminiCacheSchema = z.object({
+  cached_content: z.string().min(1).optional(),
+});
+
+export const CacheSchema = z.object({
+  openai: OpenAICacheSchema.optional(),
+  anthropic: AnthropicCacheSchema.optional(),
+  gemini: GeminiCacheSchema.optional(),
+  google: GeminiCacheSchema.optional(),
+});
+
 // --- Context ---
 
 export const HistorySchema = z.object({
@@ -118,6 +145,7 @@ export const PromptAssetOverridesSchema = z.object({
   reasoning: ReasoningSchema.optional(),
   sampling: SamplingSchema.optional(),
   response: ResponseSchema.optional(),
+  cache: CacheSchema.optional(),
   tools: z.array(ToolRefSchema).optional(),
 });
 
@@ -143,6 +171,7 @@ export const SectionsSchema = z.object({
 export const PromptDefaultsSchema = z.object({
   provider: z.enum(['openai', 'anthropic', 'google', 'gemini', 'openrouter', 'any']).optional(),
   model: z.string().optional(),
+  cache: CacheSchema.optional(),
   metadata: MetadataSchema.optional(),
   sections: z.object({
     system_instructions: z.string().optional(),
@@ -165,6 +194,7 @@ export const PromptAssetSchema = z.object({
   reasoning: ReasoningSchema.optional(),
   sampling: SamplingSchema.optional(),
   response: ResponseSchema.optional(),
+  cache: CacheSchema.optional(),
 
   tools: z.array(ToolRefSchema).optional(),
   mcp: MCPSchema.optional(),

diff --git a/src/validation/validate.ts b/src/validation/validate.ts
@@ -26,7 +26,7 @@ export interface PromptValidationResult {
 const KNOWN_FRONT_MATTER_KEYS = new Set([
   'id', 'schema_version', 'description', 'provider', 'model', 'fallback_models',
   'reasoning', 'sampling', 'response', 'tools', 'mcp', 'context', 'includes',
-  'environments', 'tiers', 'metadata',
+  'environments', 'tiers', 'metadata', 'cache',
 ]);
 
 /**