diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c350de..d74fefc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.14.1] - 2026-04-09 + +### Fixed +- **Thinking configuration**: Use `--thinking adaptive` / `--thinking disabled` CLI flags instead of mapping to `--max-thinking-tokens`. Previously, `ThinkingConfigAdaptive` was mapped to `--max-thinking-tokens 32000` (fixed budget) and `ThinkingConfigDisabled` to `--max-thinking-tokens 0`, which put the CLI into the wrong mode. Among the typed `thinking` configs, only `ThinkingConfigEnabled` now uses `--max-thinking-tokens`; the deprecated `max_thinking_tokens` option still maps to that flag when `thinking` is unset. (Parity with [Python SDK #796](https://github.com/anthropics/claude-agent-sdk-python/pull/796)) + +### Added +- **`exclude_dynamic_sections`** on `SystemPromptPreset`: When set to `true`, the CLI strips per-user dynamic sections (working directory, auto-memory, git status) from the preset system prompt and re-injects them into the first user message. This makes the system prompt byte-identical across users, enabling cross-user prompt-caching hits. Sent via `excludeDynamicSections` in the initialize control message; older CLIs silently ignore it. (Parity with [Python SDK #797](https://github.com/anthropics/claude-agent-sdk-python/pull/797)) + ## [0.14.0] - 2026-04-08 — Python SDK v0.1.51–0.1.56 Parity ### Added diff --git a/README.md b/README.md index 2c4252d..ebeb26e 100644 --- a/README.md +++ b/README.md @@ -724,12 +724,12 @@ For complete examples, see [examples/structured_output_example.rb](examples/stru Control extended thinking behavior with typed configuration objects. The `thinking` option takes precedence over the deprecated `max_thinking_tokens`. 
```ruby -# Adaptive thinking — uses a default budget of 32,000 tokens +# Adaptive thinking — CLI dynamically adjusts budget based on task complexity options = ClaudeAgentSDK::ClaudeAgentOptions.new( thinking: ClaudeAgentSDK::ThinkingConfigAdaptive.new ) -# Enabled thinking with custom budget +# Enabled thinking with explicit token budget options = ClaudeAgentSDK::ClaudeAgentOptions.new( thinking: ClaudeAgentSDK::ThinkingConfigEnabled.new(budget_tokens: 50_000) ) @@ -750,6 +750,22 @@ options = ClaudeAgentSDK::ClaudeAgentOptions.new( > **Note:** When `system_prompt` is `nil` (the default), the SDK passes `--system-prompt ""` to the CLI, which suppresses the default Claude Code system prompt. To use the default system prompt, use a `SystemPromptPreset`. +### Cross-User Prompt Caching + +When running a multi-user fleet with shared preset prompts, enable `exclude_dynamic_sections` to make the system prompt byte-identical across users for prompt-caching hits: + +```ruby +options = ClaudeAgentSDK::ClaudeAgentOptions.new( + system_prompt: ClaudeAgentSDK::SystemPromptPreset.new( + preset: 'claude_code', + append: '...your shared domain instructions...', + exclude_dynamic_sections: true + ) +) +``` + +When set, the CLI strips per-user dynamic sections (working directory, auto-memory, git status) from the system prompt and re-injects them into the first user message instead. Older CLIs silently ignore this option. 
+ ## Budget Control Use `max_budget_usd` to set a spending cap for your queries: @@ -1565,9 +1581,9 @@ end | `PermissionResultAllow` | Permission callback result to allow tool use | | `PermissionResultDeny` | Permission callback result to deny tool use | | `AgentDefinition` | Agent definition with description, prompt, tools, model, skills, memory, mcp_servers | -| `ThinkingConfigAdaptive` | Adaptive thinking mode (32,000 token default budget) | +| `ThinkingConfigAdaptive` | Adaptive thinking mode (CLI dynamically adjusts budget) | | `ThinkingConfigEnabled` | Enabled thinking with explicit `budget_tokens` | -| `ThinkingConfigDisabled` | Disabled thinking (0 tokens) | +| `ThinkingConfigDisabled` | Disabled thinking | | `SdkMcpTool` | SDK MCP tool definition with name, description, input_schema, handler, annotations | | `McpStdioServerConfig` | MCP server config for stdio transport | | `McpSSEServerConfig` | MCP server config for SSE transport | diff --git a/lib/claude_agent_sdk.rb b/lib/claude_agent_sdk.rb index ea44ddc..17d4c57 100644 --- a/lib/claude_agent_sdk.rb +++ b/lib/claude_agent_sdk.rb @@ -340,6 +340,10 @@ def connect(prompt = nil) # Convert hooks to internal format hooks = convert_hooks_to_internal_format(configured_options.hooks) if configured_options.hooks + # Extract exclude_dynamic_sections from preset system prompt for the + # initialize request (older CLIs ignore unknown initialize fields) + exclude_dynamic_sections = extract_exclude_dynamic_sections(configured_options.system_prompt) + # Create Query handler @query_handler = Query.new( transport: @transport, @@ -347,7 +351,8 @@ def connect(prompt = nil) can_use_tool: configured_options.can_use_tool, hooks: hooks, sdk_mcp_servers: sdk_mcp_servers, - agents: configured_options.agents + agents: configured_options.agents, + exclude_dynamic_sections: exclude_dynamic_sections ) # Start query handler and initialize @@ -527,5 +532,19 @@ def convert_hooks_to_internal_format(hooks) end internal_hooks end + + 
def extract_exclude_dynamic_sections(system_prompt) + if system_prompt.is_a?(SystemPromptPreset) + eds = system_prompt.exclude_dynamic_sections + return eds if [true, false].include?(eds) + elsif system_prompt.is_a?(Hash) + type = system_prompt[:type] || system_prompt['type'] + if type == 'preset' + eds = system_prompt.fetch(:exclude_dynamic_sections) { system_prompt['exclude_dynamic_sections'] } + return eds if [true, false].include?(eds) + end + end + nil + end end end diff --git a/lib/claude_agent_sdk/query.rb b/lib/claude_agent_sdk/query.rb index 4a190e3..060f5e7 100644 --- a/lib/claude_agent_sdk/query.rb +++ b/lib/claude_agent_sdk/query.rb @@ -25,13 +25,15 @@ class Query STREAM_CLOSE_TIMEOUT_ENV_VAR = 'CLAUDE_CODE_STREAM_CLOSE_TIMEOUT' DEFAULT_STREAM_CLOSE_TIMEOUT_SECONDS = 60.0 - def initialize(transport:, is_streaming_mode:, can_use_tool: nil, hooks: nil, sdk_mcp_servers: nil, agents: nil) + def initialize(transport:, is_streaming_mode:, can_use_tool: nil, hooks: nil, sdk_mcp_servers: nil, agents: nil, + exclude_dynamic_sections: nil) @transport = transport @is_streaming_mode = is_streaming_mode @can_use_tool = can_use_tool @hooks = hooks || {} @sdk_mcp_servers = sdk_mcp_servers || {} @agents = agents + @exclude_dynamic_sections = exclude_dynamic_sections # Control protocol state @pending_control_responses = {} @@ -109,6 +111,7 @@ def initialize_protocol hooks: hooks_config.empty? ? nil : hooks_config, agents: agents_dict } + request[:excludeDynamicSections] = @exclude_dynamic_sections unless @exclude_dynamic_sections.nil? 
response = send_control_request(request) @initialized = true diff --git a/lib/claude_agent_sdk/subprocess_cli_transport.rb b/lib/claude_agent_sdk/subprocess_cli_transport.rb index 2cf8f0d..ffd01bf 100644 --- a/lib/claude_agent_sdk/subprocess_cli_transport.rb +++ b/lib/claude_agent_sdk/subprocess_cli_transport.rb @@ -120,8 +120,7 @@ def build_command end # Thinking configuration (takes precedence over deprecated max_thinking_tokens) - thinking_tokens = resolve_thinking_tokens - cmd.concat(['--max-thinking-tokens', thinking_tokens.to_s]) unless thinking_tokens.nil? + build_thinking_args(cmd) # Effort level (valid values: low, medium, high, max) cmd.concat(['--effort', @options.effort.to_s]) if @options.effort @@ -494,8 +493,6 @@ def ready? @ready end - DEFAULT_ADAPTIVE_THINKING_TOKENS = 32_000 - private def build_settings_args(cmd) @@ -599,18 +596,18 @@ def load_settings_file(path) JSON.parse(File.read(path)) end - def resolve_thinking_tokens + def build_thinking_args(cmd) if @options.thinking case @options.thinking when ThinkingConfigAdaptive - DEFAULT_ADAPTIVE_THINKING_TOKENS + cmd.concat(['--thinking', 'adaptive']) when ThinkingConfigEnabled - @options.thinking.budget_tokens + cmd.concat(['--max-thinking-tokens', @options.thinking.budget_tokens.to_s]) when ThinkingConfigDisabled - 0 + cmd.concat(['--thinking', 'disabled']) end elsif @options.max_thinking_tokens - @options.max_thinking_tokens + cmd.concat(['--max-thinking-tokens', @options.max_thinking_tokens.to_s]) end end end diff --git a/lib/claude_agent_sdk/types.rb b/lib/claude_agent_sdk/types.rb index 5892c3e..64a3155 100644 --- a/lib/claude_agent_sdk/types.rb +++ b/lib/claude_agent_sdk/types.rb @@ -1735,17 +1735,19 @@ def to_h # System prompt preset configuration class SystemPromptPreset - attr_accessor :type, :preset, :append + attr_accessor :type, :preset, :append, :exclude_dynamic_sections - def initialize(preset:, append: nil) + def initialize(preset:, append: nil, exclude_dynamic_sections: nil) @type = 
'preset' @preset = preset @append = append + @exclude_dynamic_sections = exclude_dynamic_sections end def to_h result = { type: @type, preset: @preset } result[:append] = @append if @append + result[:exclude_dynamic_sections] = @exclude_dynamic_sections unless @exclude_dynamic_sections.nil? result end end diff --git a/lib/claude_agent_sdk/version.rb b/lib/claude_agent_sdk/version.rb index 51c1c31..a7dc44c 100644 --- a/lib/claude_agent_sdk/version.rb +++ b/lib/claude_agent_sdk/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module ClaudeAgentSDK - VERSION = '0.14.0' + VERSION = '0.14.1' end diff --git a/plugins/claude-agent-ruby/skills/claude-agent-ruby/references/options.md b/plugins/claude-agent-ruby/skills/claude-agent-ruby/references/options.md index 56e9589..cb1597b 100644 --- a/plugins/claude-agent-ruby/skills/claude-agent-ruby/references/options.md +++ b/plugins/claude-agent-ruby/skills/claude-agent-ruby/references/options.md @@ -22,14 +22,14 @@ Notes: ## Core knobs -- `system_prompt`: Set an overall instruction as a string, or use `ClaudeAgentSDK::SystemPromptPreset.new(preset: 'claude_code', append: '...')` to extend a preset prompt. +- `system_prompt`: Set an overall instruction as a string, use `ClaudeAgentSDK::SystemPromptPreset.new(preset: 'claude_code', append: '...', exclude_dynamic_sections: true)` to extend a preset (with optional cross-user caching), or use `ClaudeAgentSDK::SystemPromptFile.new(path: '/path/to/prompt.txt')` to load from a file. - `model`: Select the model. - `fallback_model`: Use when the primary model is unavailable. - `max_turns`: Cap the number of turns. - `max_budget_usd`: Cap total spend (USD). - `include_partial_messages`: Include partial assistant messages in the stream when supported. - `cwd`: Run Claude Code in a specific working directory. -- `max_thinking_tokens`: Stored for API parity, but not currently passed through to Claude CLI. 
+- `max_thinking_tokens`: Deprecated — use `thinking:` instead (`ThinkingConfigAdaptive`, `ThinkingConfigEnabled`, or `ThinkingConfigDisabled`). Falls back to `--max-thinking-tokens` when `thinking` is unset. ## Tools and permissions diff --git a/skills/references/options.md b/skills/references/options.md index 646eee6..b212412 100644 --- a/skills/references/options.md +++ b/skills/references/options.md @@ -22,7 +22,7 @@ Notes: ## Core knobs -- `system_prompt`: Set an overall instruction as a string, use `ClaudeAgentSDK::SystemPromptPreset.new(preset: 'claude_code', append: '...')` to extend a preset, or use `ClaudeAgentSDK::SystemPromptFile.new(path: '/path/to/prompt.txt')` to load from a file. +- `system_prompt`: Set an overall instruction as a string, use `ClaudeAgentSDK::SystemPromptPreset.new(preset: 'claude_code', append: '...', exclude_dynamic_sections: true)` to extend a preset (with optional cross-user caching), or use `ClaudeAgentSDK::SystemPromptFile.new(path: '/path/to/prompt.txt')` to load from a file. - `model`: Select the model. - `fallback_model`: Use when the primary model is unavailable. - `max_turns`: Cap the number of turns. @@ -31,7 +31,7 @@ Notes: - `session_id`: Specify a custom session ID upfront (string). - `include_partial_messages`: Include partial assistant messages in the stream when supported. - `cwd`: Run Claude Code in a specific working directory. -- `max_thinking_tokens`: Stored for API parity, but not currently passed through to Claude CLI. +- `max_thinking_tokens`: Deprecated — use `thinking:` instead (`ThinkingConfigAdaptive`, `ThinkingConfigEnabled`, or `ThinkingConfigDisabled`). Falls back to `--max-thinking-tokens` when `thinking` is unset. 
## Tools and permissions diff --git a/spec/unit/client_spec.rb b/spec/unit/client_spec.rb index 9db059a..09a4c5b 100644 --- a/spec/unit/client_spec.rb +++ b/spec/unit/client_spec.rb @@ -276,6 +276,76 @@ def build_transport_class(&on_initialize) end end + context 'with exclude_dynamic_sections' do + let(:transport) { instance_double(ClaudeAgentSDK::SubprocessCLITransport, connect: true, write: nil) } + let(:query_handler) { instance_double(ClaudeAgentSDK::Query, start: true, initialize_protocol: true) } + + before do + allow(ClaudeAgentSDK::SubprocessCLITransport).to receive(:new).and_return(transport) + end + + it 'passes exclude_dynamic_sections from SystemPromptPreset to Query' do + received_kwargs = nil + allow(ClaudeAgentSDK::Query).to receive(:new) do |**kwargs| + received_kwargs = kwargs + query_handler + end + + preset = ClaudeAgentSDK::SystemPromptPreset.new(preset: 'claude_code', exclude_dynamic_sections: true) + options = ClaudeAgentSDK::ClaudeAgentOptions.new(system_prompt: preset) + client = described_class.new(options: options) + client.connect + + expect(received_kwargs[:exclude_dynamic_sections]).to eq(true) + end + + it 'passes exclude_dynamic_sections from Hash with symbol keys to Query' do + received_kwargs = nil + allow(ClaudeAgentSDK::Query).to receive(:new) do |**kwargs| + received_kwargs = kwargs + query_handler + end + + options = ClaudeAgentSDK::ClaudeAgentOptions.new( + system_prompt: { type: 'preset', preset: 'claude_code', exclude_dynamic_sections: true } + ) + client = described_class.new(options: options) + client.connect + + expect(received_kwargs[:exclude_dynamic_sections]).to eq(true) + end + + it 'handles false correctly from Hash with symbol keys' do + received_kwargs = nil + allow(ClaudeAgentSDK::Query).to receive(:new) do |**kwargs| + received_kwargs = kwargs + query_handler + end + + options = ClaudeAgentSDK::ClaudeAgentOptions.new( + system_prompt: { type: 'preset', preset: 'claude_code', exclude_dynamic_sections: false } + ) + 
client = described_class.new(options: options) + client.connect + + expect(received_kwargs[:exclude_dynamic_sections]).to eq(false) + end + + it 'passes nil when system_prompt is a plain string' do + received_kwargs = nil + allow(ClaudeAgentSDK::Query).to receive(:new) do |**kwargs| + received_kwargs = kwargs + query_handler + end + + options = ClaudeAgentSDK::ClaudeAgentOptions.new(system_prompt: 'You are a helper') + client = described_class.new(options: options) + client.connect + + expect(received_kwargs[:exclude_dynamic_sections]).to be_nil + end + end + context 'with default configuration' do after { ClaudeAgentSDK.reset_configuration } diff --git a/spec/unit/subprocess_cli_transport_spec.rb b/spec/unit/subprocess_cli_transport_spec.rb index 1d750b8..ef39f52 100644 --- a/spec/unit/subprocess_cli_transport_spec.rb +++ b/spec/unit/subprocess_cli_transport_spec.rb @@ -80,7 +80,7 @@ expect(cmd).not_to include('--agents') end - it 'passes --max-thinking-tokens for ThinkingConfigAdaptive' do + it 'passes --thinking adaptive for ThinkingConfigAdaptive' do options = ClaudeAgentSDK::ClaudeAgentOptions.new( cli_path: '/usr/bin/claude', thinking: ClaudeAgentSDK::ThinkingConfigAdaptive.new @@ -89,9 +89,10 @@ transport = described_class.new('hi', options) cmd = transport.build_command - idx = cmd.index('--max-thinking-tokens') + idx = cmd.index('--thinking') expect(idx).not_to be_nil - expect(cmd[idx + 1]).to eq('32000') + expect(cmd[idx + 1]).to eq('adaptive') + expect(cmd).not_to include('--max-thinking-tokens') end it 'passes --max-thinking-tokens for ThinkingConfigEnabled' do @@ -106,9 +107,10 @@ idx = cmd.index('--max-thinking-tokens') expect(idx).not_to be_nil expect(cmd[idx + 1]).to eq('50000') + expect(cmd).not_to include('--thinking') end - it 'passes --max-thinking-tokens 0 for ThinkingConfigDisabled' do + it 'passes --thinking disabled for ThinkingConfigDisabled' do options = ClaudeAgentSDK::ClaudeAgentOptions.new( cli_path: '/usr/bin/claude', thinking: 
ClaudeAgentSDK::ThinkingConfigDisabled.new @@ -117,24 +119,26 @@ transport = described_class.new('hi', options) cmd = transport.build_command - idx = cmd.index('--max-thinking-tokens') + idx = cmd.index('--thinking') expect(idx).not_to be_nil - expect(cmd[idx + 1]).to eq('0') + expect(cmd[idx + 1]).to eq('disabled') + expect(cmd).not_to include('--max-thinking-tokens') end it 'thinking takes precedence over deprecated max_thinking_tokens' do options = ClaudeAgentSDK::ClaudeAgentOptions.new( cli_path: '/usr/bin/claude', - thinking: ClaudeAgentSDK::ThinkingConfigEnabled.new(budget_tokens: 10_000), + thinking: ClaudeAgentSDK::ThinkingConfigAdaptive.new, max_thinking_tokens: 99_999 ) transport = described_class.new('hi', options) cmd = transport.build_command - idx = cmd.index('--max-thinking-tokens') + idx = cmd.index('--thinking') expect(idx).not_to be_nil - expect(cmd[idx + 1]).to eq('10000') + expect(cmd[idx + 1]).to eq('adaptive') + expect(cmd).not_to include('--max-thinking-tokens') end it 'falls back to max_thinking_tokens when thinking is nil' do diff --git a/spec/unit/types_spec.rb b/spec/unit/types_spec.rb index ebbea04..8e57d30 100644 --- a/spec/unit/types_spec.rb +++ b/spec/unit/types_spec.rb @@ -1318,6 +1318,25 @@ expect(hash.key?(:append)).to eq(false) end + + it 'stores exclude_dynamic_sections' do + preset = described_class.new(preset: 'claude_code', exclude_dynamic_sections: true) + expect(preset.exclude_dynamic_sections).to eq(true) + end + + it 'includes exclude_dynamic_sections in to_h when set' do + preset = described_class.new(preset: 'claude_code', exclude_dynamic_sections: true) + hash = preset.to_h + + expect(hash[:exclude_dynamic_sections]).to eq(true) + end + + it 'omits exclude_dynamic_sections from to_h when nil' do + preset = described_class.new(preset: 'claude_code') + hash = preset.to_h + + expect(hash.key?(:exclude_dynamic_sections)).to eq(false) + end end describe 'ClaudeAgentOptions new options' do