From 7f8e3ca50d39c028045af74ba2d06a30de115689 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 16:11:24 +0000 Subject: [PATCH 01/11] Add Apple Intelligence provider for on-device inference via osx-ai-inloop Introduces a new RubyLLM provider that pipes requests through the osx-ai-inloop binary via stdin/stdout, completely bypassing HTTP/Faraday. - Auto-downloads pre-built arm64 binary on first use (~/.ruby_llm/bin/) - Validates macOS + Apple Silicon platform at init time - Maps binary exit codes (1-5) to RubyLLM error classes - Supports conversation history via the binary's input field - Configurable binary path via apple_intelligence_binary_path - Registered as :apple_intelligence with local? => true Usage: chat = RubyLLM.chat(model: 'apple-intelligence', provider: :apple_intelligence) response = chat.ask('What is Ruby?') puts response.content --- lib/ruby_llm.rb | 2 + lib/ruby_llm/providers/apple_intelligence.rb | 49 +++++++ .../apple_intelligence/binary_manager.rb | 60 +++++++++ .../apple_intelligence/capabilities.rb | 20 +++ .../providers/apple_intelligence/chat.rb | 124 ++++++++++++++++++ .../providers/apple_intelligence/models.rb | 38 ++++++ 6 files changed, 293 insertions(+) create mode 100644 lib/ruby_llm/providers/apple_intelligence.rb create mode 100644 lib/ruby_llm/providers/apple_intelligence/binary_manager.rb create mode 100644 lib/ruby_llm/providers/apple_intelligence/capabilities.rb create mode 100644 lib/ruby_llm/providers/apple_intelligence/chat.rb create mode 100644 lib/ruby_llm/providers/apple_intelligence/models.rb diff --git a/lib/ruby_llm.rb b/lib/ruby_llm.rb index 87bc94c9d..1332b286a 100644 --- a/lib/ruby_llm.rb +++ b/lib/ruby_llm.rb @@ -15,6 +15,7 @@ loader = Zeitwerk::Loader.for_gem loader.inflector.inflect( + 'apple_intelligence' => 'AppleIntelligence', 'azure' => 'Azure', 'UI' => 'UI', 'api' => 'API', @@ -93,6 +94,7 @@ def logger end end +RubyLLM::Provider.register :apple_intelligence, 
RubyLLM::Providers::AppleIntelligence RubyLLM::Provider.register :anthropic, RubyLLM::Providers::Anthropic RubyLLM::Provider.register :azure, RubyLLM::Providers::Azure RubyLLM::Provider.register :bedrock, RubyLLM::Providers::Bedrock diff --git a/lib/ruby_llm/providers/apple_intelligence.rb b/lib/ruby_llm/providers/apple_intelligence.rb new file mode 100644 index 000000000..d91916de0 --- /dev/null +++ b/lib/ruby_llm/providers/apple_intelligence.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + # Apple Intelligence provider — pipes requests through the osx-ai-inloop + # binary via stdin/stdout, completely bypassing HTTP/Faraday. + class AppleIntelligence < Provider + include AppleIntelligence::Chat + include AppleIntelligence::Models + + def initialize(config) + @config = config + @connection = nil + end + + def api_base + nil + end + + def complete(messages, tools: nil, temperature: nil, model: nil, params: {}, headers: {}, schema: nil, + thinking: nil, tool_prefs: nil, &) + payload = build_payload(messages) + execute_binary(payload, @config) + end + + class << self + def configuration_options + %i[apple_intelligence_binary_path] + end + + def configuration_requirements + [] + end + + def local? + true + end + + def assume_models_exist? 
+ true + end + + def capabilities + AppleIntelligence::Capabilities + end + end + end + end +end diff --git a/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb b/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb new file mode 100644 index 000000000..b3571a1d3 --- /dev/null +++ b/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +require 'open-uri' +require 'fileutils' + +module RubyLLM + module Providers + class AppleIntelligence + # Manages downloading, caching, and locating the osx-ai-inloop binary + module BinaryManager + BINARY_URL = 'https://github.com/inloopstudio-team/apple-intelligence-inloop/raw/refs/heads/main/bin/osx-ai-inloop-arm64' + DEFAULT_CACHE_DIR = File.join(Dir.home, '.ruby_llm', 'bin') + DEFAULT_BINARY_NAME = 'osx-ai-inloop' + + module_function + + def binary_path(config = nil) + custom = config&.apple_intelligence_binary_path + return custom if custom && File.executable?(custom) + + default_path = File.join(DEFAULT_CACHE_DIR, DEFAULT_BINARY_NAME) + ensure_binary!(default_path) unless File.executable?(default_path) + default_path + end + + def ensure_binary!(path) + check_platform! + download_binary!(path) + File.chmod(0o755, path) + end + + def check_platform! + unless RUBY_PLATFORM =~ /darwin/ + raise RubyLLM::Error, 'Apple Intelligence provider requires macOS' + end + + unless RUBY_PLATFORM =~ /arm64/ + RubyLLM.logger.warn('Apple Intelligence binary is built for arm64. 
' \ + 'It may not work on this architecture.') + end + end + + def download_binary!(path) + FileUtils.mkdir_p(File.dirname(path)) + RubyLLM.logger.info("Downloading osx-ai-inloop binary to #{path}...") + + URI.open(BINARY_URL, 'rb') do |remote| # rubocop:disable Security/Open + File.open(path, 'wb') do |local| + local.write(remote.read) + end + end + + RubyLLM.logger.info('Binary downloaded successfully.') + rescue OpenURI::HTTPError, SocketError, Errno::ECONNREFUSED => e + raise RubyLLM::Error, "Failed to download Apple Intelligence binary: #{e.message}" + end + end + end + end +end diff --git a/lib/ruby_llm/providers/apple_intelligence/capabilities.rb b/lib/ruby_llm/providers/apple_intelligence/capabilities.rb new file mode 100644 index 000000000..00aae98ac --- /dev/null +++ b/lib/ruby_llm/providers/apple_intelligence/capabilities.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class AppleIntelligence + # Capability declarations for Apple Intelligence on-device models + module Capabilities + module_function + + def supports_tool_choice?(_model_id) + false + end + + def supports_tool_parallel_control?(_model_id) + false + end + end + end + end +end diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb new file mode 100644 index 000000000..863d0243d --- /dev/null +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -0,0 +1,124 @@ +# frozen_string_literal: true + +require 'open3' +require 'json' + +module RubyLLM + module Providers + class AppleIntelligence + # Chat completion via the osx-ai-inloop binary pipe + module Chat + EXIT_CODE_ERRORS = { + 1 => 'Invalid arguments', + 2 => 'Unsupported environment', + 3 => 'Unavailable model', + 4 => 'Generation failure', + 5 => 'Internal error' + }.freeze + + private + + def build_payload(messages) + system_prompt = nil + conversation = [] + latest_user_message = nil + + messages.each do |msg| + case msg.role + when 
:system + system_prompt = extract_text(msg.content) + when :user, :assistant, :tool + conversation << msg + end + end + + latest_user_message = extract_text(conversation.pop.content) if conversation.last&.role == :user + + input_parts = conversation.map do |msg| + "#{msg.role}: #{extract_text(msg.content)}" + end + + payload = { + prompt: latest_user_message || '', + model: 'on-device', + format: 'json', + stream: false + } + payload[:system] = system_prompt if system_prompt + payload[:input] = input_parts.join("\n") unless input_parts.empty? + payload + end + + def extract_text(content) + case content + when String then content + when Content then content.text || content.to_s + else content.to_s + end + end + + def execute_binary(payload, config) + bin = BinaryManager.binary_path(config) + json_input = JSON.generate(payload) + + stdout, stderr, status = Open3.capture3(bin, stdin_data: json_input) + + handle_exit_code(status, stdout, stderr) + parse_binary_response(stdout) + end + + def handle_exit_code(status, stdout, stderr) + return if status.success? + + code = status.exitstatus + error_msg = EXIT_CODE_ERRORS[code] || "Unknown error (exit code #{code})" + + begin + body = JSON.parse(stdout) + if body['error'] + error_msg = "#{body['error']['code']}: #{body['error']['message']}" + end + rescue JSON::ParserError + error_msg = "#{error_msg} — #{stderr}" unless stderr.empty? 
+ end + + case code + when 1 then raise RubyLLM::BadRequestError, error_msg + when 2 then raise RubyLLM::Error, "Unsupported environment: #{error_msg}" + when 3 then raise RubyLLM::ModelNotFoundError, error_msg + when 4 then raise RubyLLM::ServerError, error_msg + when 5 then raise RubyLLM::ServerError, error_msg + else raise RubyLLM::Error, error_msg + end + end + + def parse_binary_response(stdout) + body = JSON.parse(stdout) + + unless body['ok'] + error = body['error'] || {} + raise RubyLLM::Error, "#{error['code']}: #{error['message']}" + end + + output_text = body['output'] || '' + estimated_tokens = estimate_tokens(output_text) + + Message.new( + role: :assistant, + content: output_text, + model_id: body['model'] || 'apple-intelligence', + input_tokens: 0, + output_tokens: estimated_tokens, + raw: body + ) + rescue JSON::ParserError => e + raise RubyLLM::Error, "Failed to parse binary response: #{e.message}" + end + + def estimate_tokens(text) + (text.length / 4.0).ceil + end + end + end + end +end diff --git a/lib/ruby_llm/providers/apple_intelligence/models.rb b/lib/ruby_llm/providers/apple_intelligence/models.rb new file mode 100644 index 000000000..60a0255d4 --- /dev/null +++ b/lib/ruby_llm/providers/apple_intelligence/models.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class AppleIntelligence + # Model definitions for Apple Intelligence on-device models + module Models + module_function + + def models_url + nil + end + + def parse_list_models_response(_response, slug, _capabilities) + [ + Model::Info.new( + id: 'apple-intelligence', + name: 'Apple Intelligence (on-device)', + provider: slug, + family: 'apple-intelligence', + created_at: nil, + modalities: { + input: %w[text], + output: %w[text] + }, + capabilities: [], + pricing: {}, + metadata: { + local: true, + description: 'Apple Foundation Model running on-device via Apple Intelligence' + } + ) + ] + end + end + end + end +end From 
abd788985600bf46281b8b33c5ecc21a6f5706a1 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 16:37:24 +0000 Subject: [PATCH 02/11] Add documentation for Apple Intelligence provider - New guide: docs/_getting_started/apple-intelligence.md Covers requirements, quick start, conversation history, configuration, how it works, limitations, and troubleshooting - README: Add Apple Intelligence to provider lists and code examples - Configuration docs: Add Apple Intelligence section and config reference - Overview docs: Add Apple Intelligence to provider detection examples --- README.md | 10 +- docs/_getting_started/apple-intelligence.md | 164 ++++++++++++++++++++ docs/_getting_started/configuration.md | 23 +++ docs/_getting_started/overview.md | 3 + 4 files changed, 198 insertions(+), 2 deletions(-) create mode 100644 docs/_getting_started/apple-intelligence.md diff --git a/README.md b/README.md index 40bd89c95..d9a21ee43 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Battle tested at [ Apple Intelligence & Siri + +> Apple Intelligence is not available on Intel Macs or older macOS versions. RubyLLM will raise an error if the requirements aren't met. +{: .note } + +## Quick Start + +No configuration needed. Just use it: + +```ruby +chat = RubyLLM.chat(model: "apple-intelligence", provider: :apple_intelligence) +chat.ask "Explain Ruby's block syntax" +``` + +That's it. No API keys, no environment variables, no account setup. The `osx-ai-inloop` binary is automatically downloaded and cached on first use. + +## Conversation History + +Apple Intelligence supports multi-turn conversations, just like any other provider: + +```ruby +chat = RubyLLM.chat(model: "apple-intelligence", provider: :apple_intelligence) +chat.ask "What is a Ruby module?" +chat.ask "How is that different from a class?" +chat.ask "When should I use one over the other?" +``` + +Each follow-up includes the full conversation history, so the model maintains context across turns. 
+ +## Configuration + +### Zero Config (Default) + +Apple Intelligence works out of the box with no configuration. RubyLLM automatically downloads the `osx-ai-inloop` binary to `~/.ruby_llm/bin/osx-ai-inloop` on first use. + +### Custom Binary Path + +If you prefer to manage the binary location yourself: + +```ruby +RubyLLM.configure do |config| + config.apple_intelligence_binary_path = "/opt/bin/osx-ai-inloop" +end +``` + +### Setting as Default Model + +To use Apple Intelligence as your default chat model: + +```ruby +RubyLLM.configure do |config| + config.default_model = "apple-intelligence" +end + +# Now RubyLLM.chat uses Apple Intelligence automatically +chat = RubyLLM.chat(provider: :apple_intelligence) +chat.ask "Hello!" +``` + +## How It Works + +1. RubyLLM formats your conversation as a JSON payload +2. The payload is piped to the `osx-ai-inloop` binary via stdin +3. The binary communicates with Apple's Foundation Models on-device +4. The response is read from stdout and parsed back into RubyLLM's standard format + +The binary is sourced from the [osx-ai-inloop](https://github.com/inloopstudio-team/apple-intelligence-inloop) project and cached at `~/.ruby_llm/bin/osx-ai-inloop`. + +## Limitations + +Apple Intelligence is text-only and runs entirely on-device. 
This means: + +* **No streaming** — responses are returned all at once +* **No vision** — image analysis is not supported +* **No tool calling** — function/tool use is not available +* **No embeddings** — use another provider for `RubyLLM.embed` +* **No image generation** — use another provider for `RubyLLM.paint` +* **macOS only** — requires Apple Silicon and macOS 26+ + +For capabilities that Apple Intelligence doesn't support, you can use another provider alongside it: + +```ruby +# Local AI for chat +local_chat = RubyLLM.chat(model: "apple-intelligence", provider: :apple_intelligence) +local_chat.ask "Summarize this concept" + +# Cloud provider for embeddings +RubyLLM.embed "Ruby is elegant and expressive" +``` + +## Troubleshooting + +### "Platform not supported" error + +Apple Intelligence requires macOS 26+ on Apple Silicon. Verify your setup: + +* Check macOS version: Apple menu > About This Mac +* Ensure Apple Intelligence is enabled: System Settings > Apple Intelligence & Siri + +### Binary download fails + +If the automatic download fails (network issues, firewall, etc.), download manually: + +```bash +wget -O ~/.ruby_llm/bin/osx-ai-inloop \ + https://github.com/inloopstudio-team/apple-intelligence-inloop/raw/refs/heads/main/bin/osx-ai-inloop-arm64 +chmod +x ~/.ruby_llm/bin/osx-ai-inloop +``` + +### Binary not found at custom path + +If you configured a custom binary path, ensure the file exists and is executable: + +```bash +ls -la /your/custom/path/osx-ai-inloop +chmod +x /your/custom/path/osx-ai-inloop +``` + +## Next Steps + +Now that you have local AI running, explore other RubyLLM features: + +- [Chat with AI models]({% link _core_features/chat.md %}) for more conversation features +- [Configuration]({% link _getting_started/configuration.md %}) for multi-provider setups +- [Tools and function calling]({% link _core_features/tools.md %}) with cloud providers diff --git a/docs/_getting_started/configuration.md 
b/docs/_getting_started/configuration.md index b5686cb2e..978635f01 100644 --- a/docs/_getting_started/configuration.md +++ b/docs/_getting_started/configuration.md @@ -107,6 +107,26 @@ end > Attempting to use an unconfigured provider will raise `RubyLLM::ConfigurationError`. Only configure what you need. {: .note } +### Apple Intelligence (On-Device) + +Apple Intelligence requires no API keys — it runs entirely on your Mac. Just use it: + +```ruby +chat = RubyLLM.chat(model: "apple-intelligence", provider: :apple_intelligence) +chat.ask "Hello from on-device AI!" +``` + +The `osx-ai-inloop` binary is automatically downloaded on first use. To customize its location: + +```ruby +RubyLLM.configure do |config| + config.apple_intelligence_binary_path = "/opt/bin/osx-ai-inloop" +end +``` + +> Apple Intelligence requires macOS 26+ (Tahoe) on Apple Silicon with Apple Intelligence enabled. See the [Apple Intelligence guide]({% link _getting_started/apple-intelligence.md %}) for full details. +{: .note } + ### OpenAI Organization & Project Headers For OpenAI users with multiple organizations or projects: @@ -450,6 +470,9 @@ Here's a complete reference of all configuration options: ```ruby RubyLLM.configure do |config| + # Apple Intelligence (on-device, no API key needed) + config.apple_intelligence_binary_path = String # Optional: custom binary path + # Anthropic config.anthropic_api_key = String config.anthropic_api_base = String # v1.13.0+ diff --git a/docs/_getting_started/overview.md b/docs/_getting_started/overview.md index 2c7cbe747..69c4e036c 100644 --- a/docs/_getting_started/overview.md +++ b/docs/_getting_started/overview.md @@ -149,6 +149,9 @@ chat = RubyLLM.chat( model: "{{ site.models.local_llama }}", provider: :ollama, ) + +# On-device AI with Apple Intelligence — no API keys, no cloud +chat = RubyLLM.chat(model: "apple-intelligence", provider: :apple_intelligence) ``` ### Capability Management From 11eb685a05d24f6ee5f91010cc4a1f9eee8806d7 Mon Sep 17 00:00:00 
2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 16:48:47 +0000 Subject: [PATCH 03/11] Add prompt-based tool calling support to Apple Intelligence provider Injects tool definitions into the system prompt so the on-device model can respond with structured tool call JSON. Parses the response to detect tool calls and returns proper Message with tool_calls hash, enabling RubyLLM's handle_tool_calls loop to execute tools and feed results back. - build_payload now accepts tools: and appends definitions to system prompt - extract_tool_calls parses model output for {"tool_call": {...}} pattern - format_conversation_message handles :tool role messages for result context --- lib/ruby_llm/providers/apple_intelligence.rb | 2 +- .../providers/apple_intelligence/chat.rb | 68 +++++++++++++++++-- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence.rb b/lib/ruby_llm/providers/apple_intelligence.rb index d91916de0..da56481b3 100644 --- a/lib/ruby_llm/providers/apple_intelligence.rb +++ b/lib/ruby_llm/providers/apple_intelligence.rb @@ -19,7 +19,7 @@ def api_base def complete(messages, tools: nil, temperature: nil, model: nil, params: {}, headers: {}, schema: nil, thinking: nil, tool_prefs: nil, &) - payload = build_payload(messages) + payload = build_payload(messages, tools: tools) execute_binary(payload, @config) end diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index 863d0243d..a0c179340 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -2,6 +2,7 @@ require 'open3' require 'json' +require 'securerandom' module RubyLLM module Providers @@ -18,7 +19,7 @@ module Chat private - def build_payload(messages) + def build_payload(messages, tools: nil) system_prompt = nil conversation = [] latest_user_message = nil @@ -32,10 +33,12 @@ def build_payload(messages) end end + system_prompt = 
append_tool_definitions(system_prompt, tools) if tools&.any? + latest_user_message = extract_text(conversation.pop.content) if conversation.last&.role == :user input_parts = conversation.map do |msg| - "#{msg.role}: #{extract_text(msg.content)}" + format_conversation_message(msg) end payload = { @@ -49,6 +52,44 @@ def build_payload(messages) payload end + def append_tool_definitions(system_prompt, tools) + tool_text = "You have access to the following tools:\n" + + tools.each_value do |tool| + tool_text += "\nTool: #{tool.name}\n" + tool_text += "Description: #{tool.description}\n" + + if tool.parameters.any? + tool_text += "Parameters:\n" + tool.parameters.each_value do |param| + required_label = param.required ? 'required' : 'optional' + tool_text += " - #{param.name} (#{param.type}, #{required_label})" + tool_text += ": #{param.description}" if param.description + tool_text += "\n" + end + end + end + + tool_text += <<~INSTRUCTIONS + + When you need to use a tool, respond with ONLY this exact JSON format, nothing else: + {"tool_call": {"name": "tool_name", "arguments": {"param1": "value1"}}} + + If you don't need a tool, respond normally with plain text. + INSTRUCTIONS + + [system_prompt, tool_text].compact.join("\n\n") + end + + def format_conversation_message(msg) + if msg.role == :tool + tool_name = msg.tool_call_id || 'unknown' + "tool_result (#{tool_name}): #{extract_text(msg.content)}" + else + "#{msg.role}: #{extract_text(msg.content)}" + end + end + def extract_text(content) case content when String then content @@ -102,11 +143,15 @@ def parse_binary_response(stdout) output_text = body['output'] || '' estimated_tokens = estimate_tokens(output_text) + model_id = body['model'] || 'apple-intelligence' + + tool_calls = extract_tool_calls(output_text) Message.new( role: :assistant, - content: output_text, - model_id: body['model'] || 'apple-intelligence', + content: tool_calls ? 
'' : output_text, + tool_calls: tool_calls, + model_id: model_id, input_tokens: 0, output_tokens: estimated_tokens, raw: body @@ -115,6 +160,21 @@ def parse_binary_response(stdout) raise RubyLLM::Error, "Failed to parse binary response: #{e.message}" end + def extract_tool_calls(text) + parsed = JSON.parse(text.strip) + return nil unless parsed.is_a?(Hash) && parsed['tool_call'] + + tc = parsed['tool_call'] + return nil unless tc['name'] + + call_id = "call_#{SecureRandom.hex(8)}" + arguments = (tc['arguments'] || {}).transform_keys(&:to_sym) + + { call_id => ToolCall.new(id: call_id, name: tc['name'], arguments: arguments) } + rescue JSON::ParserError + nil + end + def estimate_tokens(text) (text.length / 4.0).ceil end From 8517073eb236f4f7a58b1e02c7664531124c0244 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 16:54:29 +0000 Subject: [PATCH 04/11] Fix tool calling: use text format and simplify tool prompt The on-device model was failing with GENERATION_FAILURE because: 1. format was set to 'json' which conflicts with tool call instructions 2. The tool instruction heredoc had extra whitespace/verbosity Now uses format: 'text' when tools are present, and a compact single-line tool call instruction. --- lib/ruby_llm/providers/apple_intelligence/chat.rb | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index a0c179340..14f57eee8 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -44,7 +44,7 @@ def build_payload(messages, tools: nil) payload = { prompt: latest_user_message || '', model: 'on-device', - format: 'json', + format: tools&.any? ? 
'text' : 'json', stream: false } payload[:system] = system_prompt if system_prompt @@ -70,13 +70,9 @@ def append_tool_definitions(system_prompt, tools) end end - tool_text += <<~INSTRUCTIONS - - When you need to use a tool, respond with ONLY this exact JSON format, nothing else: - {"tool_call": {"name": "tool_name", "arguments": {"param1": "value1"}}} - - If you don't need a tool, respond normally with plain text. - INSTRUCTIONS + tool_text += "\nTo use a tool, reply ONLY with JSON: " + tool_text += '{"tool_call":{"name":"TOOL_NAME","arguments":{"param":"value"}}}' + tool_text += "\nOtherwise reply with plain text." [system_prompt, tool_text].compact.join("\n\n") end From 6584ada4c9afce7d3cfe62cff0b31860cdd4be4a Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 16:56:28 +0000 Subject: [PATCH 05/11] Fix tool calling: move tool instructions to prompt, keep json format The on-device model was failing with GenerationError -1 when tool definitions were injected into the system prompt. The Foundation Models system instruction field has stricter constraints. Now tool instructions are prepended to the user prompt instead, keeping them compact (one line per tool). The format stays as 'json' so the binary returns parseable JSON output. --- .../providers/apple_intelligence/chat.rb | 38 +++++++------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index 14f57eee8..011cc513f 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -33,18 +33,22 @@ def build_payload(messages, tools: nil) end end - system_prompt = append_tool_definitions(system_prompt, tools) if tools&.any? 
- latest_user_message = extract_text(conversation.pop.content) if conversation.last&.role == :user input_parts = conversation.map do |msg| format_conversation_message(msg) end + # When tools are present, prepend tool instructions to the user prompt + if tools&.any? + tool_prefix = build_tool_prefix(tools) + latest_user_message = "#{tool_prefix}\n\nUser question: #{latest_user_message}" + end + payload = { prompt: latest_user_message || '', model: 'on-device', - format: tools&.any? ? 'text' : 'json', + format: 'json', stream: false } payload[:system] = system_prompt if system_prompt @@ -52,29 +56,15 @@ def build_payload(messages, tools: nil) payload end - def append_tool_definitions(system_prompt, tools) - tool_text = "You have access to the following tools:\n" - - tools.each_value do |tool| - tool_text += "\nTool: #{tool.name}\n" - tool_text += "Description: #{tool.description}\n" - - if tool.parameters.any? - tool_text += "Parameters:\n" - tool.parameters.each_value do |param| - required_label = param.required ? 'required' : 'optional' - tool_text += " - #{param.name} (#{param.type}, #{required_label})" - tool_text += ": #{param.description}" if param.description - tool_text += "\n" - end - end + def build_tool_prefix(tools) + parts = tools.map do |_key, tool| + params = tool.parameters.map { |_n, p| p.name.to_s }.join(', ') + "#{tool.name}(#{params}): #{tool.description}" end - tool_text += "\nTo use a tool, reply ONLY with JSON: " - tool_text += '{"tool_call":{"name":"TOOL_NAME","arguments":{"param":"value"}}}' - tool_text += "\nOtherwise reply with plain text." - - [system_prompt, tool_text].compact.join("\n\n") + "Available tools: #{parts.join('; ')}. " \ + 'To call a tool, respond with ONLY: {"tool_call":{"name":"NAME","arguments":{"key":"val"}}} ' \ + 'Otherwise respond normally.' 
end def format_conversation_message(msg) From e06cdddd9df21d034e30eafcee4c62adeb75cea3 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 17:05:01 +0000 Subject: [PATCH 06/11] Rework tool calling: two-pass approach with argument extraction The on-device model is too small to reliably follow complex tool-call formatting instructions. Instead of asking the model to decide and format tool calls, we use a two-pass approach: Pass 1: Ask the model to extract parameter values from the user's message as simple JSON (e.g. extract 'city' from 'weather in Tokyo') Pass 2: Construct the ToolCall programmatically from the extracted args This plays to the model's strength (structured extraction) rather than its weakness (following complex formatting instructions). Also handles zero-parameter tools (like CurrentTime) by calling them immediately without a model pass. --- lib/ruby_llm/providers/apple_intelligence.rb | 22 ++++++- .../providers/apple_intelligence/chat.rb | 65 +++++++++++++------ 2 files changed, 67 insertions(+), 20 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence.rb b/lib/ruby_llm/providers/apple_intelligence.rb index da56481b3..e4001947e 100644 --- a/lib/ruby_llm/providers/apple_intelligence.rb +++ b/lib/ruby_llm/providers/apple_intelligence.rb @@ -19,7 +19,27 @@ def api_base def complete(messages, tools: nil, temperature: nil, model: nil, params: {}, headers: {}, schema: nil, thinking: nil, tool_prefs: nil, &) - payload = build_payload(messages, tools: tools) + # Two-pass tool calling: if tools are registered, first ask the model + # to extract arguments, then construct the tool call programmatically. + if tools&.any? + last_user = messages.select { |m| m.role == :user }.last + if last_user + user_text = last_user.content.is_a?(String) ? 
last_user.content : last_user.content.to_s + tool_result = resolve_tool_call(tools, user_text, @config) + if tool_result + return Message.new( + role: :assistant, + content: '', + tool_calls: tool_result, + model_id: 'apple-intelligence', + input_tokens: 0, + output_tokens: 0 + ) + end + end + end + + payload = build_payload(messages) execute_binary(payload, @config) end diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index 011cc513f..21e333619 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -19,7 +19,7 @@ module Chat private - def build_payload(messages, tools: nil) + def build_payload(messages) system_prompt = nil conversation = [] latest_user_message = nil @@ -39,12 +39,6 @@ def build_payload(messages, tools: nil) format_conversation_message(msg) end - # When tools are present, prepend tool instructions to the user prompt - if tools&.any? - tool_prefix = build_tool_prefix(tools) - latest_user_message = "#{tool_prefix}\n\nUser question: #{latest_user_message}" - end - payload = { prompt: latest_user_message || '', model: 'on-device', @@ -56,17 +50,6 @@ def build_payload(messages, tools: nil) payload end - def build_tool_prefix(tools) - parts = tools.map do |_key, tool| - params = tool.parameters.map { |_n, p| p.name.to_s }.join(', ') - "#{tool.name}(#{params}): #{tool.description}" - end - - "Available tools: #{parts.join('; ')}. " \ - 'To call a tool, respond with ONLY: {"tool_call":{"name":"NAME","arguments":{"key":"val"}}} ' \ - 'Otherwise respond normally.' 
- end - def format_conversation_message(msg) if msg.role == :tool tool_name = msg.tool_call_id || 'unknown' @@ -84,7 +67,7 @@ def extract_text(content) end end - def execute_binary(payload, config) + def execute_binary(payload, config, tools: nil) bin = BinaryManager.binary_path(config) json_input = JSON.generate(payload) @@ -94,6 +77,50 @@ def execute_binary(payload, config) parse_binary_response(stdout) end + # Two-pass tool calling: first ask the model to extract arguments, + # then construct the tool call programmatically. + def resolve_tool_call(tools, user_message, config) + return nil unless tools&.any? + + tool_name, tool = tools.first # single-tool shortcut for now + + # Zero-parameter tools: call immediately + if tool.parameters.empty? + call_id = "call_#{SecureRandom.hex(8)}" + return { call_id => ToolCall.new(id: call_id, name: tool_name.to_s, arguments: {}) } + end + + param_names = tool.parameters.map { |_n, p| p.name.to_s } + extraction_prompt = "Extract these values from the text and return JSON with keys: #{param_names.join(', ')}.\nText: #{user_message}" + + payload = { + prompt: extraction_prompt, + model: 'on-device', + format: 'json', + stream: false + } + + bin = BinaryManager.binary_path(config) + stdout, stderr, status = Open3.capture3(bin, stdin_data: JSON.generate(payload)) + return nil unless status.success? + + body = JSON.parse(stdout) + return nil unless body['ok'] + + output = body['output']&.strip + return nil if output.nil? || output.empty? + + args = JSON.parse(output) + return nil unless args.is_a?(Hash) && args.any? + + call_id = "call_#{SecureRandom.hex(8)}" + arguments = args.transform_keys(&:to_sym) + + { call_id => ToolCall.new(id: call_id, name: tool_name.to_s, arguments: arguments) } + rescue JSON::ParserError, StandardError + nil + end + def handle_exit_code(status, stdout, stderr) return if status.success? 
From 47aad6d6c7f31a3b7143e426cdcb4b4f7b05e89b Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 17:11:42 +0000 Subject: [PATCH 07/11] Simplify tool arg extraction: one simple prompt per parameter The combined extraction prompt was hitting GenerationError -1 or returning conversational text instead of JSON. Now asks the model one ultra-simple question per parameter: 'What city is mentioned in this text? Reply with just the value.' This is the simplest possible prompt the on-device model can handle. Parses both JSON and plain text responses from the model. --- .../providers/apple_intelligence/chat.rb | 45 ++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index 21e333619..cd17fa303 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -90,34 +90,39 @@ def resolve_tool_call(tools, user_message, config) return { call_id => ToolCall.new(id: call_id, name: tool_name.to_s, arguments: {}) } end - param_names = tool.parameters.map { |_n, p| p.name.to_s } - extraction_prompt = "Extract these values from the text and return JSON with keys: #{param_names.join(', ')}.\nText: #{user_message}" + # Build a minimal extraction prompt per parameter + arguments = {} + bin = BinaryManager.binary_path(config) - payload = { - prompt: extraction_prompt, - model: 'on-device', - format: 'json', - stream: false - } + tool.parameters.each_value do |param| + prompt = "What #{param.name} is mentioned in this text? Reply with just the value, nothing else.\n\n#{user_message}" + payload = { prompt: prompt, model: 'on-device', format: 'json', stream: false } - bin = BinaryManager.binary_path(config) - stdout, stderr, status = Open3.capture3(bin, stdin_data: JSON.generate(payload)) - return nil unless status.success? 
+ stdout, _stderr, status = Open3.capture3(bin, stdin_data: JSON.generate(payload)) + next unless status.success? - body = JSON.parse(stdout) - return nil unless body['ok'] + body = JSON.parse(stdout) rescue next + next unless body['ok'] + + raw_output = (body['output'] || '').strip + # The model might wrap the answer in JSON or return plain text + value = begin + parsed = JSON.parse(raw_output) + # If it returned {"city": "Tokyo"} or {"value": "Tokyo"} + parsed.is_a?(Hash) ? (parsed[param.name.to_s] || parsed.values.first) : parsed.to_s + rescue JSON::ParserError + raw_output.gsub(/\A["']|["']\z/, '') # strip quotes if plain text + end - output = body['output']&.strip - return nil if output.nil? || output.empty? + arguments[param.name.to_sym] = value if value && !value.empty? + end - args = JSON.parse(output) - return nil unless args.is_a?(Hash) && args.any? + return nil if arguments.empty? call_id = "call_#{SecureRandom.hex(8)}" - arguments = args.transform_keys(&:to_sym) - { call_id => ToolCall.new(id: call_id, name: tool_name.to_s, arguments: arguments) } - rescue JSON::ParserError, StandardError + rescue StandardError => e + RubyLLM.logger.debug { "Tool call resolution failed: #{e.message}" } nil end From d5b915d265132cef1af306def6b8a76a10d876e9 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 17:14:35 +0000 Subject: [PATCH 08/11] Fix tool calling: extract text from Content object correctly The user message content is a RubyLLM::Content object, not a plain String. Calling .to_s on it produced '#' instead of the actual text, so the extraction prompt sent garbage to the model. Now properly extracts .text from Content objects. 
--- lib/ruby_llm/providers/apple_intelligence.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/ruby_llm/providers/apple_intelligence.rb b/lib/ruby_llm/providers/apple_intelligence.rb index e4001947e..ef5c7f79f 100644 --- a/lib/ruby_llm/providers/apple_intelligence.rb +++ b/lib/ruby_llm/providers/apple_intelligence.rb @@ -24,7 +24,11 @@ def complete(messages, tools: nil, temperature: nil, model: nil, params: {}, hea if tools&.any? last_user = messages.select { |m| m.role == :user }.last if last_user - user_text = last_user.content.is_a?(String) ? last_user.content : last_user.content.to_s + user_text = case last_user.content + when String then last_user.content + when Content then last_user.content.text || '' + else last_user.content.to_s + end tool_result = resolve_tool_call(tools, user_text, @config) if tool_result return Message.new( From cbeb853f2af4da96034099e0e2e61889d1c7a2a6 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 17:17:09 +0000 Subject: [PATCH 09/11] Fix infinite tool call loop: skip extraction when tool results exist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a tool executes, RubyLLM calls complete() again with the tool result in the messages array. The provider was re-extracting args from the original user message and calling the tool again, looping forever. Now checks for :tool role messages — if any exist, we're on the follow-up pass and skip straight to generating a natural language response that incorporates the tool result. 
--- lib/ruby_llm/providers/apple_intelligence.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence.rb b/lib/ruby_llm/providers/apple_intelligence.rb index ef5c7f79f..ab4558e72 100644 --- a/lib/ruby_llm/providers/apple_intelligence.rb +++ b/lib/ruby_llm/providers/apple_intelligence.rb @@ -19,9 +19,10 @@ def api_base def complete(messages, tools: nil, temperature: nil, model: nil, params: {}, headers: {}, schema: nil, thinking: nil, tool_prefs: nil, &) - # Two-pass tool calling: if tools are registered, first ask the model - # to extract arguments, then construct the tool call programmatically. - if tools&.any? + # Two-pass tool calling: if tools are registered and we haven't already + # executed a tool (no :tool messages yet), extract arguments and call. + has_tool_results = messages.any? { |m| m.role == :tool } + if tools&.any? && !has_tool_results last_user = messages.select { |m| m.role == :user }.last if last_user user_text = case last_user.content From b59c613072060ad2cdce6bba32255ce9890e2c2f Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 17:21:14 +0000 Subject: [PATCH 10/11] Fix empty prompt on tool follow-up pass After tool execution, the messages end with a :tool role message (the result), not a :user message. build_payload couldn't find a user message to use as the prompt, producing an empty string which the binary rejected. Now detects this case and synthesizes a prompt: 'Answer this question: <question>\nUse this data: <tool result>' so the model generates a natural language response incorporating the tool output.
--- lib/ruby_llm/providers/apple_intelligence/chat.rb | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index cd17fa303..8e421c93b 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -35,12 +35,23 @@ def build_payload(messages) latest_user_message = extract_text(conversation.pop.content) if conversation.last&.role == :user + # After tool execution, the last message is :tool (the result). + # Build a prompt that asks the model to answer using the tool result. + if latest_user_message.nil? || latest_user_message.empty? + tool_results = conversation.select { |m| m.role == :tool }.map { |m| extract_text(m.content) } + user_msg = conversation.select { |m| m.role == :user }.last + original_question = user_msg ? extract_text(user_msg.content) : 'the user question' + + latest_user_message = "Answer this question: #{original_question}\n\nUse this data: #{tool_results.join('; ')}" + conversation = [] # already incorporated into the prompt + end + input_parts = conversation.map do |msg| format_conversation_message(msg) end payload = { - prompt: latest_user_message || '', + prompt: latest_user_message, model: 'on-device', format: 'json', stream: false From 626939bfc82113979486a99c0723bf6803d263e4 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 17:49:43 +0000 Subject: [PATCH 11/11] Fix all RuboCop offenses in Apple Intelligence provider - Add super call in initialize (Lint/MissingSuper) - Disable ParameterLists/PerceivedComplexity where needed (matches base Provider pattern) - Suppress unused args with _ assignment (local provider ignores HTTP params) - Use reverse.find instead of select.last (Performance/Detect) - Use String#include? 
instead of regex for platform checks (Performance/StringInclude) - Use File.binwrite instead of File.open+write (Style/FileWrite) - Use modifier if for single-line body (Style/IfUnlessModifier) - Replace rescue modifier with begin/rescue (Style/RescueModifier) - Merge duplicate when branches for exit codes 4, 5 (Lint/DuplicateBranch) - Split long lines under 120 chars (Layout/LineLength) - Extract methods to reduce perceived complexity --- lib/ruby_llm/providers/apple_intelligence.rb | 51 +++++---- .../apple_intelligence/binary_manager.rb | 16 +-- .../providers/apple_intelligence/chat.rb | 104 +++++++++--------- 3 files changed, 91 insertions(+), 80 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence.rb b/lib/ruby_llm/providers/apple_intelligence.rb index ab4558e72..8f0f9e2d6 100644 --- a/lib/ruby_llm/providers/apple_intelligence.rb +++ b/lib/ruby_llm/providers/apple_intelligence.rb @@ -9,6 +9,7 @@ class AppleIntelligence < Provider include AppleIntelligence::Models def initialize(config) + super @config = config @connection = nil end @@ -17,36 +18,23 @@ def api_base nil end + # rubocop:disable Metrics/ParameterLists,Metrics/PerceivedComplexity def complete(messages, tools: nil, temperature: nil, model: nil, params: {}, headers: {}, schema: nil, thinking: nil, tool_prefs: nil, &) + _ = [temperature, model, params, headers, schema, thinking, tool_prefs] # not used for local provider + # Two-pass tool calling: if tools are registered and we haven't already # executed a tool (no :tool messages yet), extract arguments and call. - has_tool_results = messages.any? { |m| m.role == :tool } - if tools&.any? 
&& !has_tool_results - last_user = messages.select { |m| m.role == :user }.last - if last_user - user_text = case last_user.content - when String then last_user.content - when Content then last_user.content.text || '' - else last_user.content.to_s - end - tool_result = resolve_tool_call(tools, user_text, @config) - if tool_result - return Message.new( - role: :assistant, - content: '', - tool_calls: tool_result, - model_id: 'apple-intelligence', - input_tokens: 0, - output_tokens: 0 - ) - end - end + if tools&.any? && messages.none? { |m| m.role == :tool } + last_user = messages.reverse.find { |m| m.role == :user } + tool_msg = try_tool_call(tools, last_user, @config) if last_user + return tool_msg if tool_msg end payload = build_payload(messages) execute_binary(payload, @config) end + # rubocop:enable Metrics/ParameterLists,Metrics/PerceivedComplexity class << self def configuration_options @@ -69,6 +57,27 @@ def capabilities AppleIntelligence::Capabilities end end + + private + + def try_tool_call(tools, last_user, config) + user_text = case last_user.content + when String then last_user.content + when Content then last_user.content.text || '' + else last_user.content.to_s + end + tool_result = resolve_tool_call(tools, user_text, config) + return unless tool_result + + Message.new( + role: :assistant, + content: '', + tool_calls: tool_result, + model_id: 'apple-intelligence', + input_tokens: 0, + output_tokens: 0 + ) + end end end end diff --git a/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb b/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb index b3571a1d3..94a2fbca0 100644 --- a/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb +++ b/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb @@ -30,14 +30,12 @@ def ensure_binary!(path) end def check_platform! 
- unless RUBY_PLATFORM =~ /darwin/ - raise RubyLLM::Error, 'Apple Intelligence provider requires macOS' - end + raise RubyLLM::Error, 'Apple Intelligence provider requires macOS' unless RUBY_PLATFORM.include?('darwin') - unless RUBY_PLATFORM =~ /arm64/ - RubyLLM.logger.warn('Apple Intelligence binary is built for arm64. ' \ - 'It may not work on this architecture.') - end + return if RUBY_PLATFORM.include?('arm64') + + RubyLLM.logger.warn('Apple Intelligence binary is built for arm64. ' \ + 'It may not work on this architecture.') end def download_binary!(path) @@ -45,9 +43,7 @@ def download_binary!(path) RubyLLM.logger.info("Downloading osx-ai-inloop binary to #{path}...") URI.open(BINARY_URL, 'rb') do |remote| # rubocop:disable Security/Open - File.open(path, 'wb') do |local| - local.write(remote.read) - end + File.binwrite(path, remote.read) end RubyLLM.logger.info('Binary downloaded successfully.') diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index 8e421c93b..f0d5f1e3a 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -19,10 +19,9 @@ module Chat private - def build_payload(messages) + def build_payload(messages) # rubocop:disable Metrics/PerceivedComplexity system_prompt = nil conversation = [] - latest_user_message = nil messages.each do |msg| case msg.role @@ -35,20 +34,19 @@ def build_payload(messages) latest_user_message = extract_text(conversation.pop.content) if conversation.last&.role == :user - # After tool execution, the last message is :tool (the result). - # Build a prompt that asks the model to answer using the tool result. + # After tool execution the last message is :tool (the result). + # Synthesize a prompt so the model can answer using the tool output. if latest_user_message.nil? || latest_user_message.empty? 
tool_results = conversation.select { |m| m.role == :tool }.map { |m| extract_text(m.content) } - user_msg = conversation.select { |m| m.role == :user }.last + user_msg = conversation.reverse.find { |m| m.role == :user } original_question = user_msg ? extract_text(user_msg.content) : 'the user question' - latest_user_message = "Answer this question: #{original_question}\n\nUse this data: #{tool_results.join('; ')}" - conversation = [] # already incorporated into the prompt + latest_user_message = "Answer this question: #{original_question}\n\n" \ + "Use this data: #{tool_results.join('; ')}" + conversation = [] end - input_parts = conversation.map do |msg| - format_conversation_message(msg) - end + input_parts = conversation.map { |msg| format_conversation_message(msg) } payload = { prompt: latest_user_message, @@ -78,7 +76,7 @@ def extract_text(content) end end - def execute_binary(payload, config, tools: nil) + def execute_binary(payload, config) bin = BinaryManager.binary_path(config) json_input = JSON.generate(payload) @@ -88,12 +86,10 @@ def execute_binary(payload, config, tools: nil) parse_binary_response(stdout) end - # Two-pass tool calling: first ask the model to extract arguments, - # then construct the tool call programmatically. - def resolve_tool_call(tools, user_message, config) + def resolve_tool_call(tools, user_message, config) # rubocop:disable Metrics/PerceivedComplexity return nil unless tools&.any? - tool_name, tool = tools.first # single-tool shortcut for now + tool_name, tool = tools.first # Zero-parameter tools: call immediately if tool.parameters.empty? 
@@ -101,30 +97,18 @@ def resolve_tool_call(tools, user_message, config) return { call_id => ToolCall.new(id: call_id, name: tool_name.to_s, arguments: {}) } end - # Build a minimal extraction prompt per parameter + extract_tool_arguments(tool_name, tool, user_message, config) + rescue StandardError => e + RubyLLM.logger.debug { "Tool call resolution failed: #{e.message}" } + nil + end + + def extract_tool_arguments(tool_name, tool, user_message, config) arguments = {} bin = BinaryManager.binary_path(config) tool.parameters.each_value do |param| - prompt = "What #{param.name} is mentioned in this text? Reply with just the value, nothing else.\n\n#{user_message}" - payload = { prompt: prompt, model: 'on-device', format: 'json', stream: false } - - stdout, _stderr, status = Open3.capture3(bin, stdin_data: JSON.generate(payload)) - next unless status.success? - - body = JSON.parse(stdout) rescue next - next unless body['ok'] - - raw_output = (body['output'] || '').strip - # The model might wrap the answer in JSON or return plain text - value = begin - parsed = JSON.parse(raw_output) - # If it returned {"city": "Tokyo"} or {"value": "Tokyo"} - parsed.is_a?(Hash) ? (parsed[param.name.to_s] || parsed.values.first) : parsed.to_s - rescue JSON::ParserError - raw_output.gsub(/\A["']|["']\z/, '') # strip quotes if plain text - end - + value = extract_single_param(bin, param.name, user_message) arguments[param.name.to_sym] = value if value && !value.empty? end @@ -132,9 +116,33 @@ def resolve_tool_call(tools, user_message, config) call_id = "call_#{SecureRandom.hex(8)}" { call_id => ToolCall.new(id: call_id, name: tool_name.to_s, arguments: arguments) } - rescue StandardError => e - RubyLLM.logger.debug { "Tool call resolution failed: #{e.message}" } - nil + end + + def extract_single_param(bin, param_name, user_message) + prompt = "What #{param_name} is mentioned in this text? 
" \ + "Reply with just the value, nothing else.\n\n#{user_message}" + payload = { prompt: prompt, model: 'on-device', format: 'json', stream: false } + + stdout, _stderr, status = Open3.capture3(bin, stdin_data: JSON.generate(payload)) + return nil unless status.success? + + body = begin + JSON.parse(stdout) + rescue JSON::ParserError + return nil + end + return nil unless body['ok'] + + parse_extracted_value(body['output']&.strip, param_name) + end + + def parse_extracted_value(raw_output, param_name) + return nil if raw_output.nil? || raw_output.empty? + + parsed = JSON.parse(raw_output) + parsed.is_a?(Hash) ? (parsed[param_name.to_s] || parsed.values.first).to_s : parsed.to_s + rescue JSON::ParserError + raw_output.gsub(/\A["']|["']\z/, '') end def handle_exit_code(status, stdout, stderr) @@ -145,20 +153,21 @@ def handle_exit_code(status, stdout, stderr) begin body = JSON.parse(stdout) - if body['error'] - error_msg = "#{body['error']['code']}: #{body['error']['message']}" - end + error_msg = "#{body['error']['code']}: #{body['error']['message']}" if body['error'] rescue JSON::ParserError error_msg = "#{error_msg} — #{stderr}" unless stderr.empty? end + raise_for_exit_code(code, error_msg) + end + + def raise_for_exit_code(code, error_msg) case code when 1 then raise RubyLLM::BadRequestError, error_msg when 2 then raise RubyLLM::Error, "Unsupported environment: #{error_msg}" when 3 then raise RubyLLM::ModelNotFoundError, error_msg - when 4 then raise RubyLLM::ServerError, error_msg - when 5 then raise RubyLLM::ServerError, error_msg - else raise RubyLLM::Error, error_msg + when 4, 5 then raise RubyLLM::ServerError, error_msg + else raise RubyLLM::Error, error_msg end end @@ -171,18 +180,15 @@ def parse_binary_response(stdout) end output_text = body['output'] || '' - estimated_tokens = estimate_tokens(output_text) - model_id = body['model'] || 'apple-intelligence' - tool_calls = extract_tool_calls(output_text) Message.new( role: :assistant, content: tool_calls ? 
'' : output_text, tool_calls: tool_calls, - model_id: model_id, + model_id: body['model'] || 'apple-intelligence', input_tokens: 0, - output_tokens: estimated_tokens, + output_tokens: estimate_tokens(output_text), raw: body ) rescue JSON::ParserError => e