From 7f8e3ca50d39c028045af74ba2d06a30de115689 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 16:11:24 +0000 Subject: [PATCH 01/11] Add Apple Intelligence provider for on-device inference via osx-ai-inloop Introduces a new RubyLLM provider that pipes requests through the osx-ai-inloop binary via stdin/stdout, completely bypassing HTTP/Faraday. - Auto-downloads pre-built arm64 binary on first use (~/.ruby_llm/bin/) - Validates macOS + Apple Silicon platform at init time - Maps binary exit codes (1-5) to RubyLLM error classes - Supports conversation history via the binary's input field - Configurable binary path via apple_intelligence_binary_path - Registered as :apple_intelligence with local? => true Usage: chat = RubyLLM.chat(model: 'apple-intelligence', provider: :apple_intelligence) response = chat.ask('What is Ruby?') puts response.content --- lib/ruby_llm.rb | 2 + lib/ruby_llm/providers/apple_intelligence.rb | 49 +++++++ .../apple_intelligence/binary_manager.rb | 60 +++++++++ .../apple_intelligence/capabilities.rb | 20 +++ .../providers/apple_intelligence/chat.rb | 124 ++++++++++++++++++ .../providers/apple_intelligence/models.rb | 38 ++++++ 6 files changed, 293 insertions(+) create mode 100644 lib/ruby_llm/providers/apple_intelligence.rb create mode 100644 lib/ruby_llm/providers/apple_intelligence/binary_manager.rb create mode 100644 lib/ruby_llm/providers/apple_intelligence/capabilities.rb create mode 100644 lib/ruby_llm/providers/apple_intelligence/chat.rb create mode 100644 lib/ruby_llm/providers/apple_intelligence/models.rb diff --git a/lib/ruby_llm.rb b/lib/ruby_llm.rb index 87bc94c9d..1332b286a 100644 --- a/lib/ruby_llm.rb +++ b/lib/ruby_llm.rb @@ -15,6 +15,7 @@ loader = Zeitwerk::Loader.for_gem loader.inflector.inflect( + 'apple_intelligence' => 'AppleIntelligence', 'azure' => 'Azure', 'UI' => 'UI', 'api' => 'API', @@ -93,6 +94,7 @@ def logger end end +RubyLLM::Provider.register :apple_intelligence, 
RubyLLM::Providers::AppleIntelligence RubyLLM::Provider.register :anthropic, RubyLLM::Providers::Anthropic RubyLLM::Provider.register :azure, RubyLLM::Providers::Azure RubyLLM::Provider.register :bedrock, RubyLLM::Providers::Bedrock diff --git a/lib/ruby_llm/providers/apple_intelligence.rb b/lib/ruby_llm/providers/apple_intelligence.rb new file mode 100644 index 000000000..d91916de0 --- /dev/null +++ b/lib/ruby_llm/providers/apple_intelligence.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + # Apple Intelligence provider — pipes requests through the osx-ai-inloop + # binary via stdin/stdout, completely bypassing HTTP/Faraday. + class AppleIntelligence < Provider + include AppleIntelligence::Chat + include AppleIntelligence::Models + + def initialize(config) + @config = config + @connection = nil + end + + def api_base + nil + end + + def complete(messages, tools: nil, temperature: nil, model: nil, params: {}, headers: {}, schema: nil, + thinking: nil, tool_prefs: nil, &) + payload = build_payload(messages) + execute_binary(payload, @config) + end + + class << self + def configuration_options + %i[apple_intelligence_binary_path] + end + + def configuration_requirements + [] + end + + def local? + true + end + + def assume_models_exist? 
+ true + end + + def capabilities + AppleIntelligence::Capabilities + end + end + end + end +end diff --git a/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb b/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb new file mode 100644 index 000000000..b3571a1d3 --- /dev/null +++ b/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +require 'open-uri' +require 'fileutils' + +module RubyLLM + module Providers + class AppleIntelligence + # Manages downloading, caching, and locating the osx-ai-inloop binary + module BinaryManager + BINARY_URL = 'https://github.com/inloopstudio-team/apple-intelligence-inloop/raw/refs/heads/main/bin/osx-ai-inloop-arm64' + DEFAULT_CACHE_DIR = File.join(Dir.home, '.ruby_llm', 'bin') + DEFAULT_BINARY_NAME = 'osx-ai-inloop' + + module_function + + def binary_path(config = nil) + custom = config&.apple_intelligence_binary_path + return custom if custom && File.executable?(custom) + + default_path = File.join(DEFAULT_CACHE_DIR, DEFAULT_BINARY_NAME) + ensure_binary!(default_path) unless File.executable?(default_path) + default_path + end + + def ensure_binary!(path) + check_platform! + download_binary!(path) + File.chmod(0o755, path) + end + + def check_platform! + unless RUBY_PLATFORM =~ /darwin/ + raise RubyLLM::Error, 'Apple Intelligence provider requires macOS' + end + + unless RUBY_PLATFORM =~ /arm64/ + RubyLLM.logger.warn('Apple Intelligence binary is built for arm64. 
' \ + 'It may not work on this architecture.') + end + end + + def download_binary!(path) + FileUtils.mkdir_p(File.dirname(path)) + RubyLLM.logger.info("Downloading osx-ai-inloop binary to #{path}...") + + URI.open(BINARY_URL, 'rb') do |remote| # rubocop:disable Security/Open + File.open(path, 'wb') do |local| + local.write(remote.read) + end + end + + RubyLLM.logger.info('Binary downloaded successfully.') + rescue OpenURI::HTTPError, SocketError, Errno::ECONNREFUSED => e + raise RubyLLM::Error, "Failed to download Apple Intelligence binary: #{e.message}" + end + end + end + end +end diff --git a/lib/ruby_llm/providers/apple_intelligence/capabilities.rb b/lib/ruby_llm/providers/apple_intelligence/capabilities.rb new file mode 100644 index 000000000..00aae98ac --- /dev/null +++ b/lib/ruby_llm/providers/apple_intelligence/capabilities.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class AppleIntelligence + # Capability declarations for Apple Intelligence on-device models + module Capabilities + module_function + + def supports_tool_choice?(_model_id) + false + end + + def supports_tool_parallel_control?(_model_id) + false + end + end + end + end +end diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb new file mode 100644 index 000000000..863d0243d --- /dev/null +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -0,0 +1,124 @@ +# frozen_string_literal: true + +require 'open3' +require 'json' + +module RubyLLM + module Providers + class AppleIntelligence + # Chat completion via the osx-ai-inloop binary pipe + module Chat + EXIT_CODE_ERRORS = { + 1 => 'Invalid arguments', + 2 => 'Unsupported environment', + 3 => 'Unavailable model', + 4 => 'Generation failure', + 5 => 'Internal error' + }.freeze + + private + + def build_payload(messages) + system_prompt = nil + conversation = [] + latest_user_message = nil + + messages.each do |msg| + case msg.role + when 
:system + system_prompt = extract_text(msg.content) + when :user, :assistant, :tool + conversation << msg + end + end + + latest_user_message = extract_text(conversation.pop.content) if conversation.last&.role == :user + + input_parts = conversation.map do |msg| + "#{msg.role}: #{extract_text(msg.content)}" + end + + payload = { + prompt: latest_user_message || '', + model: 'on-device', + format: 'json', + stream: false + } + payload[:system] = system_prompt if system_prompt + payload[:input] = input_parts.join("\n") unless input_parts.empty? + payload + end + + def extract_text(content) + case content + when String then content + when Content then content.text || content.to_s + else content.to_s + end + end + + def execute_binary(payload, config) + bin = BinaryManager.binary_path(config) + json_input = JSON.generate(payload) + + stdout, stderr, status = Open3.capture3(bin, stdin_data: json_input) + + handle_exit_code(status, stdout, stderr) + parse_binary_response(stdout) + end + + def handle_exit_code(status, stdout, stderr) + return if status.success? + + code = status.exitstatus + error_msg = EXIT_CODE_ERRORS[code] || "Unknown error (exit code #{code})" + + begin + body = JSON.parse(stdout) + if body['error'] + error_msg = "#{body['error']['code']}: #{body['error']['message']}" + end + rescue JSON::ParserError + error_msg = "#{error_msg} — #{stderr}" unless stderr.empty? 
+ end + + case code + when 1 then raise RubyLLM::BadRequestError, error_msg + when 2 then raise RubyLLM::Error, "Unsupported environment: #{error_msg}" + when 3 then raise RubyLLM::ModelNotFoundError, error_msg + when 4 then raise RubyLLM::ServerError, error_msg + when 5 then raise RubyLLM::ServerError, error_msg + else raise RubyLLM::Error, error_msg + end + end + + def parse_binary_response(stdout) + body = JSON.parse(stdout) + + unless body['ok'] + error = body['error'] || {} + raise RubyLLM::Error, "#{error['code']}: #{error['message']}" + end + + output_text = body['output'] || '' + estimated_tokens = estimate_tokens(output_text) + + Message.new( + role: :assistant, + content: output_text, + model_id: body['model'] || 'apple-intelligence', + input_tokens: 0, + output_tokens: estimated_tokens, + raw: body + ) + rescue JSON::ParserError => e + raise RubyLLM::Error, "Failed to parse binary response: #{e.message}" + end + + def estimate_tokens(text) + (text.length / 4.0).ceil + end + end + end + end +end diff --git a/lib/ruby_llm/providers/apple_intelligence/models.rb b/lib/ruby_llm/providers/apple_intelligence/models.rb new file mode 100644 index 000000000..60a0255d4 --- /dev/null +++ b/lib/ruby_llm/providers/apple_intelligence/models.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class AppleIntelligence + # Model definitions for Apple Intelligence on-device models + module Models + module_function + + def models_url + nil + end + + def parse_list_models_response(_response, slug, _capabilities) + [ + Model::Info.new( + id: 'apple-intelligence', + name: 'Apple Intelligence (on-device)', + provider: slug, + family: 'apple-intelligence', + created_at: nil, + modalities: { + input: %w[text], + output: %w[text] + }, + capabilities: [], + pricing: {}, + metadata: { + local: true, + description: 'Apple Foundation Model running on-device via Apple Intelligence' + } + ) + ] + end + end + end + end +end From 
abd788985600bf46281b8b33c5ecc21a6f5706a1 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 16:37:24 +0000 Subject: [PATCH 02/11] Add documentation for Apple Intelligence provider - New guide: docs/_getting_started/apple-intelligence.md Covers requirements, quick start, conversation history, configuration, how it works, limitations, and troubleshooting - README: Add Apple Intelligence to provider lists and code examples - Configuration docs: Add Apple Intelligence section and config reference - Overview docs: Add Apple Intelligence to provider detection examples --- README.md | 10 +- docs/_getting_started/apple-intelligence.md | 164 ++++++++++++++++++++ docs/_getting_started/configuration.md | 23 +++ docs/_getting_started/overview.md | 3 + 4 files changed, 198 insertions(+), 2 deletions(-) create mode 100644 docs/_getting_started/apple-intelligence.md diff --git a/README.md b/README.md index 40bd89c95..d9a21ee43 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Battle tested at [ Apple Intelligence & Siri + +> Apple Intelligence is not available on Intel Macs or older macOS versions. RubyLLM will raise an error if the requirements aren't met. +{: .note } + +## Quick Start + +No configuration needed. Just use it: + +```ruby +chat = RubyLLM.chat(model: "apple-intelligence", provider: :apple_intelligence) +chat.ask "Explain Ruby's block syntax" +``` + +That's it. No API keys, no environment variables, no account setup. The `osx-ai-inloop` binary is automatically downloaded and cached on first use. + +## Conversation History + +Apple Intelligence supports multi-turn conversations, just like any other provider: + +```ruby +chat = RubyLLM.chat(model: "apple-intelligence", provider: :apple_intelligence) +chat.ask "What is a Ruby module?" +chat.ask "How is that different from a class?" +chat.ask "When should I use one over the other?" +``` + +Each follow-up includes the full conversation history, so the model maintains context across turns. 
+ +## Configuration + +### Zero Config (Default) + +Apple Intelligence works out of the box with no configuration. RubyLLM automatically downloads the `osx-ai-inloop` binary to `~/.ruby_llm/bin/osx-ai-inloop` on first use. + +### Custom Binary Path + +If you prefer to manage the binary location yourself: + +```ruby +RubyLLM.configure do |config| + config.apple_intelligence_binary_path = "/opt/bin/osx-ai-inloop" +end +``` + +### Setting as Default Model + +To use Apple Intelligence as your default chat model: + +```ruby +RubyLLM.configure do |config| + config.default_model = "apple-intelligence" +end + +# Now RubyLLM.chat uses Apple Intelligence automatically +chat = RubyLLM.chat(provider: :apple_intelligence) +chat.ask "Hello!" +``` + +## How It Works + +1. RubyLLM formats your conversation as a JSON payload +2. The payload is piped to the `osx-ai-inloop` binary via stdin +3. The binary communicates with Apple's Foundation Models on-device +4. The response is read from stdout and parsed back into RubyLLM's standard format + +The binary is sourced from the [osx-ai-inloop](https://github.com/inloopstudio-team/apple-intelligence-inloop) project and cached at `~/.ruby_llm/bin/osx-ai-inloop`. + +## Limitations + +Apple Intelligence is text-only and runs entirely on-device. 
This means: + +* **No streaming** — responses are returned all at once +* **No vision** — image analysis is not supported +* **No tool calling** — function/tool use is not available +* **No embeddings** — use another provider for `RubyLLM.embed` +* **No image generation** — use another provider for `RubyLLM.paint` +* **macOS only** — requires Apple Silicon and macOS 26+ + +For capabilities that Apple Intelligence doesn't support, you can use another provider alongside it: + +```ruby +# Local AI for chat +local_chat = RubyLLM.chat(model: "apple-intelligence", provider: :apple_intelligence) +local_chat.ask "Summarize this concept" + +# Cloud provider for embeddings +RubyLLM.embed "Ruby is elegant and expressive" +``` + +## Troubleshooting + +### "Platform not supported" error + +Apple Intelligence requires macOS 26+ on Apple Silicon. Verify your setup: + +* Check macOS version: Apple menu > About This Mac +* Ensure Apple Intelligence is enabled: System Settings > Apple Intelligence & Siri + +### Binary download fails + +If the automatic download fails (network issues, firewall, etc.), download manually: + +```bash +wget -O ~/.ruby_llm/bin/osx-ai-inloop \ + https://github.com/inloopstudio-team/apple-intelligence-inloop/raw/refs/heads/main/bin/osx-ai-inloop-arm64 +chmod +x ~/.ruby_llm/bin/osx-ai-inloop +``` + +### Binary not found at custom path + +If you configured a custom binary path, ensure the file exists and is executable: + +```bash +ls -la /your/custom/path/osx-ai-inloop +chmod +x /your/custom/path/osx-ai-inloop +``` + +## Next Steps + +Now that you have local AI running, explore other RubyLLM features: + +- [Chat with AI models]({% link _core_features/chat.md %}) for more conversation features +- [Configuration]({% link _getting_started/configuration.md %}) for multi-provider setups +- [Tools and function calling]({% link _core_features/tools.md %}) with cloud providers diff --git a/docs/_getting_started/configuration.md 
b/docs/_getting_started/configuration.md index b5686cb2e..978635f01 100644 --- a/docs/_getting_started/configuration.md +++ b/docs/_getting_started/configuration.md @@ -107,6 +107,26 @@ end > Attempting to use an unconfigured provider will raise `RubyLLM::ConfigurationError`. Only configure what you need. {: .note } +### Apple Intelligence (On-Device) + +Apple Intelligence requires no API keys — it runs entirely on your Mac. Just use it: + +```ruby +chat = RubyLLM.chat(model: "apple-intelligence", provider: :apple_intelligence) +chat.ask "Hello from on-device AI!" +``` + +The `osx-ai-inloop` binary is automatically downloaded on first use. To customize its location: + +```ruby +RubyLLM.configure do |config| + config.apple_intelligence_binary_path = "/opt/bin/osx-ai-inloop" +end +``` + +> Apple Intelligence requires macOS 26+ (Tahoe) on Apple Silicon with Apple Intelligence enabled. See the [Apple Intelligence guide]({% link _getting_started/apple-intelligence.md %}) for full details. +{: .note } + ### OpenAI Organization & Project Headers For OpenAI users with multiple organizations or projects: @@ -450,6 +470,9 @@ Here's a complete reference of all configuration options: ```ruby RubyLLM.configure do |config| + # Apple Intelligence (on-device, no API key needed) + config.apple_intelligence_binary_path = String # Optional: custom binary path + # Anthropic config.anthropic_api_key = String config.anthropic_api_base = String # v1.13.0+ diff --git a/docs/_getting_started/overview.md b/docs/_getting_started/overview.md index 2c7cbe747..69c4e036c 100644 --- a/docs/_getting_started/overview.md +++ b/docs/_getting_started/overview.md @@ -149,6 +149,9 @@ chat = RubyLLM.chat( model: "{{ site.models.local_llama }}", provider: :ollama, ) + +# On-device AI with Apple Intelligence — no API keys, no cloud +chat = RubyLLM.chat(model: "apple-intelligence", provider: :apple_intelligence) ``` ### Capability Management From 11eb685a05d24f6ee5f91010cc4a1f9eee8806d7 Mon Sep 17 00:00:00 
2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 16:48:47 +0000 Subject: [PATCH 03/11] Add prompt-based tool calling support to Apple Intelligence provider Injects tool definitions into the system prompt so the on-device model can respond with structured tool call JSON. Parses the response to detect tool calls and returns proper Message with tool_calls hash, enabling RubyLLM's handle_tool_calls loop to execute tools and feed results back. - build_payload now accepts tools: and appends definitions to system prompt - extract_tool_calls parses model output for {"tool_call": {...}} pattern - format_conversation_message handles :tool role messages for result context --- lib/ruby_llm/providers/apple_intelligence.rb | 2 +- .../providers/apple_intelligence/chat.rb | 68 +++++++++++++++++-- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence.rb b/lib/ruby_llm/providers/apple_intelligence.rb index d91916de0..da56481b3 100644 --- a/lib/ruby_llm/providers/apple_intelligence.rb +++ b/lib/ruby_llm/providers/apple_intelligence.rb @@ -19,7 +19,7 @@ def api_base def complete(messages, tools: nil, temperature: nil, model: nil, params: {}, headers: {}, schema: nil, thinking: nil, tool_prefs: nil, &) - payload = build_payload(messages) + payload = build_payload(messages, tools: tools) execute_binary(payload, @config) end diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index 863d0243d..a0c179340 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -2,6 +2,7 @@ require 'open3' require 'json' +require 'securerandom' module RubyLLM module Providers @@ -18,7 +19,7 @@ module Chat private - def build_payload(messages) + def build_payload(messages, tools: nil) system_prompt = nil conversation = [] latest_user_message = nil @@ -32,10 +33,12 @@ def build_payload(messages) end end + system_prompt = 
append_tool_definitions(system_prompt, tools) if tools&.any? + latest_user_message = extract_text(conversation.pop.content) if conversation.last&.role == :user input_parts = conversation.map do |msg| - "#{msg.role}: #{extract_text(msg.content)}" + format_conversation_message(msg) end payload = { @@ -49,6 +52,44 @@ def build_payload(messages) payload end + def append_tool_definitions(system_prompt, tools) + tool_text = "You have access to the following tools:\n" + + tools.each_value do |tool| + tool_text += "\nTool: #{tool.name}\n" + tool_text += "Description: #{tool.description}\n" + + if tool.parameters.any? + tool_text += "Parameters:\n" + tool.parameters.each_value do |param| + required_label = param.required ? 'required' : 'optional' + tool_text += " - #{param.name} (#{param.type}, #{required_label})" + tool_text += ": #{param.description}" if param.description + tool_text += "\n" + end + end + end + + tool_text += <<~INSTRUCTIONS + + When you need to use a tool, respond with ONLY this exact JSON format, nothing else: + {"tool_call": {"name": "tool_name", "arguments": {"param1": "value1"}}} + + If you don't need a tool, respond normally with plain text. + INSTRUCTIONS + + [system_prompt, tool_text].compact.join("\n\n") + end + + def format_conversation_message(msg) + if msg.role == :tool + tool_name = msg.tool_call_id || 'unknown' + "tool_result (#{tool_name}): #{extract_text(msg.content)}" + else + "#{msg.role}: #{extract_text(msg.content)}" + end + end + def extract_text(content) case content when String then content @@ -102,11 +143,15 @@ def parse_binary_response(stdout) output_text = body['output'] || '' estimated_tokens = estimate_tokens(output_text) + model_id = body['model'] || 'apple-intelligence' + + tool_calls = extract_tool_calls(output_text) Message.new( role: :assistant, - content: output_text, - model_id: body['model'] || 'apple-intelligence', + content: tool_calls ? 
'' : output_text, + tool_calls: tool_calls, + model_id: model_id, input_tokens: 0, output_tokens: estimated_tokens, raw: body @@ -115,6 +160,21 @@ def parse_binary_response(stdout) raise RubyLLM::Error, "Failed to parse binary response: #{e.message}" end + def extract_tool_calls(text) + parsed = JSON.parse(text.strip) + return nil unless parsed.is_a?(Hash) && parsed['tool_call'] + + tc = parsed['tool_call'] + return nil unless tc['name'] + + call_id = "call_#{SecureRandom.hex(8)}" + arguments = (tc['arguments'] || {}).transform_keys(&:to_sym) + + { call_id => ToolCall.new(id: call_id, name: tc['name'], arguments: arguments) } + rescue JSON::ParserError + nil + end + def estimate_tokens(text) (text.length / 4.0).ceil end From 8517073eb236f4f7a58b1e02c7664531124c0244 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 16:54:29 +0000 Subject: [PATCH 04/11] Fix tool calling: use text format and simplify tool prompt The on-device model was failing with GENERATION_FAILURE because: 1. format was set to 'json' which conflicts with tool call instructions 2. The tool instruction heredoc had extra whitespace/verbosity Now uses format: 'text' when tools are present, and a compact single-line tool call instruction. --- lib/ruby_llm/providers/apple_intelligence/chat.rb | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index a0c179340..14f57eee8 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -44,7 +44,7 @@ def build_payload(messages, tools: nil) payload = { prompt: latest_user_message || '', model: 'on-device', - format: 'json', + format: tools&.any? ? 
'text' : 'json', stream: false } payload[:system] = system_prompt if system_prompt @@ -70,13 +70,9 @@ def append_tool_definitions(system_prompt, tools) end end - tool_text += <<~INSTRUCTIONS - - When you need to use a tool, respond with ONLY this exact JSON format, nothing else: - {"tool_call": {"name": "tool_name", "arguments": {"param1": "value1"}}} - - If you don't need a tool, respond normally with plain text. - INSTRUCTIONS + tool_text += "\nTo use a tool, reply ONLY with JSON: " + tool_text += '{"tool_call":{"name":"TOOL_NAME","arguments":{"param":"value"}}}' + tool_text += "\nOtherwise reply with plain text." [system_prompt, tool_text].compact.join("\n\n") end From 6584ada4c9afce7d3cfe62cff0b31860cdd4be4a Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 16:56:28 +0000 Subject: [PATCH 05/11] Fix tool calling: move tool instructions to prompt, keep json format The on-device model was failing with GenerationError -1 when tool definitions were injected into the system prompt. The Foundation Models system instruction field has stricter constraints. Now tool instructions are prepended to the user prompt instead, keeping them compact (one line per tool). The format stays as 'json' so the binary returns parseable JSON output. --- .../providers/apple_intelligence/chat.rb | 38 +++++++------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index 14f57eee8..011cc513f 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -33,18 +33,22 @@ def build_payload(messages, tools: nil) end end - system_prompt = append_tool_definitions(system_prompt, tools) if tools&.any? 
- latest_user_message = extract_text(conversation.pop.content) if conversation.last&.role == :user input_parts = conversation.map do |msg| format_conversation_message(msg) end + # When tools are present, prepend tool instructions to the user prompt + if tools&.any? + tool_prefix = build_tool_prefix(tools) + latest_user_message = "#{tool_prefix}\n\nUser question: #{latest_user_message}" + end + payload = { prompt: latest_user_message || '', model: 'on-device', - format: tools&.any? ? 'text' : 'json', + format: 'json', stream: false } payload[:system] = system_prompt if system_prompt @@ -52,29 +56,15 @@ def build_payload(messages, tools: nil) payload end - def append_tool_definitions(system_prompt, tools) - tool_text = "You have access to the following tools:\n" - - tools.each_value do |tool| - tool_text += "\nTool: #{tool.name}\n" - tool_text += "Description: #{tool.description}\n" - - if tool.parameters.any? - tool_text += "Parameters:\n" - tool.parameters.each_value do |param| - required_label = param.required ? 'required' : 'optional' - tool_text += " - #{param.name} (#{param.type}, #{required_label})" - tool_text += ": #{param.description}" if param.description - tool_text += "\n" - end - end + def build_tool_prefix(tools) + parts = tools.map do |_key, tool| + params = tool.parameters.map { |_n, p| p.name.to_s }.join(', ') + "#{tool.name}(#{params}): #{tool.description}" end - tool_text += "\nTo use a tool, reply ONLY with JSON: " - tool_text += '{"tool_call":{"name":"TOOL_NAME","arguments":{"param":"value"}}}' - tool_text += "\nOtherwise reply with plain text." - - [system_prompt, tool_text].compact.join("\n\n") + "Available tools: #{parts.join('; ')}. " \ + 'To call a tool, respond with ONLY: {"tool_call":{"name":"NAME","arguments":{"key":"val"}}} ' \ + 'Otherwise respond normally.' 
end def format_conversation_message(msg) From e06cdddd9df21d034e30eafcee4c62adeb75cea3 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 17:05:01 +0000 Subject: [PATCH 06/11] Rework tool calling: two-pass approach with argument extraction The on-device model is too small to reliably follow complex tool-call formatting instructions. Instead of asking the model to decide and format tool calls, we use a two-pass approach: Pass 1: Ask the model to extract parameter values from the user's message as simple JSON (e.g. extract 'city' from 'weather in Tokyo') Pass 2: Construct the ToolCall programmatically from the extracted args This plays to the model's strength (structured extraction) rather than its weakness (following complex formatting instructions). Also handles zero-parameter tools (like CurrentTime) by calling them immediately without a model pass. --- lib/ruby_llm/providers/apple_intelligence.rb | 22 ++++++- .../providers/apple_intelligence/chat.rb | 65 +++++++++++++------ 2 files changed, 67 insertions(+), 20 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence.rb b/lib/ruby_llm/providers/apple_intelligence.rb index da56481b3..e4001947e 100644 --- a/lib/ruby_llm/providers/apple_intelligence.rb +++ b/lib/ruby_llm/providers/apple_intelligence.rb @@ -19,7 +19,27 @@ def api_base def complete(messages, tools: nil, temperature: nil, model: nil, params: {}, headers: {}, schema: nil, thinking: nil, tool_prefs: nil, &) - payload = build_payload(messages, tools: tools) + # Two-pass tool calling: if tools are registered, first ask the model + # to extract arguments, then construct the tool call programmatically. + if tools&.any? + last_user = messages.select { |m| m.role == :user }.last + if last_user + user_text = last_user.content.is_a?(String) ? 
last_user.content : last_user.content.to_s + tool_result = resolve_tool_call(tools, user_text, @config) + if tool_result + return Message.new( + role: :assistant, + content: '', + tool_calls: tool_result, + model_id: 'apple-intelligence', + input_tokens: 0, + output_tokens: 0 + ) + end + end + end + + payload = build_payload(messages) execute_binary(payload, @config) end diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index 011cc513f..21e333619 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -19,7 +19,7 @@ module Chat private - def build_payload(messages, tools: nil) + def build_payload(messages) system_prompt = nil conversation = [] latest_user_message = nil @@ -39,12 +39,6 @@ def build_payload(messages, tools: nil) format_conversation_message(msg) end - # When tools are present, prepend tool instructions to the user prompt - if tools&.any? - tool_prefix = build_tool_prefix(tools) - latest_user_message = "#{tool_prefix}\n\nUser question: #{latest_user_message}" - end - payload = { prompt: latest_user_message || '', model: 'on-device', @@ -56,17 +50,6 @@ def build_payload(messages, tools: nil) payload end - def build_tool_prefix(tools) - parts = tools.map do |_key, tool| - params = tool.parameters.map { |_n, p| p.name.to_s }.join(', ') - "#{tool.name}(#{params}): #{tool.description}" - end - - "Available tools: #{parts.join('; ')}. " \ - 'To call a tool, respond with ONLY: {"tool_call":{"name":"NAME","arguments":{"key":"val"}}} ' \ - 'Otherwise respond normally.' 
- end - def format_conversation_message(msg) if msg.role == :tool tool_name = msg.tool_call_id || 'unknown' @@ -84,7 +67,7 @@ def extract_text(content) end end - def execute_binary(payload, config) + def execute_binary(payload, config, tools: nil) bin = BinaryManager.binary_path(config) json_input = JSON.generate(payload) @@ -94,6 +77,50 @@ def execute_binary(payload, config) parse_binary_response(stdout) end + # Two-pass tool calling: first ask the model to extract arguments, + # then construct the tool call programmatically. + def resolve_tool_call(tools, user_message, config) + return nil unless tools&.any? + + tool_name, tool = tools.first # single-tool shortcut for now + + # Zero-parameter tools: call immediately + if tool.parameters.empty? + call_id = "call_#{SecureRandom.hex(8)}" + return { call_id => ToolCall.new(id: call_id, name: tool_name.to_s, arguments: {}) } + end + + param_names = tool.parameters.map { |_n, p| p.name.to_s } + extraction_prompt = "Extract these values from the text and return JSON with keys: #{param_names.join(', ')}.\nText: #{user_message}" + + payload = { + prompt: extraction_prompt, + model: 'on-device', + format: 'json', + stream: false + } + + bin = BinaryManager.binary_path(config) + stdout, stderr, status = Open3.capture3(bin, stdin_data: JSON.generate(payload)) + return nil unless status.success? + + body = JSON.parse(stdout) + return nil unless body['ok'] + + output = body['output']&.strip + return nil if output.nil? || output.empty? + + args = JSON.parse(output) + return nil unless args.is_a?(Hash) && args.any? + + call_id = "call_#{SecureRandom.hex(8)}" + arguments = args.transform_keys(&:to_sym) + + { call_id => ToolCall.new(id: call_id, name: tool_name.to_s, arguments: arguments) } + rescue JSON::ParserError, StandardError + nil + end + def handle_exit_code(status, stdout, stderr) return if status.success? 
From 47aad6d6c7f31a3b7143e426cdcb4b4f7b05e89b Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 17:11:42 +0000 Subject: [PATCH 07/11] Simplify tool arg extraction: one simple prompt per parameter The combined extraction prompt was hitting GenerationError -1 or returning conversational text instead of JSON. Now asks the model one ultra-simple question per parameter: 'What city is mentioned in this text? Reply with just the value.' This is the simplest possible prompt the on-device model can handle. Parses both JSON and plain text responses from the model. --- .../providers/apple_intelligence/chat.rb | 45 ++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index 21e333619..cd17fa303 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -90,34 +90,39 @@ def resolve_tool_call(tools, user_message, config) return { call_id => ToolCall.new(id: call_id, name: tool_name.to_s, arguments: {}) } end - param_names = tool.parameters.map { |_n, p| p.name.to_s } - extraction_prompt = "Extract these values from the text and return JSON with keys: #{param_names.join(', ')}.\nText: #{user_message}" + # Build a minimal extraction prompt per parameter + arguments = {} + bin = BinaryManager.binary_path(config) - payload = { - prompt: extraction_prompt, - model: 'on-device', - format: 'json', - stream: false - } + tool.parameters.each_value do |param| + prompt = "What #{param.name} is mentioned in this text? Reply with just the value, nothing else.\n\n#{user_message}" + payload = { prompt: prompt, model: 'on-device', format: 'json', stream: false } - bin = BinaryManager.binary_path(config) - stdout, stderr, status = Open3.capture3(bin, stdin_data: JSON.generate(payload)) - return nil unless status.success? 
+ stdout, _stderr, status = Open3.capture3(bin, stdin_data: JSON.generate(payload)) + next unless status.success? - body = JSON.parse(stdout) - return nil unless body['ok'] + body = JSON.parse(stdout) rescue next + next unless body['ok'] + + raw_output = (body['output'] || '').strip + # The model might wrap the answer in JSON or return plain text + value = begin + parsed = JSON.parse(raw_output) + # If it returned {"city": "Tokyo"} or {"value": "Tokyo"} + parsed.is_a?(Hash) ? (parsed[param.name.to_s] || parsed.values.first) : parsed.to_s + rescue JSON::ParserError + raw_output.gsub(/\A["']|["']\z/, '') # strip quotes if plain text + end - output = body['output']&.strip - return nil if output.nil? || output.empty? + arguments[param.name.to_sym] = value if value && !value.empty? + end - args = JSON.parse(output) - return nil unless args.is_a?(Hash) && args.any? + return nil if arguments.empty? call_id = "call_#{SecureRandom.hex(8)}" - arguments = args.transform_keys(&:to_sym) - { call_id => ToolCall.new(id: call_id, name: tool_name.to_s, arguments: arguments) } - rescue JSON::ParserError, StandardError + rescue StandardError => e + RubyLLM.logger.debug { "Tool call resolution failed: #{e.message}" } nil end From d5b915d265132cef1af306def6b8a76a10d876e9 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 17:14:35 +0000 Subject: [PATCH 08/11] Fix tool calling: extract text from Content object correctly The user message content is a RubyLLM::Content object, not a plain String. Calling .to_s on it produced '#' instead of the actual text, so the extraction prompt sent garbage to the model. Now properly extracts .text from Content objects. 
--- lib/ruby_llm/providers/apple_intelligence.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/ruby_llm/providers/apple_intelligence.rb b/lib/ruby_llm/providers/apple_intelligence.rb index e4001947e..ef5c7f79f 100644 --- a/lib/ruby_llm/providers/apple_intelligence.rb +++ b/lib/ruby_llm/providers/apple_intelligence.rb @@ -24,7 +24,11 @@ def complete(messages, tools: nil, temperature: nil, model: nil, params: {}, hea if tools&.any? last_user = messages.select { |m| m.role == :user }.last if last_user - user_text = last_user.content.is_a?(String) ? last_user.content : last_user.content.to_s + user_text = case last_user.content + when String then last_user.content + when Content then last_user.content.text || '' + else last_user.content.to_s + end tool_result = resolve_tool_call(tools, user_text, @config) if tool_result return Message.new( From cbeb853f2af4da96034099e0e2e61889d1c7a2a6 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 17:17:09 +0000 Subject: [PATCH 09/11] Fix infinite tool call loop: skip extraction when tool results exist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a tool executes, RubyLLM calls complete() again with the tool result in the messages array. The provider was re-extracting args from the original user message and calling the tool again, looping forever. Now checks for :tool role messages — if any exist, we're on the follow-up pass and skip straight to generating a natural language response that incorporates the tool result. 
--- lib/ruby_llm/providers/apple_intelligence.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence.rb b/lib/ruby_llm/providers/apple_intelligence.rb index ef5c7f79f..ab4558e72 100644 --- a/lib/ruby_llm/providers/apple_intelligence.rb +++ b/lib/ruby_llm/providers/apple_intelligence.rb @@ -19,9 +19,10 @@ def api_base def complete(messages, tools: nil, temperature: nil, model: nil, params: {}, headers: {}, schema: nil, thinking: nil, tool_prefs: nil, &) - # Two-pass tool calling: if tools are registered, first ask the model - # to extract arguments, then construct the tool call programmatically. - if tools&.any? + # Two-pass tool calling: if tools are registered and we haven't already + # executed a tool (no :tool messages yet), extract arguments and call. + has_tool_results = messages.any? { |m| m.role == :tool } + if tools&.any? && !has_tool_results last_user = messages.select { |m| m.role == :user }.last if last_user user_text = case last_user.content From b59c613072060ad2cdce6bba32255ce9890e2c2f Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 17:21:14 +0000 Subject: [PATCH 10/11] Fix empty prompt on tool follow-up pass After tool execution, the messages end with a :tool role message (the result), not a :user message. build_payload couldn't find a user message to use as the prompt, producing an empty string which the binary rejected. Now detects this case and synthesizes a prompt: 'Answer this question: <question>\nUse this data: <tool result>' so the model generates a natural language response incorporating the tool output.
--- lib/ruby_llm/providers/apple_intelligence/chat.rb | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index cd17fa303..8e421c93b 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -35,12 +35,23 @@ def build_payload(messages) latest_user_message = extract_text(conversation.pop.content) if conversation.last&.role == :user + # After tool execution, the last message is :tool (the result). + # Build a prompt that asks the model to answer using the tool result. + if latest_user_message.nil? || latest_user_message.empty? + tool_results = conversation.select { |m| m.role == :tool }.map { |m| extract_text(m.content) } + user_msg = conversation.select { |m| m.role == :user }.last + original_question = user_msg ? extract_text(user_msg.content) : 'the user question' + + latest_user_message = "Answer this question: #{original_question}\n\nUse this data: #{tool_results.join('; ')}" + conversation = [] # already incorporated into the prompt + end + input_parts = conversation.map do |msg| format_conversation_message(msg) end payload = { - prompt: latest_user_message || '', + prompt: latest_user_message, model: 'on-device', format: 'json', stream: false From 626939bfc82113979486a99c0723bf6803d263e4 Mon Sep 17 00:00:00 2001 From: Abhishek Parolkar Date: Wed, 1 Apr 2026 17:49:43 +0000 Subject: [PATCH 11/11] Fix all RuboCop offenses in Apple Intelligence provider - Add super call in initialize (Lint/MissingSuper) - Disable ParameterLists/PerceivedComplexity where needed (matches base Provider pattern) - Suppress unused args with _ assignment (local provider ignores HTTP params) - Use reverse.find instead of select.last (Performance/Detect) - Use String#include? 
instead of regex for platform checks (Performance/StringInclude) - Use File.binwrite instead of File.open+write (Style/FileWrite) - Use modifier if for single-line body (Style/IfUnlessModifier) - Replace rescue modifier with begin/rescue (Style/RescueModifier) - Merge duplicate when branches for exit codes 4, 5 (Lint/DuplicateBranch) - Split long lines under 120 chars (Layout/LineLength) - Extract methods to reduce perceived complexity --- lib/ruby_llm/providers/apple_intelligence.rb | 51 +++++---- .../apple_intelligence/binary_manager.rb | 16 +-- .../providers/apple_intelligence/chat.rb | 104 +++++++++--------- 3 files changed, 91 insertions(+), 80 deletions(-) diff --git a/lib/ruby_llm/providers/apple_intelligence.rb b/lib/ruby_llm/providers/apple_intelligence.rb index ab4558e72..8f0f9e2d6 100644 --- a/lib/ruby_llm/providers/apple_intelligence.rb +++ b/lib/ruby_llm/providers/apple_intelligence.rb @@ -9,6 +9,7 @@ class AppleIntelligence < Provider include AppleIntelligence::Models def initialize(config) + super @config = config @connection = nil end @@ -17,36 +18,23 @@ def api_base nil end + # rubocop:disable Metrics/ParameterLists,Metrics/PerceivedComplexity def complete(messages, tools: nil, temperature: nil, model: nil, params: {}, headers: {}, schema: nil, thinking: nil, tool_prefs: nil, &) + _ = [temperature, model, params, headers, schema, thinking, tool_prefs] # not used for local provider + # Two-pass tool calling: if tools are registered and we haven't already # executed a tool (no :tool messages yet), extract arguments and call. - has_tool_results = messages.any? { |m| m.role == :tool } - if tools&.any? 
&& !has_tool_results - last_user = messages.select { |m| m.role == :user }.last - if last_user - user_text = case last_user.content - when String then last_user.content - when Content then last_user.content.text || '' - else last_user.content.to_s - end - tool_result = resolve_tool_call(tools, user_text, @config) - if tool_result - return Message.new( - role: :assistant, - content: '', - tool_calls: tool_result, - model_id: 'apple-intelligence', - input_tokens: 0, - output_tokens: 0 - ) - end - end + if tools&.any? && messages.none? { |m| m.role == :tool } + last_user = messages.reverse.find { |m| m.role == :user } + tool_msg = try_tool_call(tools, last_user, @config) if last_user + return tool_msg if tool_msg end payload = build_payload(messages) execute_binary(payload, @config) end + # rubocop:enable Metrics/ParameterLists,Metrics/PerceivedComplexity class << self def configuration_options @@ -69,6 +57,27 @@ def capabilities AppleIntelligence::Capabilities end end + + private + + def try_tool_call(tools, last_user, config) + user_text = case last_user.content + when String then last_user.content + when Content then last_user.content.text || '' + else last_user.content.to_s + end + tool_result = resolve_tool_call(tools, user_text, config) + return unless tool_result + + Message.new( + role: :assistant, + content: '', + tool_calls: tool_result, + model_id: 'apple-intelligence', + input_tokens: 0, + output_tokens: 0 + ) + end end end end diff --git a/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb b/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb index b3571a1d3..94a2fbca0 100644 --- a/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb +++ b/lib/ruby_llm/providers/apple_intelligence/binary_manager.rb @@ -30,14 +30,12 @@ def ensure_binary!(path) end def check_platform! 
- unless RUBY_PLATFORM =~ /darwin/ - raise RubyLLM::Error, 'Apple Intelligence provider requires macOS' - end + raise RubyLLM::Error, 'Apple Intelligence provider requires macOS' unless RUBY_PLATFORM.include?('darwin') - unless RUBY_PLATFORM =~ /arm64/ - RubyLLM.logger.warn('Apple Intelligence binary is built for arm64. ' \ - 'It may not work on this architecture.') - end + return if RUBY_PLATFORM.include?('arm64') + + RubyLLM.logger.warn('Apple Intelligence binary is built for arm64. ' \ + 'It may not work on this architecture.') end def download_binary!(path) @@ -45,9 +43,7 @@ def download_binary!(path) RubyLLM.logger.info("Downloading osx-ai-inloop binary to #{path}...") URI.open(BINARY_URL, 'rb') do |remote| # rubocop:disable Security/Open - File.open(path, 'wb') do |local| - local.write(remote.read) - end + File.binwrite(path, remote.read) end RubyLLM.logger.info('Binary downloaded successfully.') diff --git a/lib/ruby_llm/providers/apple_intelligence/chat.rb b/lib/ruby_llm/providers/apple_intelligence/chat.rb index 8e421c93b..f0d5f1e3a 100644 --- a/lib/ruby_llm/providers/apple_intelligence/chat.rb +++ b/lib/ruby_llm/providers/apple_intelligence/chat.rb @@ -19,10 +19,9 @@ module Chat private - def build_payload(messages) + def build_payload(messages) # rubocop:disable Metrics/PerceivedComplexity system_prompt = nil conversation = [] - latest_user_message = nil messages.each do |msg| case msg.role @@ -35,20 +34,19 @@ def build_payload(messages) latest_user_message = extract_text(conversation.pop.content) if conversation.last&.role == :user - # After tool execution, the last message is :tool (the result). - # Build a prompt that asks the model to answer using the tool result. + # After tool execution the last message is :tool (the result). + # Synthesize a prompt so the model can answer using the tool output. if latest_user_message.nil? || latest_user_message.empty? 
tool_results = conversation.select { |m| m.role == :tool }.map { |m| extract_text(m.content) } - user_msg = conversation.select { |m| m.role == :user }.last + user_msg = conversation.reverse.find { |m| m.role == :user } original_question = user_msg ? extract_text(user_msg.content) : 'the user question' - latest_user_message = "Answer this question: #{original_question}\n\nUse this data: #{tool_results.join('; ')}" - conversation = [] # already incorporated into the prompt + latest_user_message = "Answer this question: #{original_question}\n\n" \ + "Use this data: #{tool_results.join('; ')}" + conversation = [] end - input_parts = conversation.map do |msg| - format_conversation_message(msg) - end + input_parts = conversation.map { |msg| format_conversation_message(msg) } payload = { prompt: latest_user_message, @@ -78,7 +76,7 @@ def extract_text(content) end end - def execute_binary(payload, config, tools: nil) + def execute_binary(payload, config) bin = BinaryManager.binary_path(config) json_input = JSON.generate(payload) @@ -88,12 +86,10 @@ def execute_binary(payload, config, tools: nil) parse_binary_response(stdout) end - # Two-pass tool calling: first ask the model to extract arguments, - # then construct the tool call programmatically. - def resolve_tool_call(tools, user_message, config) + def resolve_tool_call(tools, user_message, config) # rubocop:disable Metrics/PerceivedComplexity return nil unless tools&.any? - tool_name, tool = tools.first # single-tool shortcut for now + tool_name, tool = tools.first # Zero-parameter tools: call immediately if tool.parameters.empty? 
@@ -101,30 +97,18 @@ def resolve_tool_call(tools, user_message, config) return { call_id => ToolCall.new(id: call_id, name: tool_name.to_s, arguments: {}) } end - # Build a minimal extraction prompt per parameter + extract_tool_arguments(tool_name, tool, user_message, config) + rescue StandardError => e + RubyLLM.logger.debug { "Tool call resolution failed: #{e.message}" } + nil + end + + def extract_tool_arguments(tool_name, tool, user_message, config) arguments = {} bin = BinaryManager.binary_path(config) tool.parameters.each_value do |param| - prompt = "What #{param.name} is mentioned in this text? Reply with just the value, nothing else.\n\n#{user_message}" - payload = { prompt: prompt, model: 'on-device', format: 'json', stream: false } - - stdout, _stderr, status = Open3.capture3(bin, stdin_data: JSON.generate(payload)) - next unless status.success? - - body = JSON.parse(stdout) rescue next - next unless body['ok'] - - raw_output = (body['output'] || '').strip - # The model might wrap the answer in JSON or return plain text - value = begin - parsed = JSON.parse(raw_output) - # If it returned {"city": "Tokyo"} or {"value": "Tokyo"} - parsed.is_a?(Hash) ? (parsed[param.name.to_s] || parsed.values.first) : parsed.to_s - rescue JSON::ParserError - raw_output.gsub(/\A["']|["']\z/, '') # strip quotes if plain text - end - + value = extract_single_param(bin, param.name, user_message) arguments[param.name.to_sym] = value if value && !value.empty? end @@ -132,9 +116,33 @@ def resolve_tool_call(tools, user_message, config) call_id = "call_#{SecureRandom.hex(8)}" { call_id => ToolCall.new(id: call_id, name: tool_name.to_s, arguments: arguments) } - rescue StandardError => e - RubyLLM.logger.debug { "Tool call resolution failed: #{e.message}" } - nil + end + + def extract_single_param(bin, param_name, user_message) + prompt = "What #{param_name} is mentioned in this text? 
" \ + "Reply with just the value, nothing else.\n\n#{user_message}" + payload = { prompt: prompt, model: 'on-device', format: 'json', stream: false } + + stdout, _stderr, status = Open3.capture3(bin, stdin_data: JSON.generate(payload)) + return nil unless status.success? + + body = begin + JSON.parse(stdout) + rescue JSON::ParserError + return nil + end + return nil unless body['ok'] + + parse_extracted_value(body['output']&.strip, param_name) + end + + def parse_extracted_value(raw_output, param_name) + return nil if raw_output.nil? || raw_output.empty? + + parsed = JSON.parse(raw_output) + parsed.is_a?(Hash) ? (parsed[param_name.to_s] || parsed.values.first).to_s : parsed.to_s + rescue JSON::ParserError + raw_output.gsub(/\A["']|["']\z/, '') end def handle_exit_code(status, stdout, stderr) @@ -145,20 +153,21 @@ def handle_exit_code(status, stdout, stderr) begin body = JSON.parse(stdout) - if body['error'] - error_msg = "#{body['error']['code']}: #{body['error']['message']}" - end + error_msg = "#{body['error']['code']}: #{body['error']['message']}" if body['error'] rescue JSON::ParserError error_msg = "#{error_msg} — #{stderr}" unless stderr.empty? end + raise_for_exit_code(code, error_msg) + end + + def raise_for_exit_code(code, error_msg) case code when 1 then raise RubyLLM::BadRequestError, error_msg when 2 then raise RubyLLM::Error, "Unsupported environment: #{error_msg}" when 3 then raise RubyLLM::ModelNotFoundError, error_msg - when 4 then raise RubyLLM::ServerError, error_msg - when 5 then raise RubyLLM::ServerError, error_msg - else raise RubyLLM::Error, error_msg + when 4, 5 then raise RubyLLM::ServerError, error_msg + else raise RubyLLM::Error, error_msg end end @@ -171,18 +180,15 @@ def parse_binary_response(stdout) end output_text = body['output'] || '' - estimated_tokens = estimate_tokens(output_text) - model_id = body['model'] || 'apple-intelligence' - tool_calls = extract_tool_calls(output_text) Message.new( role: :assistant, content: tool_calls ? 
'' : output_text, tool_calls: tool_calls, - model_id: model_id, + model_id: body['model'] || 'apple-intelligence', input_tokens: 0, - output_tokens: estimated_tokens, + output_tokens: estimate_tokens(output_text), raw: body ) rescue JSON::ParserError => e