Implement local Ollama support for app-server transport and example suite

nshkrdotcom · nshkrdotcom · commit b4bddb293d50 · 2026-03-25T19:52:37.000-10:00
- Update app-server connection to pass configuration via --config overrides
  instead of deprecated OSS-specific CLI flags.
- Introduce Codex.ExamplesSupport to provide centralized Ollama-aware model
  and reasoning effort defaults for example scripts.
- Modify example runner to support --ollama mode, which enables CLI-backed
  examples against Ollama while skipping unsupported OpenAI-only subsystems.
- Add deterministic fallback logic in example scripts for features currently
  unreliable in local OSS mode, such as strict structured output schemas.
- Improve CLI command generation to correctly inject model_provider and model
  settings into the codex app-server startup sequence.
diff --git a/README.md b/README.md
@@ -174,6 +174,15 @@ That causes the shared core registry to:
 The SDK does not infer those flags on its own.
 - CLI argument rendering only emits `--model` from a non-empty resolved value
 
+For the stateful app-server transport, the same resolved payload is rendered into
+supported `codex app-server --config ...` startup overrides, plus a `modelProvider`
+selection on `thread/start`. The SDK does not pass the exec-only OSS flags, which
+`codex app-server` does not support.
+
+`./examples/run_all.sh --ollama` uses that same route. It runs the CLI-backed
+example suite against local Ollama and skips the direct OpenAI realtime/voice
+examples, which are a separate subsystem and are not Ollama-backed.
+
 Use `Codex.Models.default_model/0`, `Codex.Models.list_visible/1`, and
 `Codex.Models.default_reasoning_effort/1` as convenience readers over that
 shared contract.
diff --git a/examples/README.md b/examples/README.md
@@ -51,6 +51,17 @@ Run the same CLI-backed example set against local Codex OSS + Ollama:
 The runner checks that the requested Ollama model is installed before starting
 the examples.
 
+In `--ollama` mode, the runner:
+
+- executes the full CLI-backed example suite against the local Ollama-backed Codex route
+- keeps app-server examples enabled by configuring `codex app-server` with supported
+  `--config` overrides instead of unsupported OSS argv flags
+- uses deterministic local fallbacks where upstream features are not reliable on the
+  local OSS path (for example strict structured-output assertions or live web-search
+  event enforcement)
+- skips the direct OpenAI realtime/voice examples, because those examples are not
+  Ollama-backed and use a separate direct API subsystem
+
 If direct API credentials are missing, realtime/voice examples are reported as `SKIPPED` and do not fail the run.
 If credentials exist but direct API access is unavailable (for example `insufficient_quota`, missing realtime model access, or an upstream Realtime `server_error`), direct API examples print `SKIPPED: <reason>`. Realtime demos now run a minimal raw-WebSocket health probe first and include the upstream `session_id` in the skip reason when OpenAI fails before any example-specific logic.
 The native OAuth example also self-skips in runner contexts unless you point it
@@ -109,6 +120,9 @@ The `live_*.exs` scripts hit the live Codex CLI (no OPENAI_API_KEY needed if you
 
 These examples use the OpenAI Realtime API directly (not via Codex CLI). They demonstrate real-time bidirectional voice interactions:
 
+`./examples/run_all.sh --ollama` skips this entire section on purpose. Those examples are
+OpenAI-only and do not participate in the local Codex OSS + Ollama route.
+
 - `examples/live_realtime_voice.exs` — full realtime voice interaction demo with real audio I/O
 - `examples/realtime_basic.exs` — basic realtime session setup with real audio input
 - `examples/realtime_tools.exs` — using function tools with realtime agents
diff --git a/examples/conversation_and_resume.exs b/examples/conversation_and_resume.exs
@@ -1,13 +1,14 @@
 #!/usr/bin/env mix run
 
+alias Codex.ExamplesSupport
 alias Codex.Items
 
 defmodule Examples.Conversation do
   @moduledoc false
 
   def multi_turn do
     {:ok, codex_opts} =
-      Codex.Options.new(%{model: Codex.Models.default_model()})
+      Codex.Options.new(%{model: ExamplesSupport.example_model()})
 
     {:ok, thread} = Codex.start_thread(codex_opts)
 
@@ -36,7 +37,7 @@ defmodule Examples.Conversation do
 
   def resume_existing(thread_id) do
     {:ok, codex_opts} =
-      Codex.Options.new(%{model: Codex.Models.default_model()})
+      Codex.Options.new(%{model: ExamplesSupport.example_model()})
 
     {:ok, thread} = Codex.resume_thread(thread_id, codex_opts)
 
@@ -48,7 +49,7 @@ defmodule Examples.Conversation do
 
   def save_and_resume_demo do
     {:ok, codex_opts} =
-      Codex.Options.new(%{model: Codex.Models.default_model()})
+      Codex.Options.new(%{model: ExamplesSupport.example_model()})
 
     {:ok, thread} = Codex.start_thread(codex_opts)
     {:ok, result1} = Codex.Thread.run(thread, "Remember the number 42 for me.")
diff --git a/examples/live_collaboration_modes.exs b/examples/live_collaboration_modes.exs
@@ -1,5 +1,6 @@
 Mix.Task.run("app.start")
 
+alias Codex.ExamplesSupport
 alias Codex.{AppServer, Items, Models, Options, Thread}
 alias Codex.Protocol.CollaborationMode
 
@@ -236,18 +237,25 @@ defmodule LiveCollaborationModes do
   defp normalize_effort_value(_), do: nil
 
   defp resolve_selected_model(selected_mode) do
-    case selected_mode.model do
-      model when is_binary(model) and model != "" ->
+    case {ExamplesSupport.ollama_mode?(), selected_mode.model} do
+      {true, model} ->
+        {ExamplesSupport.example_model(model),
+         " (Ollama mode forces the selected local OSS model)"}
+
+      {false, model} when is_binary(model) and model != "" ->
         {model, " (advertised by the server preset)"}
 
       _ ->
-        {Models.default_model(), " (server omitted model; using the SDK default)"}
+        {ExamplesSupport.example_model(), " (server omitted model; using the SDK default)"}
     end
   end
 
   defp resolve_selected_effort(selected_mode, model) do
-    case selected_mode.reasoning_effort do
-      effort when not is_nil(effort) ->
+    case {ExamplesSupport.ollama_mode?(), selected_mode.reasoning_effort} do
+      {true, _effort} ->
+        {nil, " (Ollama mode does not force a reasoning effort)"}
+
+      {false, effort} when not is_nil(effort) ->
         note =
           if effort == :low do
             " (advertised by the server preset)"
@@ -258,7 +266,7 @@ defmodule LiveCollaborationModes do
         {effort, note}
 
       _ ->
-        effort = Models.default_reasoning_effort(model)
+        effort = ExamplesSupport.example_reasoning(Models.default_reasoning_effort(model))
 
         note =
           " (server omitted effort; using the selected model default)"
diff --git a/examples/live_mcp_and_sessions.exs b/examples/live_mcp_and_sessions.exs
@@ -3,6 +3,7 @@ Mix.Task.run("app.start")
 
 alias Codex.{AgentRunner, Events, RunConfig, Tools}
 alias Codex.Agent, as: CodexAgent
+alias Codex.ExamplesSupport
 alias Codex.Items.AgentMessage
 
 defmodule CodexExamples.StubMcpTransport do
@@ -175,7 +176,7 @@ defmodule CodexExamples.LiveMcpAndSessions do
     {:ok, codex_opts} =
       Codex.Options.new(%{
         codex_path_override: fetch_codex_path!(),
-        model: Codex.Models.default_model()
+        model: ExamplesSupport.example_model()
       })
 
     {:ok, thread} = Codex.start_thread(codex_opts)
diff --git a/examples/live_session_walkthrough.exs b/examples/live_session_walkthrough.exs
@@ -1,5 +1,7 @@
 Mix.Task.run("app.start")
 
+alias Codex.ExamplesSupport
+
 defmodule CodexExamples.LiveSessionWalkthrough do
   def main(argv) do
     prompt =
@@ -14,7 +16,7 @@ defmodule CodexExamples.LiveSessionWalkthrough do
     codex_opts =
       Codex.Options.new(%{
         codex_path_override: fetch_codex_path!(),
-        model: Codex.Models.default_model()
+        model: ExamplesSupport.example_model()
       })
       |> unwrap!("codex options")
 
diff --git a/examples/live_subagent_host_controls.exs b/examples/live_subagent_host_controls.exs
@@ -1,6 +1,7 @@
 Mix.Task.run("app.start")
 
 alias Codex.{AppServer, Events, Items, Options, RunResultStreaming, Subagents, Thread}
+alias Codex.ExamplesSupport
 
 defmodule CodexExamples.LiveSubagentHostControls do
   @moduledoc false
@@ -28,14 +29,17 @@ defmodule CodexExamples.LiveSubagentHostControls do
     prompt = parse_prompt(argv)
     cwd = File.cwd!()
     codex_path = fetch_codex_path!()
-    model = System.get_env("CODEX_MODEL") || Codex.Models.default_model()
-    reasoning_effort = Codex.Models.default_reasoning_effort(model)
+    model = ExamplesSupport.example_model(System.get_env("CODEX_MODEL"))
+
+    reasoning_effort =
+      ExamplesSupport.example_reasoning(Codex.Models.default_reasoning_effort(model))
+
     ensure_app_server_supported!(codex_path)
 
     IO.puts("""
     Starting live subagent host-controls example.
       model: #{model}
-      reasoning_effort: #{reasoning_effort}
+      reasoning_effort: #{reasoning_effort || "none"}
       working_directory: #{cwd}
       codex_path: #{codex_path}
     """)
diff --git a/examples/live_telemetry_stream.exs b/examples/live_telemetry_stream.exs
@@ -1,4 +1,5 @@
 alias Codex.{Error, Models, Options, RunResultStreaming, Thread, TransportError}
+alias Codex.ExamplesSupport
 
 defmodule LiveTelemetryStream do
   @moduledoc false
@@ -22,13 +23,13 @@ defmodule LiveTelemetryStream do
     prompt = parse_prompt(args)
     handler_id = "codex-live-telemetry-#{System.unique_integer([:positive])}"
 
-    model = Models.default_model()
-    reasoning = :low
+    model = ExamplesSupport.example_model(Models.default_model())
+    reasoning = ExamplesSupport.example_reasoning(:low)
 
     IO.puts("""
     Streaming live Codex telemetry (thread/diff/usage/compaction).
     Auth will use CODEX_API_KEY if set, otherwise your Codex CLI login.
-    Using model=#{model} reasoning_effort=#{reasoning}.
+    Using model=#{model} reasoning_effort=#{reasoning || "none"}.
     Starting live stream; you should see a thread start notice shortly.
     Some telemetry (usage/diff/compaction) may only appear at completion, and
     tool-heavy prompts can take 30-60s.
diff --git a/examples/live_usage_and_compaction.exs b/examples/live_usage_and_compaction.exs
@@ -1,13 +1,14 @@
 alias Codex.{Error, Events, Items, Models, Options, RunResultStreaming, Thread, TransportError}
+alias Codex.ExamplesSupport
 
 defmodule LiveUsageAndCompaction do
   @moduledoc false
 
   def main(args) do
     prompt = parse_prompt(args)
 
-    model = Models.default_model()
-    reasoning = Models.default_reasoning_effort(model)
+    model = ExamplesSupport.example_model(Models.default_model())
+    reasoning = ExamplesSupport.example_reasoning(Models.default_reasoning_effort(model))
     tools? = Models.tool_enabled?(model)
 
     IO.puts("""
diff --git a/examples/live_web_search_modes.exs b/examples/live_web_search_modes.exs
@@ -1,6 +1,7 @@
 Mix.Task.run("app.start")
 
 alias Codex.{Error, Events, Items, Options, RunResultStreaming, Thread, TransportError}
+alias Codex.ExamplesSupport
 
 defmodule LiveWebSearchModes do
   @moduledoc false
@@ -17,6 +18,14 @@ defmodule LiveWebSearchModes do
   def main(args) do
     {modes, prompt} = parse_args(args)
     codex_path = fetch_codex_path!()
+
+    if ExamplesSupport.ollama_mode?() do
+      IO.puts("""
+      Ollama mode detected. This local OSS route does not guarantee Codex web-search events.
+      Running a request-plumbing demo instead of enforcing live web-search event assertions.
+      """)
+    end
+
     failures = Enum.flat_map(modes, &run_mode(&1, prompt, codex_path))
 
     if failures != [] do
@@ -96,18 +105,25 @@ defmodule LiveWebSearchModes do
 
   defp validate_mode_expectation(:disabled, final_state), do: {:ok, final_state}
 
-  defp validate_mode_expectation(:live, %{web_search?: true} = final_state),
-    do: {:ok, final_state}
-
-  defp validate_mode_expectation(:live, _final_state) do
-    {:retry_required, {:expected_web_search_events, :none_observed}}
+  defp validate_mode_expectation(:live, final_state) do
+    if ExamplesSupport.ollama_mode?() do
+      {:ok, final_state}
+    else
+      validate_live_mode_expectation(final_state)
+    end
   end
 
   # Cached mode only permits cached search results. A turn may legitimately emit
   # no web-search events when no cached result is available or the model answers
   # without using the tool.
   defp validate_mode_expectation(:cached, final_state), do: {:ok, final_state}
 
+  defp validate_live_mode_expectation(%{web_search?: true} = final_state),
+    do: {:ok, final_state}
+
+  defp validate_live_mode_expectation(_final_state),
+    do: {:retry_required, {:expected_web_search_events, :none_observed}}
+
   defp report_final_state(mode, final_state) do
     if final_state.web_search? do
       IO.puts("Observed web search events.")
@@ -124,6 +140,16 @@ defmodule LiveWebSearchModes do
     )
   end
 
+  defp report_no_web_search_events(:live) do
+    if ExamplesSupport.ollama_mode?() do
+      IO.puts(
+        "No web search events observed. Ollama mode validates request plumbing only for live web_search."
+      )
+    else
+      IO.puts("No web search events observed.")
+    end
+  end
+
   defp report_no_web_search_events(_mode), do: IO.puts("No web search events observed.")
 
   defp handle_event(%Events.ItemStarted{item: %Items.WebSearch{query: query}}, state) do
diff --git a/examples/run_all.sh b/examples/run_all.sh
@@ -78,6 +78,25 @@ fi
 
 echo
 
+if [[ "${CODEX_PROVIDER_BACKEND:-}" == "oss" && "${CODEX_OSS_PROVIDER:-}" == "ollama" ]]; then
+  echo "CLI backend: Ollama via Codex OSS"
+  echo "CLI model: ${CODEX_MODEL}"
+  echo "CLI route: codex --oss --local-provider ollama --model ${CODEX_MODEL}"
+  echo "Direct API examples: skipped in --ollama mode because they are OpenAI-only"
+  EXAMPLE_TIMEOUT_SECONDS="${CODEX_EXAMPLES_TIMEOUT_SECONDS:-120}"
+  echo "Per-example timeout: ${EXAMPLE_TIMEOUT_SECONDS}s"
+  echo
+else
+  echo "CLI backend: standard Codex CLI"
+  if [[ -n "${CODEX_MODEL:-}" ]]; then
+    echo "CLI model override: ${CODEX_MODEL}"
+  else
+    echo "CLI model: shared core default"
+  fi
+  EXAMPLE_TIMEOUT_SECONDS="${CODEX_EXAMPLES_TIMEOUT_SECONDS:-}"
+  echo
+fi
+
 if [[ "${CODEX_PROVIDER_BACKEND:-}" != "oss" || "${CODEX_OSS_PROVIDER:-}" != "ollama" ]] && [[ -z "${CODEX_API_KEY:-}" ]]; then
   echo "Warning: No CODEX_API_KEY set (CLI examples require codex login or CODEX_API_KEY)"
   echo
@@ -160,22 +179,44 @@ run_example_group() {
   local group_name="$1"
   shift
   local ex
+  local rc
 
   echo "==> Running ${group_name}"
   for ex in "$@"; do
     echo "==> mix run ${ex}"
-    if ! mix run "${ex}"; then
+
+    rc=0
+
+    if [[ -n "${EXAMPLE_TIMEOUT_SECONDS:-}" ]] && command -v timeout >/dev/null 2>&1; then
+      timeout --foreground "${EXAMPLE_TIMEOUT_SECONDS}s" mix run "${ex}" || rc=$?
+    else
+      mix run "${ex}" || rc=$?
+    fi
+
+    if [[ "$rc" -ne 0 ]]; then
       echo
-      echo "FAILED: ${ex}"
-      failures+=("${ex}")
+      if [[ "$rc" -eq 124 ]]; then
+        echo "TIMED OUT: ${ex} (${EXAMPLE_TIMEOUT_SECONDS}s)"
+        failures+=("${ex} (timeout=${EXAMPLE_TIMEOUT_SECONDS}s)")
+      else
+        echo "FAILED: ${ex} (exit=${rc})"
+        failures+=("${ex} (exit=${rc})")
+      fi
     fi
     echo
   done
 }
 
 run_example_group "CLI/Auth examples" "${cli_examples[@]}"
 
-if detect_direct_api_key; then
+if [[ "${CODEX_PROVIDER_BACKEND:-}" == "oss" && "${CODEX_OSS_PROVIDER:-}" == "ollama" ]]; then
+  echo "==> Skipping Direct OpenAI API examples in --ollama mode"
+  for ex in "${direct_api_examples[@]}"; do
+    skipped+=("${ex}")
+    echo "SKIPPED: ${ex}"
+  done
+  echo
+elif detect_direct_api_key; then
   run_example_group "Direct OpenAI API examples (realtime/voice)" "${direct_api_examples[@]}"
 else
   echo "==> Skipping Direct OpenAI API examples (no CODEX_API_KEY/OPENAI_API_KEY/auth.json OPENAI_API_KEY)"
diff --git a/examples/structured_output.exs b/examples/structured_output.exs
diff --git a/lib/codex/app_server/connection.ex b/lib/codex/app_server/connection.ex
diff --git a/lib/codex/examples_support.ex b/lib/codex/examples_support.ex
diff --git a/lib/codex/transport/app_server.ex b/lib/codex/transport/app_server.ex
diff --git a/test/codex/app_server/connection_test.exs b/test/codex/app_server/connection_test.exs