Merge pull request microsoft#8 from robotdad/feat/provider-improvements

mowree · web-flow · commit 3ed36cf8376d · 2026-02-24T18:34:05.000-08:00
Resolved conflicts with PR microsoft#9 (well-known provider): - tests/conftest.py: kept both reset_singleton_state fixture - tests/test_mount.py: kept both SDK binary tests and TestSingleton class
diff --git a/amplifier_module_provider_github_copilot/__init__.py b/amplifier_module_provider_github_copilot/__init__.py
@@ -40,14 +40,16 @@
 
 from __future__ import annotations
 
+import asyncio
 import logging
 import shutil
 from collections.abc import Awaitable, Callable
 from typing import Any
 
 from amplifier_core import ChatResponse, ToolCall
 
-from .client import AuthStatus, SessionInfo, SessionListResult
+from ._constants import DEFAULT_TIMEOUT
+from .client import AuthStatus, CopilotClientWrapper, SessionInfo, SessionListResult
 from .exceptions import (
     CopilotAbortError,
     CopilotAuthenticationError,
@@ -121,6 +123,92 @@
 
 logger = logging.getLogger(__name__)
 
+# ═══════════════════════════════════════════════════════════════════════════════
+# Process-Level Singleton State
+# ═══════════════════════════════════════════════════════════════════════════════
+#
+# Sub-agents spawned by the task tool run as async coroutines in the SAME
+# Python process and asyncio event loop as the parent session (kernel-guaranteed).
+# Each sub-agent gets its own fresh ModuleCoordinator — coordinators are not shared.
+#
+# This singleton ensures all mounts in a process share ONE CopilotClientWrapper
+# (one copilot CLI subprocess) regardless of how many sub-agents are spawned.
+# Without this, N sub-agents spawn N processes × ~500 MB each.
+#
+# Reference: docs/plans/2026-02-23-process-singleton-design.md
+
+_shared_client: CopilotClientWrapper | None = None  # wrapper shared by every mount; None until first acquire
+_shared_client_refcount: int = 0  # acquisitions not yet matched by a release
+_shared_client_lock: asyncio.Lock | None = None  # created lazily by _get_lock()
+
+
+def _get_lock() -> asyncio.Lock:
+    """Fetch the process-wide singleton lock, building it on first use.
+
+    Deferred so that no asyncio.Lock is constructed at import time; later calls reuse it.
+    """
+    global _shared_client_lock
+    if _shared_client_lock is not None:
+        return _shared_client_lock
+    _shared_client_lock = asyncio.Lock()
+    return _shared_client_lock
+
+
+async def _acquire_shared_client(
+    config: dict[str, Any],
+    timeout: float,
+) -> CopilotClientWrapper:
+    """Hand out the process-wide CopilotClientWrapper and take one reference on it.
+
+    The wrapper is built from ``config`` and ``timeout`` only when no shared
+    client exists yet; later callers get that same object back. Every call
+    bumps the reference count, so pair it with _release_shared_client().
+
+    A caller whose ``timeout`` differs from the existing client's ``_timeout``
+    attribute still gets the existing client; the mismatch is logged at DEBUG
+    level and the requested value is not applied.
+    """
+    global _shared_client, _shared_client_refcount
+    async with _get_lock():
+        if _shared_client is not None:
+            in_use_timeout = getattr(_shared_client, "_timeout", timeout)
+            if in_use_timeout != timeout:
+                logger.debug(
+                    f"[MOUNT] Ignoring timeout={timeout} for shared client "
+                    f"(already created with timeout={in_use_timeout})"
+                )
+        else:
+            logger.info("[MOUNT] Creating shared Copilot subprocess (first mount in process)")
+            _shared_client = CopilotClientWrapper(config=config, timeout=timeout)
+        _shared_client_refcount += 1
+        logger.debug(f"[MOUNT] Shared client refcount: {_shared_client_refcount}")
+        return _shared_client
+
+
+async def _release_shared_client() -> None:
+    """Release one reference to the shared client.
+
+    When the count reaches zero, closes and destroys the shared subprocess.
+    Safe to call if already at zero (safety floor prevents negative counts).
+    State is reset before close() runs, so a failing close() cannot leave a dead client cached.
+
+    NOTE: If the Python process is killed (SIGKILL/crash), the refcount
+    never reaches zero and close() is never called. This is acceptable —
+    the OS reclaims the copilot subprocess when the parent process exits.
+    There is no mitigation needed for this case.
+    """
+    global _shared_client, _shared_client_refcount
+    async with _get_lock():
+        _shared_client_refcount -= 1
+        logger.debug(f"[MOUNT] Shared client refcount after release: {_shared_client_refcount}")
+        if _shared_client_refcount > 0:
+            return  # other mounts still hold references
+        _shared_client_refcount = 0  # safety floor: prevent negative counts
+        client, _shared_client = _shared_client, None  # detach first: close() may raise
+        if client is not None:
+            logger.info("[MOUNT] Last mount cleaned up — shutting down shared Copilot subprocess")
+            await client.close()
+
 
 async def mount(
     coordinator: Any,  # ModuleCoordinator
@@ -178,26 +266,39 @@ async def mount(
     # Set CLI path in config for provider to use
     config["cli_path"] = cli_path
 
+    # Track whether this call acquired a shared client reference,
+    # so the error path only releases what this call actually acquired.
+    acquired_client: CopilotClientWrapper | None = None
+
     try:
-        # Create provider (api_key is None for Copilot - uses GitHub auth)
-        provider = CopilotSdkProvider(None, config, coordinator)
+        timeout = float(config.get("timeout", DEFAULT_TIMEOUT))
+
+        # Acquire (or reuse) the process-level shared client.
+        # All mounts in this Python process share one CopilotClientWrapper instance.
+        acquired_client = await _acquire_shared_client(config, timeout)
+
+        # Create provider, injecting the shared client
+        provider = CopilotSdkProvider(None, config, coordinator, client=acquired_client)
 
         # Register with coordinator
         await coordinator.mount("providers", provider, name="github-copilot")
 
         logger.info("[MOUNT] CopilotSdkProvider mounted successfully")
 
-        # Return cleanup function
+        # Return cleanup function — releases the shared reference, not the provider
         async def cleanup() -> None:
             """Cleanup function called when unmounting."""
             logger.info("[MOUNT] Unmounting CopilotSdkProvider...")
-            await provider.close()
+            await _release_shared_client()
             logger.info("[MOUNT] CopilotSdkProvider unmounted")
 
         return cleanup
 
     except Exception as e:
         logger.error(f"[MOUNT] Failed to mount CopilotSdkProvider: {e}")
+        # Only release if this call successfully acquired a reference before the failure
+        if acquired_client is not None:
+            await _release_shared_client()
         return None
 
 
diff --git a/amplifier_module_provider_github_copilot/provider.py b/amplifier_module_provider_github_copilot/provider.py
@@ -154,6 +154,7 @@ def __init__(
         api_key: str | None = None,  # Unused, kept for signature compatibility
         config: dict[str, Any] | None = None,
         coordinator: Any | None = None,  # ModuleCoordinator
+        client: CopilotClientWrapper | None = None,  # Shared singleton if provided
     ):
         """
         Initialize the Copilot SDK provider.
@@ -169,6 +170,10 @@ def __init__(
                 - debug_truncate_length: Max length for debug output (default: 180)
                 - cli_path: Path to Copilot CLI executable
             coordinator: Amplifier's ModuleCoordinator for hooks and events
+            client: Optional pre-created CopilotClientWrapper to reuse. If None,
+                a new wrapper is created (backward-compatible default). Pass the
+                shared singleton from _acquire_shared_client() to avoid spawning
+                multiple copilot subprocesses.
 
         Note:
             Timeout is automatically selected based on whether extended_thinking
@@ -204,10 +209,15 @@ def __init__(
         # Streaming configuration (default: enabled like Anthropic provider)
         self._use_streaming = bool(config.get("use_streaming", True))
 
-        # Initialize client wrapper
-        self._client = CopilotClientWrapper(
-            config=config,
-            timeout=self._timeout,
+        # Initialize client wrapper — use injected singleton if provided,
+        # otherwise create a new one (backward-compatible default).
+        self._client = (
+            client
+            if client is not None
+            else CopilotClientWrapper(
+                config=config,
+                timeout=self._timeout,
+            )
         )
 
         # Retry configuration (lighter defaults than HTTP-only providers
diff --git a/docs/plans/2026-02-23-process-singleton-design.md b/docs/plans/2026-02-23-process-singleton-design.md
@@ -0,0 +1,161 @@
+# Process-Level Singleton Fix for GitHub Copilot Provider
+
+## Goal
+
+Fix the GitHub Copilot provider so that all sub-agents spawned by a recipe within a single Amplifier session share one `CopilotClientWrapper` instance (and therefore one copilot CLI subprocess), rather than each spawning their own.
+
+Tracked as: [Issue #7](https://github.com/microsoft/amplifier-module-provider-github-copilot/issues/7)
+
+## Background
+
+The copilot CLI binary is Electron-bundled at ~500 MB per process. With the current implementation, N sub-agents spawned by a recipe create N subprocesses consuming N × ~500 MB RAM — the root cause of the OOM being investigated.
+
+Two architectural facts constrain the solution:
+
+- Sub-agents spawned by the `task` tool run as async coroutines in the **same Python process** and same asyncio event loop as the parent session (kernel-guaranteed)
+- Each sub-agent gets its own fresh `ModuleCoordinator` — coordinators are not shared across sessions
+
+There is no kernel facility for cross-session resource sharing; resource pooling is module-level policy, consistent with Amplifier's "mechanism not policy" principle.
+
+## Approach
+
+A process-level singleton held as module-level state in `__init__.py`, with reference counting to ensure the subprocess lives exactly as long as at least one session is mounted. The change is concentrated in `__init__.py`; the only other source change is an optional `client` argument on `CopilotSdkProvider` (in `provider.py`), which lets `mount()` inject the shared wrapper.
+
+Two other approaches were considered and rejected:
+
+- **Process-level cap (semaphore)**: Limits blast radius but doesn't eliminate waste; more complex without being more correct
+- **Status quo + polish only**: Valid if the OOM were purely a runaway delegation bug, but the singleton is the right architecture independently — even legitimate heavy recipes waste N × 500 MB today
+
+## Architecture
+
+The change is surgical. `CopilotClientWrapper` itself doesn't change. The only change is in when and how many times it gets created.
+
+**Before:**
+```
+mount() called → new CopilotClientWrapper() → new copilot subprocess
+mount() called → new CopilotClientWrapper() → new copilot subprocess
+mount() called → new CopilotClientWrapper() → new copilot subprocess
+```
+
+**After:**
+```
+mount() called → _acquire_shared_client() → new copilot subprocess   (ref=1)
+mount() called → _acquire_shared_client() → reuse existing            (ref=2)
+mount() called → _acquire_shared_client() → reuse existing            (ref=3)
+cleanup called → _release_shared_client() → ref=2
+cleanup called → _release_shared_client() → ref=1
+cleanup called → _release_shared_client() → ref=0 → subprocess shuts down
+```
+
+**Blast radius:** `__init__.py`, plus one optional `client` constructor argument in `provider.py`. `CopilotClientWrapper`, `tool_capture.py`, `_constants.py`, and `converters.py` are untouched.
+
+## Components
+
+### Module-Level Singleton State
+
+Three module-level variables at the top of `__init__.py`:
+
+```python
+_shared_client: CopilotClientWrapper | None = None
+_shared_client_refcount: int = 0
+_shared_client_lock: asyncio.Lock | None = None
+```
+
+The lock is initialized lazily rather than at module load time. `asyncio.Lock()` must be created on the running event loop — creating it at import time can cause issues if the module is imported before an event loop exists (common in test environments). A `_get_lock()` helper handles this:
+
+```python
+def _get_lock() -> asyncio.Lock:
+    global _shared_client_lock
+    if _shared_client_lock is None:
+        _shared_client_lock = asyncio.Lock()
+    return _shared_client_lock
+```
+
+### Acquire and Release Functions
+
+```python
+async def _acquire_shared_client(config, timeout) -> CopilotClientWrapper:
+    global _shared_client, _shared_client_refcount
+    async with _get_lock():
+        if _shared_client is None:
+            _shared_client = CopilotClientWrapper(config, timeout)
+        _shared_client_refcount += 1
+        return _shared_client
+
+async def _release_shared_client() -> None:
+    global _shared_client, _shared_client_refcount
+    async with _get_lock():
+        _shared_client_refcount -= 1
+        if _shared_client_refcount <= 0:
+            if _shared_client is not None:
+                await _shared_client.close()
+                _shared_client = None
+            _shared_client_refcount = 0  # safety floor against accidental negatives
+```
+
+The safety floor at zero matters: if cleanup is ever called more times than mount (e.g., in test teardown), a negative count must not prevent the next acquisition from creating a fresh client.
+
+### Updated `mount()`
+
+`mount()` changes in one place only — acquiring the shared client instead of constructing a new one directly:
+
+```python
+async def mount(coordinator, config):
+    timeout = config.get("timeout", DEFAULT_TIMEOUT)
+
+    client = await _acquire_shared_client(config, timeout)
+
+    provider = CopilotSdkProvider(client=client, config=config)
+    coordinator.providers.register("github-copilot", provider)
+
+    async def cleanup():
+        await _release_shared_client()
+
+    return cleanup
+```
+
+Everything else — provider construction, coordinator registration, returning a cleanup callable — is unchanged.
+
+## Data Flow
+
+`_acquire_shared_client()` only uses `config` and `timeout` on the **first** call when it constructs the wrapper. Subsequent calls reuse the existing client regardless of the values passed. Sub-agents inherit bundle config from the parent, so in practice they all pass identical values.
+
+If a sub-agent passes a different `timeout` than the first mount, `_acquire_shared_client()` logs a `DEBUG`-level message and returns the existing client unchanged. No exception is raised — the caller receives a working client and the discrepancy is visible in `DEBUG` logs.
+
+## Error Handling
+
+**Config mismatch**: `DEBUG`-level message logged, existing client returned, no exception raised.
+
+**Concurrent mounts at startup**: Two sub-agents mounting simultaneously before the client exists are serialized by the `asyncio.Lock`. The second waiter finds `_shared_client` already populated, increments the refcount, and returns. Double-creation is not possible.
+
+**Unclean shutdown** (SIGKILL / process crash): The refcount never reaches zero, so `close()` never fires and the copilot subprocess becomes an orphan. This is acceptable — the OS reclaims the subprocess when the parent Python process dies. A code comment will document this reasoning explicitly so future maintainers don't treat it as an oversight.
+
+## Observability
+
+Three events are emitted via `coordinator.hooks.emit()`, consistent with existing module patterns (e.g., `provider:tool_sequence_repaired`):
+
+| Event | When | Payload |
+|---|---|---|
+| `github-copilot:subprocess_created` | First acquisition | `session_id` |
+| `github-copilot:subprocess_reused` | Subsequent acquisitions | `session_id`, `refcount` |
+| `github-copilot:subprocess_shutdown` | Refcount reaches zero | `session_id` |
+
+These are visible in session logs without any extra configuration.
+
+## Testing Strategy
+
+New tests live in `tests/test_mount.py`. `CopilotClientWrapper.__init__` is mocked to count instantiations. A pytest fixture resets `_shared_client`, `_shared_client_refcount`, and `_shared_client_lock` to `None`/`0`/`None` before each test — required for isolation with module-level state.
+
+| Test | What it verifies |
+|---|---|
+| **Singleton creation** | `mount()` once → one `CopilotClientWrapper` created, refcount=1 |
+| **Reuse** | `mount()` three times → one `CopilotClientWrapper` created, refcount=3 |
+| **Cleanup lifecycle** | Three mounts, three cleanups → `close()` called only on the third (refcount=0) |
+| **Concurrent mounts** | `asyncio.gather()` fires multiple `mount()` calls simultaneously → still one client (exercises the lock) |
+| **Config mismatch warning** | Mount with `timeout=300`, mount again with `timeout=600` → `DEBUG` warning emitted, no exception |
+
+No integration tests are needed — the copilot subprocess is already mocked in the existing suite. These tests only exercise the reference counting logic, which is pure Python with no external dependencies. Existing tests for `CopilotClientWrapper` and `CopilotSdkProvider` are unaffected since neither class changes.
+
+## Open Questions
+
+None — design is fully validated and ready for implementation.
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -39,6 +39,36 @@ def mock_cache_home(tmp_path, monkeypatch):
     return tmp_path
 
 
+@pytest.fixture(autouse=True)
+def reset_singleton_state():
+    """Give every test a pristine copy of the provider's singleton globals.
+
+    The shared client, its reference count and its lock are module-level
+    state in amplifier_module_provider_github_copilot and outlive any single
+    test. They are rewound to None / 0 / None before the test body runs and
+    again after it finishes, so state cannot leak in either direction.
+
+    Each attribute is reset only if the module already defines it, so the
+    fixture is harmless against a package build without singleton state.
+    """
+    import amplifier_module_provider_github_copilot as mod
+
+    pristine = (
+        ("_shared_client", None),
+        ("_shared_client_refcount", 0),
+        ("_shared_client_lock", None),
+    )
+
+    def _rewind() -> None:
+        for attr, value in pristine:
+            if hasattr(mod, attr):
+                setattr(mod, attr, value)
+
+    _rewind()
+    yield
+    _rewind()
+
+
 # Fix for Windows asyncio cleanup issues causing KeyboardInterrupt
 # See: https://github.com/pytest-dev/pytest-asyncio/issues/671
 if sys.platform == "win32":
diff --git a/tests/test_mount.py b/tests/test_mount.py