Add caching for list_models to prevent rate limiting

Copilot · friggeri · Copilot · commit bc3aa204e0da · 2026-01-30T17:21:23.000Z
Co-authored-by: friggeri <106686+friggeri@users.noreply.github.com>
diff --git a/python/copilot/client.py b/python/copilot/client.py
@@ -157,6 +157,8 @@ def __init__(self, options: Optional[CopilotClientOptions] = None):
         self._state: ConnectionState = "disconnected"
         self._sessions: dict[str, CopilotSession] = {}
         self._sessions_lock = threading.Lock()
+        self._models_cache: Optional[list["ModelInfo"]] = None
+        self._models_cache_lock = threading.Lock()
 
     def _parse_cli_url(self, url: str) -> tuple[str, int]:
         """
@@ -281,6 +283,10 @@ async def stop(self) -> list["StopError"]:
             await self._client.stop()
             self._client = None
 
+        # Clear models cache
+        with self._models_cache_lock:
+            self._models_cache = None
+
         # Kill CLI process
         # Kill CLI process (only if we spawned it)
         if self._process and not self._is_external_server:
@@ -325,6 +331,10 @@ async def force_stop(self) -> None:
                 pass  # Ignore errors during force stop
             self._client = None
 
+        # Clear models cache
+        with self._models_cache_lock:
+            self._models_cache = None
+
         # Kill CLI process immediately
         if self._process and not self._is_external_server:
             self._process.kill()
@@ -705,6 +715,9 @@ async def list_models(self) -> list["ModelInfo"]:
         """
         List available models with their metadata.
 
+        Results are cached after the first successful call to avoid rate limiting.
+        The cache is cleared when the client disconnects.
+
         Returns:
             A list of ModelInfo objects with model details.
 
@@ -720,9 +733,21 @@ async def list_models(self) -> list["ModelInfo"]:
         if not self._client:
             raise RuntimeError("Client not connected")
 
+        # Check cache first (thread-safe)
+        with self._models_cache_lock:
+            if self._models_cache is not None:
+                return self._models_cache
+
+        # Cache miss - fetch from backend
         response = await self._client.request("models.list", {})
         models_data = response.get("models", [])
-        return [ModelInfo.from_dict(model) for model in models_data]
+        models = [ModelInfo.from_dict(model) for model in models_data]
+
+        # Update cache (thread-safe)
+        with self._models_cache_lock:
+            self._models_cache = models
+
+        return models
 
     async def list_sessions(self) -> list["SessionMetadata"]:
         """
diff --git a/python/e2e/test_client.py b/python/e2e/test_client.py
@@ -135,3 +135,36 @@ async def test_should_list_models_when_authenticated(self):
             await client.stop()
         finally:
             await client.force_stop()
+
+    async def test_should_cache_models_list(self):
+        """Test that list_models caches results to avoid rate limiting"""
+        client = CopilotClient({"cli_path": CLI_PATH, "use_stdio": True})
+
+        try:
+            await client.start()
+
+            auth_status = await client.get_auth_status()
+            if not auth_status.isAuthenticated:
+                # Not authenticated: return early (test passes) - models.list requires auth
+                await client.stop()
+                return
+
+            # First call should fetch from backend
+            models1 = await client.list_models()
+            assert isinstance(models1, list)
+
+            # Second call should return cached results (same object reference)
+            models2 = await client.list_models()
+            assert models2 is models1, "Second call should return cached results"
+
+            # After stopping, cache should be cleared
+            await client.stop()
+
+            # Restart and verify cache is empty
+            await client.start()
+            models3 = await client.list_models()
+            assert models3 is not models1, "Cache should be cleared after disconnect"
+
+            await client.stop()
+        finally:
+            await client.force_stop()