Merge branch 'main' into scratchpad_service

torrmal · torrmal · commit 9d17117846da · 2026-04-20T20:19:14.000-07:00
diff --git a/anton/__init__.py b/anton/__init__.py
@@ -1 +1 @@
-__version__ = "2.0.1"
+__version__ = "2.0.2"
diff --git a/anton/cli.py b/anton/cli.py
@@ -21,7 +21,7 @@
 from anton import __version__
 
 from anton.utils.prompt import prompt_or_cancel
-from anton.core.llm.openai import build_chat_completion_kwargs
+from anton.core.llm.openai import build_chat_completion_kwargs, _is_azure_endpoint
 
 from anton.chat import ChatSession
 from anton.core.session import ChatSessionConfig
@@ -236,6 +236,10 @@ def _make_console() -> Console:
 
 
 console = _make_console()
+_ensure_dependencies(console)
+
+import openai
+from openai import AzureOpenAI
 
 
 def _get_settings(ctx: typer.Context):
@@ -281,8 +285,6 @@ def main(
     ),
 ) -> None:
     """Anton — a self-evolving autonomous system."""
-    _ensure_dependencies(console)
-
     from anton.config.settings import AntonSettings
 
     settings = AntonSettings()
@@ -294,7 +296,9 @@ def main(
     from anton.updater import check_and_update
 
     if check_and_update(console, settings):
-        # Re-exec with the freshly installed code so no old modules remain in memory.
+        # Mark the env before replacing the process so the next invocation
+        # skips the update check and doesn't loop.
+        os.environ["_ANTON_UPDATED"] = "1"
         _reexec()
 
     ctx.ensure_object(dict)
@@ -865,8 +869,6 @@ def _test():
 
 def _setup_openai(settings, ws) -> None:
     """Set up OpenAI with a single model for both reasoning and coding."""
-    import openai
-
     console.print()
     while True:
         api_key = _setup_prompt("API key", is_password=True)
@@ -919,8 +921,6 @@ def _test():
 
 def _setup_gemini(settings, ws) -> None:
     """Set up Google Gemini via its OpenAI-compatible endpoint."""
-    import openai
-
     _GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
 
     console.print()
@@ -977,10 +977,22 @@ def _test():
     ws.set_secret("ANTON_CODING_MODEL", model)
 
 
-def _setup_custom_openai(settings, ws) -> None:
-    """Set up a custom OpenAI-compatible endpoint (Ollama, vLLM, Together, Groq, LM Studio, etc.)."""
-    import openai
 
+def _strip_to_azure_endpoint(raw_url: str) -> str:
+    """Reduce *raw_url* to ``scheme://host``, discarding any path or query.
+
+    The AzureOpenAI client builds the deployment path itself, so the endpoint
+    must carry neither /openai/deployments/... nor ?api-version=...
+    """
+    from urllib.parse import urlparse
+    candidate = raw_url if "://" in raw_url else f"https://{raw_url}"
+    parts = urlparse(candidate)
+    authority = parts.netloc or parts.path
+    return f"{parts.scheme or 'https'}://{authority}"
+
+
+def _setup_custom_openai(settings, ws) -> None:
+    """Set up a custom OpenAI-compatible endpoint (Ollama, vLLM, Together, Groq, LM Studio, Azure, etc.)."""
     console.print()
     console.print(
         "  [anton.muted]Works with Ollama, vLLM, Together, Groq, LM Studio, or any OpenAI-compatible API.[/]"
@@ -994,7 +1006,6 @@ def _setup_custom_openai(settings, ws) -> None:
         console.print("  [anton.warning]Base URL is required.[/]")
     if not base_url.startswith("http://") and not base_url.startswith("https://"):
         base_url = "http://" + base_url
-    base_url = base_url.rstrip("/")
 
     api_key = _setup_prompt(
         "API key (Enter to skip if not needed)", is_password=True
@@ -1008,10 +1019,32 @@ def _setup_custom_openai(settings, ws) -> None:
             break
         console.print("  [anton.warning]Model name is required.[/]")
 
+    api_version = _setup_prompt(
+        "API version (leave blank for standard endpoints, required for Azure)"
+    ).strip() or None
+    if api_version and _is_azure_endpoint(base_url):
+        # Strip path/query — AzureOpenAI builds the deployment URL internally.
+        base_url = _strip_to_azure_endpoint(base_url)
+
+    base_url = base_url.rstrip("/")
+
     try:
 
         def _test():
-            client = openai.OpenAI(api_key=api_key, base_url=base_url)
+            if api_version and _is_azure_endpoint(base_url):
+                client = AzureOpenAI(
+                    azure_endpoint=base_url,
+                    api_key=api_key,
+                    api_version=api_version,
+                )
+            elif api_version:
+                client = openai.OpenAI(
+                    api_key=api_key,
+                    base_url=base_url,
+                    default_query={"api-version": api_version},
+                )
+            else:
+                client = openai.OpenAI(api_key=api_key, base_url=base_url)
             response = client.chat.completions.create(
                 **build_chat_completion_kwargs(
                     model=model,
@@ -1032,12 +1065,14 @@ def _test():
 
     settings.openai_api_key = api_key
     settings.openai_base_url = base_url
+    settings.openai_api_version = api_version
     settings.planning_provider = "openai-compatible"
     settings.coding_provider = "openai-compatible"
     settings.planning_model = model
     settings.coding_model = model
     ws.set_secret("ANTON_OPENAI_API_KEY", api_key)
     ws.set_secret("ANTON_OPENAI_BASE_URL", base_url)
+    ws.set_secret("ANTON_OPENAI_API_VERSION", api_version or "")
     ws.set_secret("ANTON_PLANNING_PROVIDER", "openai-compatible")
     ws.set_secret("ANTON_CODING_PROVIDER", "openai-compatible")
     ws.set_secret("ANTON_PLANNING_MODEL", model)
diff --git a/anton/config/settings.py b/anton/config/settings.py
@@ -36,6 +36,7 @@ class AntonSettings(CoreSettings):
     anthropic_api_key: str | None = None
     openai_api_key: str | None = None
     openai_base_url: str | None = None
+    openai_api_version: str | None = None  # Azure api-version query param
 
     memory_enabled: bool = True
     memory_dir: str = ".anton"
diff --git a/anton/core/backends/scratchpad_boot.py b/anton/core/backends/scratchpad_boot.py
@@ -79,10 +79,12 @@ def _dump_namespace(ns: dict) -> str | None:
             _llm_api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get(
                 "ANTON_OPENAI_API_KEY"
             )
+            _llm_api_version = os.environ.get("ANTON_OPENAI_API_VERSION") or None
             _llm_provider = _ProviderClass(
                 api_key=_llm_api_key or None,
                 base_url=_llm_base_url or None,
                 ssl_verify=_llm_ssl_verify,
+                api_version=_llm_api_version,
             )
         else:
             _llm_provider = _ProviderClass()  # Anthropic doesn't need ssl_verify
diff --git a/anton/core/llm/client.py b/anton/core/llm/client.py
@@ -218,17 +218,20 @@ def from_settings(cls, settings: AntonSettings) -> LLMClient:
         from .anthropic import AnthropicProvider
         from .openai import OpenAIProvider
 
+        api_version = getattr(settings, "openai_api_version", None)
         providers = {
             "anthropic": lambda: AnthropicProvider(api_key=settings.anthropic_api_key),
             "openai": lambda: OpenAIProvider(
                 api_key=settings.openai_api_key,
                 base_url=settings.openai_base_url,
                 ssl_verify=settings.minds_ssl_verify,
+                api_version=api_version,
             ),
             "openai-compatible": lambda: OpenAIProvider(
                 api_key=settings.openai_api_key,
                 base_url=settings.openai_base_url,
                 ssl_verify=settings.minds_ssl_verify,
+                api_version=api_version,
             ),
         }
 
diff --git a/anton/core/llm/openai.py b/anton/core/llm/openai.py
@@ -4,6 +4,7 @@
 from collections.abc import AsyncIterator
 
 import openai
+from openai import AsyncAzureOpenAI
 
 from .provider import (
     ContextOverflowError,
@@ -175,6 +176,16 @@ def _translate_user_blocks(blocks: list[dict]) -> list[dict]:
     return result
 
 
+def _is_azure_endpoint(url: str | None) -> bool:
+    """Return True if the URL's host is an Azure OpenAI domain (port/userinfo ignored)."""
+    if not url:
+        return False
+    from urllib.parse import urlparse
+    parsed = urlparse(url if "://" in url else f"https://{url}")
+    host = (parsed.hostname or "").lower()  # hostname drops ":port" and "user@", unlike netloc
+    return host.endswith((".openai.azure.com", ".cognitiveservices.azure.com"))
+
+
 def build_chat_completion_kwargs(
     *,
     model: str,
@@ -202,28 +213,43 @@ def __init__(
         api_key: str | None = None,
         base_url: str | None = None,
         ssl_verify: bool = True,
+        api_version: str | None = None,
     ) -> None:
         self._api_key = api_key
         self._base_url = base_url
         self._ssl_verify = ssl_verify
+        self._api_version = api_version
 
         import httpx
 
-        kwargs = {}
-        if api_key:
-            kwargs["api_key"] = api_key
-        if base_url:
-            kwargs["base_url"] = base_url
-        if not ssl_verify:
-            kwargs["http_client"] = httpx.AsyncClient(verify=False)
-        self._client = openai.AsyncOpenAI(**kwargs)
+        if api_version and _is_azure_endpoint(base_url):
+            # Azure OpenAI: use the dedicated client which handles deployment
+            # URL construction and api-version automatically.
+            azure_kwargs: dict = {"api_version": api_version}
+            if api_key:
+                azure_kwargs["api_key"] = api_key
+            if base_url:
+                azure_kwargs["azure_endpoint"] = base_url
+            if not ssl_verify:
+                azure_kwargs["http_client"] = httpx.AsyncClient(verify=False)
+            self._client = AsyncAzureOpenAI(**azure_kwargs)
+        else:
+            kwargs: dict = {}
+            if api_key:
+                kwargs["api_key"] = api_key
+            if base_url:
+                kwargs["base_url"] = base_url
+            if not ssl_verify:
+                kwargs["http_client"] = httpx.AsyncClient(verify=False)
+            self._client = openai.AsyncOpenAI(**kwargs)
 
     def export_connection_info(self) -> ProviderConnectionInfo:
         return ProviderConnectionInfo(
             provider=self.name,
             api_key=self._api_key,
             base_url=self._base_url,
             ssl_verify=self._ssl_verify,
+            api_version=self._api_version,
         )
 
     async def complete(
diff --git a/anton/core/llm/provider.py b/anton/core/llm/provider.py
@@ -145,6 +145,7 @@ class ProviderConnectionInfo:
     api_key: str | None = field(default=None, repr=False)
     base_url: str | None = None
     ssl_verify: bool | None = None
+    api_version: str | None = None  # Azure api-version query param
 
 
 class LLMProvider(ABC):
diff --git a/anton/updater.py b/anton/updater.py
@@ -23,9 +23,16 @@ def check_and_update(console, settings) -> bool:
 
     Returns True if an update was applied and the process should restart.
     """
+    import os
+
     if settings.disable_autoupdates:
         return False
 
+    # Guard against infinite restart loops.  The CLI sets this just before
+    # calling _reexec(); the new process inherits it and skips the check.
+    if os.environ.get("_ANTON_UPDATED"):
+        return False
+
     result: dict = {}
 
     def _worker():
diff --git a/tests/test_openai_provider.py b/tests/test_openai_provider.py
@@ -236,3 +236,86 @@ def test_from_settings_openai(self):
             assert isinstance(client, LLMClient)
             assert isinstance(client._planning_provider, OpenAIProvider)
             assert isinstance(client._coding_provider, OpenAIProvider)
+
+
+class TestAzureOpenAIProvider:
+    def test_uses_async_azure_openai_when_api_version_set(self):
+        """With api_version and an Azure-hosted base_url, AsyncAzureOpenAI must be used."""
+        mock_azure_client = MagicMock()
+        with patch("anton.core.llm.openai.openai"), \
+             patch("anton.core.llm.openai.AsyncAzureOpenAI", return_value=mock_azure_client) as mock_cls:
+            provider = OpenAIProvider(
+                api_key="azure-key",
+                base_url="https://myresource.cognitiveservices.azure.com",
+                api_version="2024-12-01-preview",
+            )
+            mock_cls.assert_called_once()
+            call_kwargs = mock_cls.call_args.kwargs
+            assert call_kwargs["api_version"] == "2024-12-01-preview"
+            assert call_kwargs["api_key"] == "azure-key"
+            assert call_kwargs["azure_endpoint"] == "https://myresource.cognitiveservices.azure.com"
+            assert provider._client is mock_azure_client
+
+    def test_uses_async_openai_when_no_api_version(self):
+        """Without api_version, the standard AsyncOpenAI client must be used."""
+        mock_std_client = MagicMock()
+        with patch("anton.core.llm.openai.openai") as mock_openai:
+            mock_openai.AsyncOpenAI.return_value = mock_std_client
+            provider = OpenAIProvider(api_key="sk-test", base_url="http://localhost:11434/v1")
+            mock_openai.AsyncOpenAI.assert_called_once()
+            assert provider._client is mock_std_client
+
+    def test_export_connection_info_includes_api_version(self):
+        with patch("anton.core.llm.openai.openai"), \
+             patch("anton.core.llm.openai.AsyncAzureOpenAI"):
+            provider = OpenAIProvider(
+                api_key="key",
+                base_url="https://res.openai.azure.com",
+                api_version="2024-12-01-preview",
+            )
+            info = provider.export_connection_info()
+            assert info.api_version == "2024-12-01-preview"
+            assert info.base_url == "https://res.openai.azure.com"
+
+    def test_from_settings_passes_api_version_to_provider(self):
+        """LLMClient.from_settings propagates openai_api_version to OpenAIProvider."""
+        with patch("anton.core.llm.openai.openai"), \
+             patch("anton.core.llm.openai.AsyncAzureOpenAI") as mock_azure_cls:
+            settings = AntonSettings(
+                planning_provider="openai-compatible",
+                coding_provider="openai-compatible",
+                planning_model="gpt-4.1-mini",
+                coding_model="gpt-4.1-mini",
+                openai_api_key="azure-key",
+                openai_base_url="https://myresource.cognitiveservices.azure.com",
+                openai_api_version="2024-12-01-preview",
+                _env_file=None,  # presumably keeps a local .env from leaking into the test — confirm
+            )
+            client = LLMClient.from_settings(settings)
+            assert mock_azure_cls.called
+            call_kwargs = mock_azure_cls.call_args.kwargs
+            assert call_kwargs["api_version"] == "2024-12-01-preview"
+            assert isinstance(client._planning_provider, OpenAIProvider)
+
+    async def test_azure_provider_complete_calls_chat_completions(self):
+        """Azure provider routes complete() through chat.completions just like standard."""
+        mock_azure_client = AsyncMock()
+        mock_azure_client.chat.completions.create = AsyncMock(
+            return_value=_make_mock_response(content="azure response", prompt_tokens=8, completion_tokens=12)
+        )
+        with patch("anton.core.llm.openai.openai"), \
+             patch("anton.core.llm.openai.AsyncAzureOpenAI", return_value=mock_azure_client):
+            provider = OpenAIProvider(
+                api_key="azure-key",
+                base_url="https://myresource.cognitiveservices.azure.com",
+                api_version="2024-12-01-preview",
+            )
+            result = await provider.complete(
+                model="gpt-4.1-mini",
+                system="be helpful",
+                messages=[{"role": "user", "content": "hello"}],
+            )
+            assert result.content == "azure response"
+            assert result.usage.input_tokens == 8
+            assert result.usage.output_tokens == 12
+            mock_azure_client.chat.completions.create.assert_awaited_once()
diff --git a/tests/test_openai_setup.py b/tests/test_openai_setup.py

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "2.0.1"`
	`1`	`+__version__ = "2.0.2"`
Original file line number	Diff line number	Diff line change
`@@ -79,10 +79,12 @@ def _dump_namespace(ns: dict) -> str \| None:`
`79`	`79`	`_llm_api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get(`
`80`	`80`	`"ANTON_OPENAI_API_KEY"`
`81`	`81`	`)`
	`82`	`+ _llm_api_version = os.environ.get("ANTON_OPENAI_API_VERSION") or None`
`82`	`83`	`_llm_provider = _ProviderClass(`
`83`	`84`	`api_key=_llm_api_key or None,`
`84`	`85`	`base_url=_llm_base_url or None,`
`85`	`86`	`ssl_verify=_llm_ssl_verify,`
	`87`	`+ api_version=_llm_api_version,`
`86`	`88`	`)`
`87`	`89`	`else:`
`88`	`90`	`_llm_provider = _ProviderClass() # Anthropic doesn't need ssl_verify`