diff --git a/samples/csharp/hosted-agents/bring-your-own/invocations_ws/HelloWorld/E2ELocal/Program.cs b/samples/csharp/hosted-agents/bring-your-own/invocations_ws/HelloWorld/E2ELocal/Program.cs index 957aff895..8744abbd0 100644 --- a/samples/csharp/hosted-agents/bring-your-own/invocations_ws/HelloWorld/E2ELocal/Program.cs +++ b/samples/csharp/hosted-agents/bring-your-own/invocations_ws/HelloWorld/E2ELocal/Program.cs @@ -254,12 +254,11 @@ static string BuildFoundryUrl(string projectEndpoint, string agent, string sessi var parts = new Uri(projectEndpoint); var project = parts.AbsolutePath.TrimEnd('/').Split('/')[^1]; var qs = HttpUtility.ParseQueryString(string.Empty); - qs["project_name"] = project; - qs["agent_name"] = agent; qs["api-version"] = apiVersion; qs["agent_session_id"] = sessionId; var scheme = parts.Scheme is "https" or "wss" ? "wss" : "ws"; - return $"{scheme}://{parts.Host}/api/projects/agents/endpoint/protocols/invocations_ws?{qs}"; + var path = $"/api/projects/{Uri.EscapeDataString(project)}/agents/{Uri.EscapeDataString(agent)}/endpoint/protocols/invocations_ws"; + return $"{scheme}://{parts.Host}{path}?{qs}"; } static async Task GetEntraTokenAsync(string resource = "https://ai.azure.com") diff --git a/samples/csharp/hosted-agents/bring-your-own/invocations_ws/HelloWorld/README.md b/samples/csharp/hosted-agents/bring-your-own/invocations_ws/HelloWorld/README.md index b3905a8a9..30f5a75ca 100644 --- a/samples/csharp/hosted-agents/bring-your-own/invocations_ws/HelloWorld/README.md +++ b/samples/csharp/hosted-agents/bring-your-own/invocations_ws/HelloWorld/README.md @@ -205,10 +205,8 @@ data-plane** WebSocket URL (the proxy and `E2ELocal` build this for you from `--foundry` + `--agent`): ``` -wss://.services.ai.azure.com/api/projects/agents/endpoint/protocols/invocations_ws +wss://.services.ai.azure.com/api/projects//agents//endpoint/protocols/invocations_ws ?api-version=v1 - &project_name= - &agent_name= &agent_session_id= ``` @@ -217,10 +215,10 @@ 
Where the segments come from: | Part | Value | |------|-------| | `` | AI Services account host — the same host as your Foundry project endpoint (`https://.services.ai.azure.com/api/projects/`). | -| `/api/projects/agents/endpoint/protocols/invocations_ws` | Fixed data-plane route — `agents` and `endpoint` are literal path segments, not your agent name. The actual agent is picked via the `agent_name` query parameter. | +| `/api/projects//agents//endpoint/protocols/invocations_ws` | Data-plane route; project and agent are URL-encoded path segments. | | `api-version=v1` | Foundry data-plane API version. | -| `project_name=` | The last segment of your project endpoint path. | -| `agent_name=` | Matches the agent `name` in [`agent.manifest.yaml`](agent.manifest.yaml) — `hello-world-dotnet-invocations-ws`. | +| `` | The last segment of your project endpoint path. | +| `` | Matches the agent `name` in [`agent.manifest.yaml`](agent.manifest.yaml) — `hello-world-dotnet-invocations-ws`. | | `agent_session_id=` | A caller-generated string that identifies the conversation. Reuse the same id to resume; use a fresh one (e.g. a GUID) to start a new session. 
| Every request must also include `Authorization: Bearer ` diff --git a/samples/csharp/hosted-agents/bring-your-own/invocations_ws/HelloWorld/chat_client/Proxy/Program.cs b/samples/csharp/hosted-agents/bring-your-own/invocations_ws/HelloWorld/chat_client/Proxy/Program.cs index 798347181..9976a73a6 100644 --- a/samples/csharp/hosted-agents/bring-your-own/invocations_ws/HelloWorld/chat_client/Proxy/Program.cs +++ b/samples/csharp/hosted-agents/bring-your-own/invocations_ws/HelloWorld/chat_client/Proxy/Program.cs @@ -207,12 +207,11 @@ static string BuildFoundryUrl(string projectEndpoint, string agent, string sessi var parts = new Uri(projectEndpoint); var project = parts.AbsolutePath.TrimEnd('/').Split('/')[^1]; var qs = HttpUtility.ParseQueryString(string.Empty); - qs["project_name"] = project; - qs["agent_name"] = agent; qs["api-version"] = apiVersion; qs["agent_session_id"] = sessionId; var scheme = parts.Scheme is "https" or "wss" ? "wss" : "ws"; - return $"{scheme}://{parts.Host}/api/projects/agents/endpoint/protocols/invocations_ws?{qs}"; + var path = $"/api/projects/{Uri.EscapeDataString(project)}/agents/{Uri.EscapeDataString(agent)}/endpoint/protocols/invocations_ws"; + return $"{scheme}://{parts.Host}{path}?{qs}"; } static async Task GetEntraTokenAsync(string resource) diff --git a/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.azdignore b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.azdignore new file mode 100644 index 000000000..4a74eabf4 --- /dev/null +++ b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.azdignore @@ -0,0 +1,3 @@ +agent.manifest.yaml +agent.yaml +.env.example diff --git a/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.dockerignore b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.dockerignore new file mode 100644 index 000000000..b709ec79b --- /dev/null +++ 
b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.dockerignore @@ -0,0 +1,26 @@ +**/__pycache__/ +**/*.py[cod] +**/*.egg-info/ +.eggs/ + +# Virtual environments +.venv/ +venv/ +env/ + +# IDE settings +.vscode/ +.idea/ + +# Version control +.git/ +.gitignore + +# Docker files +.dockerignore + +# Docs +README.md + +# Local environment (never bake credentials into the image) +.env diff --git a/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.env.e2e-tests b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.env.e2e-tests new file mode 100644 index 000000000..405651e0a --- /dev/null +++ b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.env.e2e-tests @@ -0,0 +1,19 @@ +# ===== E2E-TESTS INFRASTRUCTURE (Existing Foundry Project) ===== +# This configuration uses the pre-provisioned agents-e2e-tests-canadacentral resource group + +# Full resource ID of the existing AI Foundry project +AZURE_AI_PROJECT_ID=/subscriptions/921496dc-987f-410f-bd57-426eb2611356/resourceGroups/agents-e2e-tests-canadacentral/providers/Microsoft.CognitiveServices/accounts/e2e-tests-canadacentral-account/projects/e2e-tests-canadacentral + +# AI Foundry API endpoint +AZURE_AI_PROJECT_ENDPOINT=https://e2e-tests-canadacentral-account.services.ai.azure.com/api/projects/e2e-tests-canadacentral + +# Subscription and region +AZURE_SUBSCRIPTION_ID=921496dc-987f-410f-bd57-426eb2611356 +AZURE_ENV_NAME=e2e-tests-canadacentral +AZURE_LOCATION=canadacentral + +# Existing container registry for pushing container images +AZURE_CONTAINER_REGISTRY_ENDPOINT=crco5phi2qd67pq.azurecr.io + +# Optional: Application Insights +# APPLICATIONINSIGHTS_CONNECTION_STRING=... 
diff --git a/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.env.example b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.env.example new file mode 100644 index 000000000..f473db294 --- /dev/null +++ b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.env.example @@ -0,0 +1,49 @@ +# ===== REQUIRED — Existing Foundry Project (Bring Your Own) ===== + +# Full resource ID of your existing AI Foundry project. +# Format: /subscriptions//resourceGroups//providers/Microsoft.CognitiveServices/accounts//projects/ +AZURE_AI_PROJECT_ID= + +# AI Foundry API endpoint for your project. +# Format: https://..api.ai.azure.com/api/projects/ +# OR: https://.services.ai.azure.com/api/projects/ +AZURE_AI_PROJECT_ENDPOINT= + +# Your Azure subscription ID. +AZURE_SUBSCRIPTION_ID= + +# ===== OPTIONAL — Container Image Registry (for docker deployments) ===== + +# Your existing container registry endpoint (for pushing container images). +# Format: .azurecr.io +# Only needed if deploying with `language: docker` in azure.yaml +# For code-only (bundled ZIP) deployments, this is not required. +AZURE_CONTAINER_REGISTRY_ENDPOINT= + +# ===== OPTIONAL — Environment Configuration ===== + +# Environment name (used for naming and tagging). Defaults to 'e2e-tests-canadacentral' if not set. +AZURE_ENV_NAME=e2e-tests-canadacentral + +# Azure region for your deployment (informational only; resources must already exist). +AZURE_LOCATION=canadacentral + +# ===== OPTIONAL — Application Insights Telemetry ===== + +# Application Insights connection string (auto-injected into container if set). +# APPLICATIONINSIGHTS_CONNECTION_STRING=InstrumentationKey=... + +# ===== ABOUT THIS SAMPLE ===== + +# This diagnostic agent uses the Invocations protocol (v1.0.0). +# It does NOT call an LLM and does NOT require a model deployment. +# It only performs network/environment diagnostics inside the runtime sandbox. 
+ +# For "Bring Your Own" (BYO) infrastructure, all Azure resources must exist before deployment: +# - Foundry Account and Project (created separately) +# - Container Registry (if using docker mode) — already created +# - Resource Group with proper networking/permissions configured +# +# This sample does not provision infrastructure (no infra/ directory). +# Use: azd deploy --no-prompt (to deploy agent to existing project) +# Do NOT use: azd up (since there is no infrastructure to provision) diff --git a/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.gitignore b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.gitignore new file mode 100644 index 000000000..8e8438024 --- /dev/null +++ b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/.gitignore @@ -0,0 +1 @@ +.azure diff --git a/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/Dockerfile b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/Dockerfile new file mode 100644 index 000000000..b89292edb --- /dev/null +++ b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/Dockerfile @@ -0,0 +1,7 @@ +FROM python:3.12-slim +WORKDIR /app +COPY . user_agent/ +WORKDIR /app/user_agent +RUN if [ -f requirements.txt ]; then pip install -r requirements.txt; fi +EXPOSE 8088 +CMD ["python", "main.py"] diff --git a/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/README.md b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/README.md new file mode 100644 index 000000000..462daef3c --- /dev/null +++ b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/README.md @@ -0,0 +1,341 @@ + +**IMPORTANT!** All samples and other resources made available in this GitHub repository ("samples") are designed to assist in accelerating development of agents, solutions, and agent workflows for various scenarios. 
Review all provided resources and carefully test output behavior in the context of your use case. AI responses may be inaccurate and AI actions should be monitored with human oversight. Learn more in the transparency note for [Agent Service](https://learn.microsoft.com/en-us/azure/ai-foundry/responsible-ai/agents/transparency-note). + +Agents, solutions, or other output you create may be subject to legal and regulatory requirements, may require licenses, or may not be suitable for all industries, scenarios, or use cases. By using any sample, you are acknowledging that any output created using those samples are solely your responsibility, and that you will comply with all applicable laws, regulations, and relevant safety standards, terms of service, and codes of conduct. + +Third-party samples contained in this folder are subject to their own designated terms, and they have not been tested or verified by Microsoft or its affiliates. + +Microsoft has no responsibility to you or others with respect to any of these samples or any resulting output. + + +# Diagnostic Agent (Python, Invocations) + +A **diagnostic** hosted-agent built on the Invocations protocol. It does **not** call an LLM and does **not** require a Foundry project endpoint or a model deployment. Instead, on each invocation, it runs DNS / TCP / TLS / HTTP probes against caller-supplied hostnames and returns a structured JSON report describing what the runtime sandbox can actually reach. + +Use this image to answer questions like: + +- From inside the delegated `agent-subnet-*`, what does `.azurecr.io` resolve to? A private IP or a public one? +- Does `https://.azurecr.io/v2/` return `401 Unauthorized` (registry reachable) or does the request hang / get connection refused / TLS-verify-fail? +- Can the runtime egress to public Azure endpoints (`login.microsoftonline.com`, `management.azure.com`) or only to private endpoints? 
+ +## Design notes + +- **Stdlib-only probe code.** All DNS / TCP / TLS / HTTP probes are written against `socket`, `ssl`, `urllib`, and `http.client`. The network is the very thing being diagnosed; the probes must not depend on import-time package fetches or pyca handshakes that obscure the failure mode. +- **No model, no project endpoint.** The manifest declares no `resources` and no `environment_variables`. The image is portable across any Foundry project. +- **Single JSON response.** All probe outcomes are returned in one HTTP 200 response — per-probe failures are reported in the `status` / `hint` fields, not via non-2xx HTTP codes. This keeps client-side parsing simple. +- **Caller controls the probe matrix.** The request body lists hostnames; nothing is hard-coded to a specific customer ACR. An empty body runs only the safe defaults (container info, env dump, and a small set of public Azure endpoints). +- **No secrets in the response.** Env vars matching `KEY`, `SECRET`, `PASSWORD`, `TOKEN`, `CONNECTION_STRING`, or `SAS` are reported with their length only. + +## Getting Started (Bring Your Own Infrastructure) + +This sample is designed for **Bring Your Own** (BYO) infrastructure scenarios where the Azure Foundry account, project, and supporting resources are already provisioned separately. + +### Prerequisites + +- An existing **Azure Foundry project** (account + project already created) +- An existing **container registry** (if deploying in container mode) +- **Azure Developer CLI (`azd`)** with the `azure.ai.agents` extension installed: + ```bash + azd config set ai.agents.version 0.1.22-preview + ``` + +### Deployment + +1. 
**Set environment variables** — Copy `.env.example` to `.env` and fill in your existing Foundry project details: + ```bash + cp .env.example .env + ``` + + Edit `.env` with your project information: + ```env + AZURE_AI_PROJECT_ID=/subscriptions//resourceGroups//providers/Microsoft.CognitiveServices/accounts//projects/ + AZURE_AI_PROJECT_ENDPOINT=https://.services.ai.azure.com/api/projects/ + AZURE_SUBSCRIPTION_ID= + AZURE_ENV_NAME= + AZURE_LOCATION= + AZURE_CONTAINER_REGISTRY_ENDPOINT=.azurecr.io # For container mode only + ``` + +2. **Deploy the agent** — Use `azd deploy` (not `azd up`, since no infrastructure needs provisioning): + + **Option A: Container Mode (Recommended)** — Docker image pushed to container registry: + ```bash + # Default configuration — uses azure.yaml as-is + azd deploy --no-prompt + ``` + - Builds Docker image from `Dockerfile` + - Pushes to `AZURE_CONTAINER_REGISTRY_ENDPOINT` + - Deploys container to Foundry + - **Requires**: ACR configured in `.env` + + **Option B: ZIP Mode** — Bundle Python code directly (no container): + ```bash + # Step 1: Edit azure.yaml + # Change this line: + # language: docker + # To: + # language: python + # + # If your azure.yaml also contains a docker section such as the one below, + # remove it entirely (the azure.yaml shipped with this sample has none): + # docker: + # remoteBuild: false + + # Step 2: Deploy + azd deploy --no-prompt + ``` + - Bundles Python source code as ZIP + - Deploys to Foundry without container image + - **No ACR required** + - Useful for code-only scenarios or testing + +3. 
**Invoke the agent** — Once deployed, invoke it via REST: + ```bash + TOKEN=$(az account get-access-token --resource https://ai.azure.com --query accessToken -o tsv) + curl -X POST \ + "https://.services.ai.azure.com/api/projects//agents/diagnostic-agent-python-invocations/versions/1/invocations" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"hosts": ["microsoft.com"]}' + ``` + +## Deployment Modes Comparison + +| Aspect | Container Mode | ZIP Mode | +|--------|---|---| +| **Command** | `azd deploy --no-prompt` (default) | Edit `azure.yaml`, then `azd deploy --no-prompt` | +| **Build Process** | Builds Docker image → Pushes to ACR | Bundles Python code as ZIP | +| **Requires ACR** | ✅ Yes | ❌ No | +| **Container Image Size** | ~500 MB (python:3.12-slim) | N/A (code only) | +| **Startup Speed** | ~30 seconds | ~30 seconds (similar) | +| **Use Case** | Production, versioned images | Testing, code-only scenarios | +| **Config Change** | None (default) | Edit `azure.yaml` (1 line) | + +Validation: both execution paths were tested locally after removing IMDS/MSI support. + +## Troubleshooting: ACR Not Reachable From Private Network + +If private networking is misconfigured, container-mode deployment can fail before the diagnostic image is even available (for example, DNS failure, blocked egress, or Private Endpoint routing issues to your private ACR). + +Use one of the following fallback paths to keep debugging network reachability: + +### Path 1 (Preferred): ZIP deploy from a VM attached to the target network + +This route avoids ACR entirely and still runs the same diagnostics code in Foundry. + +1. Use a VM that is attached to the same VNet/subnet path you want to validate. +2. In `azure.yaml`, switch to ZIP mode: + - Change `language: docker` to `language: python`. + - Remove the `docker:` block. +3. Deploy with: + ```bash + azd deploy --no-prompt + ``` +4. Invoke the agent and probe your private endpoints as usual. 
+ +When ZIP mode works but container mode fails, the issue is typically on the ACR path (DNS, NSG/UDR/firewall, or PE routing), not in the probe logic. + +### Path 2: Temporary public-ACR fallback for image distribution + +If you must validate container mode while private ACR is unreachable, use a temporary public ACR (PNA enabled) for this diagnostic image. + +1. Set `AZURE_CONTAINER_REGISTRY_ENDPOINT=.azurecr.io` in `.env`. +2. Run: + ```bash + azd deploy --no-prompt + ``` +3. Re-run the same diagnostic probes. + +If deployment succeeds with public ACR but fails with private ACR, the regression is isolated to private ACR connectivity/policy. + +For security, treat this as a short-lived troubleshooting step only: remove temporary public exposure and revert to private ACR once networking is fixed. + +## Request body contract + +All fields are optional: + +```json +{ + "hosts": [ + ".azurecr.io", + "..data.azurecr.io" + ], + "public_hosts": [ + "https://www.microsoft.com/", + "https://management.azure.com/metadata/endpoints?api-version=2020-09-01", + "https://login.microsoftonline.com/common/v2.0/.well-known/openid-configuration" + ], + "include_env_dump": true, + "include_container_info": true, + "tcp_timeout_sec": 5, + "http_timeout_sec": 10 +} +``` + +| Field | Default | Notes | +|---|---|---| +| `hosts` | `[]` | List of FQDNs. For each, runs DNS → TCP/443 → TLS/443 → HTTPS GET. For `*.azurecr.io` and `*.data.azurecr.io` hosts, the GET path is `/v2/` (returns 401 with `Www-Authenticate` when reachable). For all other hosts, GET path is `/`. | +| `public_hosts` | small built-in list | Full URLs. HTTPS-GET only — no DNS/TCP/TLS breakdown. Pass `[]` to skip. | +| `include_env_dump` | `true` | Returns env vars matching an allowlist prefix (`FOUNDRY_`, `AZURE_`, `KUBERNETES_`, etc.); credential-shaped values are length-only. | +| `include_container_info` | `true` | Hostname, container IP, default gateway from `/proc/net/route`, resolvers from `/etc/resolv.conf`. 
| +| `tcp_timeout_sec` | `5` | Per-attempt TCP/TLS timeout. | +| `http_timeout_sec` | `10` | HTTP timeout. | + +You may also send a **plain-text body** containing a single hostname; the agent treats it as `{"hosts": [""]}`. Useful from the Foundry portal chat UI. + +If the body is empty, the agent runs only the defaults: container info + env dump + the built-in public-host list. No private hosts are probed unless explicitly requested. + +## Response shape + +```json +{ + "status": "ok", + "agent_session_id": "...", + "invocation_id": "...", + "timestamp_utc": "2026-06-12T...", + "checks": { + "container": { + "hostname": "...", + "ip": "10.0.0.42", + "default_route": "10.0.0.1 via eth0", + "resolvers": ["168.63.129.16"] + }, + "env": { + "AZURE_REGION": "westus2", + "FOUNDRY_PROJECT_ENDPOINT": "https://...", + "KUBERNETES_SERVICE_HOST": "10.0.0.1" + }, + "hosts": [ + { + "host": ".azurecr.io", + "dns": {"status": "ok", "ips": ["10.0.1.4"], "any_private": true, "all_private": true}, + "tcp_443": {"status": "ok", "ip": "10.0.1.4", "port": 443, "ms": 1.8}, + "tls_443": {"status": "ok", "version": "TLSv1.3", "cipher": "TLS_AES_256_GCM_SHA384", "cert_subject": "CN=*.azurecr.io", "cert_sans": ["*.azurecr.io", "*..data.azurecr.io"]}, + "http_get": {"status": "ok", "code": 401, "headers": {"www-authenticate": "Bearer realm=...", "docker-distribution-api-version": "registry/2.0"}} + } + ], + "public_hosts": [ + {"status": "ok", "url": "https://www.microsoft.com/", "code": 200} + ] + } +} +``` + +## Interpretation cheat-sheet + +| Symptom in response | Likely cause | +|---|---| +| `hosts[].dns.status = FAIL gaierror` | Resolver doesn't have the zone. For `privatelink.*`, the private DNS zone isn't linked to this VNet. | +| `hosts[].dns.ips` all RFC1918 → ✅ | Private Endpoint resolution is working. | +| `hosts[].dns.ips` contain a public IP for a `privatelink.*` host | Zone link missing or pointed at the wrong VNet. `hint` field flags this. 
| +| `tcp_443.status = FAIL timeout` | NSG egress rule, UDR routing to an NVA that black-holes the flow, or firewall drop. | +| `tcp_443.status = FAIL refused` | PE is in Disconnected state, or an upstream device is sending RST. | +| `tls_443.status = FAIL SSLCertVerificationError` | A firewall is doing TLS interception. Bypass `*.azurecr.io` / `*.azure.com`. | +| `tls_443.status = FAIL SSLError` mid-handshake | NVA breaking SNI. Enable SNI passthrough. | +| `http_get.code = 401` on `/v2/` for ACR | Registry is reachable. ✅ | +| `http_get.code = 403` on `/v2/` for ACR | PNA=Disabled + caller not on an approved PE. | + +## Per-service expected results + +When probing a private-link-enabled Foundry project's BYO dependency +services, each service has a distinct healthy fingerprint. Anything that +deviates from the row below points at a misconfiguration, not auth. + +| Service | FQDN pattern | Expected cert SANs | Expected unauth `GET /` | +|---|---|---|---| +| ACR (registry) | `.azurecr.io` | `*.azurecr.io`, `*..geo.azurecr.io` | `401` + `WWW-Authenticate: Bearer realm=".../oauth2/token"` (path: `/v2/`) | +| ACR (data) | `..data.azurecr.io` | `*..data.azurecr.io`, `*.azurecr.io`, `*.data.azurecr.io` | `403 DENIED` (path: `/v2/`) | +| Cosmos DB | `.documents.azure.com` | `*.{sql,mongo,table,gremlin,cassandra}.cosmosdb.azure.com` | `401 Unauthorized` + JSON body about missing `authorization` header | +| Storage (blob) | `.blob.core.windows.net` | `*.blob.core.windows.net`, `*.blob.storage.azure.net` | `400 InvalidQueryParameterValue` (root GET is malformed by design) | +| AI Search | `.search.windows.net` | `*.search.windows.net`, `*.management.search.windows.net` | `401 Unauthorized` + `WWW-Authenticate: Bearer ... 
resource="https://search.azure.com"` | +| AI Services (cognitive) | `.cognitiveservices.azure.com` | `*.cognitiveservices.azure.com`, `*.openai.azure.com`, `*.services.ai.azure.com` | `200 Service Operational` | +| AI Services (openai) | `.openai.azure.com` | (same as above) | `200 Service Operational` | +| AI Services (services.ai) | `.services.ai.azure.com` | `.services.ai.azure.com` (account-specific cert) | `200 OK` (`server: Kestrel`) | + +Any cert issuer other than a `Microsoft TLS …` / `Microsoft Azure RSA TLS Issuing CA …` / per-account cert (for example, an enterprise TLS inspection CA) suggests the TLS handshake was intercepted by a network device instead of terminating at the expected Private Endpoint. + +## Realistic multi-service response + +Here's what a successful probe against all of a Foundry project's BYO +private endpoints looks like (truncated for readability; placeholder +resource names). DNS resolves to the PE subnet (`192.168.1.0/24`), TCP/443 +succeeds, TLS terminates with a Microsoft-issued cert whose SANs cover the +host, and the unauth `GET /` returns each service's expected challenge. 
+ +Request: + +```json +{ + "hosts": [ + "myorgacr.southindia.data.azurecr.io", + "myorgacr.azurecr.io", + "myorgcosmos.documents.azure.com", + "myorgstorage.blob.core.windows.net", + "myorgsearch.search.windows.net", + "myorgaisvc.cognitiveservices.azure.com", + "myorgaisvc.openai.azure.com", + "myorgaisvc.services.ai.azure.com" + ] +} +``` + +Response (per-host summary, full JSON elided): + +| Host | DNS IP | TCP | TLS | HTTP | +|---|---|---|---|---| +| `myorgacr.southindia.data.azurecr.io` | `192.168.1.11` | ok | ok | `403 DENIED` (`/v2/`) | +| `myorgacr.azurecr.io` | `192.168.1.12` | ok | ok | `401 Bearer realm=...` (`/v2/`) | +| `myorgcosmos.documents.azure.com` | `192.168.1.4` | ok | ok | `401` (Cosmos) | +| `myorgstorage.blob.core.windows.net` | `192.168.1.9` | ok | ok | `400 InvalidQueryParameterValue` | +| `myorgsearch.search.windows.net` | `192.168.1.10` | ok | ok | `401` + Search WWW-Authenticate | +| `myorgaisvc.cognitiveservices.azure.com` | `192.168.1.6` | ok | ok | `200 Service Operational` | +| `myorgaisvc.openai.azure.com` | `192.168.1.7` | ok | ok | `200 Service Operational` | +| `myorgaisvc.services.ai.azure.com` | `192.168.1.8` | ok | ok | `200 OK` (Kestrel) | + +## Running locally + +This sample follows the same `azd ai agent` workflow as the other invocations samples. See [hello-world/README.md](../hello-world/README.md) for the full `azd` / Foundry Toolkit walkthrough. + +For the local-only path (no `azd`): + +```bash +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +python main.py +``` + +The agent listens on `http://localhost:8088/`. 
Invoke it: + +```bash +# Default profile (container + env + public hosts only) +curl -sS -X POST "http://localhost:8088/invocations?agent_session_id=diag-001" \ + -H "Content-Type: application/json" -d '{}' | jq + +# Probe a specific ACR (registry + data plane) +curl -sS -X POST "http://localhost:8088/invocations?agent_session_id=diag-001" \ + -H "Content-Type: application/json" \ + -d '{ + "hosts": [ + ".azurecr.io", + "..data.azurecr.io" + ], + "public_hosts": [] + }' | jq + +# Plain-text body — quick single-host check from the portal chat UI +curl -sS -X POST "http://localhost:8088/invocations" \ + -H "Content-Type: text/plain" \ + --data ".azurecr.io" | jq +``` + +The interesting runs happen when the image is deployed into a Foundry project and invoked from there. + +## Deploying to Microsoft Foundry + +Same `azd` / Foundry Toolkit workflow as the other invocations samples — see [hello-world/README.md](../hello-world/README.md#deploying-the-agent-to-microsoft-foundry). Because the manifest declares no `resources` block, deployment does not provision a model. + +## Security notes + +- This image is intended for diagnostics, not for production agent traffic. Treat its responses as semi-public: nothing in the response is a credential, but env-var names can reveal infrastructure topology. +- The image never writes secrets. It does not parse, log, or return `Authorization` headers, or any env var matching credential-shaped substrings. +- The image performs HTTPS-GET only. No POST/PUT/DELETE; no authenticated calls to the probed hosts. 
diff --git a/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/agent.manifest.yaml b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/agent.manifest.yaml new file mode 100644 index 000000000..8f079e69d --- /dev/null +++ b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/agent.manifest.yaml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/microsoft/AgentSchema/refs/heads/main/schemas/v1.0/AgentManifest.yaml +name: diagnostic-agent-python-invocations +displayName: "Diagnostic Agent (Python, Invocations)" +description: > + Network and environment diagnostic agent. On invocation, runs DNS / TCP / TLS / + HTTP probes against caller-supplied hostnames (e.g. a private ACR), dumps the + container's networking state (IP, routes, resolvers). Designed to verify what + a hosted-agent runtime can actually reach from inside the delegated agent subnet. +metadata: + tags: + - AI Agent Hosting + - Invocations Protocol + - Bring Your Own + - Python + - Diagnostics +template: + name: diagnostic-agent-python-invocations + kind: hosted + protocols: + - protocol: invocations + version: 1.0.0 + # No environment_variables and no resources block: + # the diagnostic-agent intentionally requires no model and no Foundry project + # endpoint. Everything it needs is on the container itself. 
diff --git a/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/agent.yaml b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/agent.yaml new file mode 100644 index 000000000..652dbd6b3 --- /dev/null +++ b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/agent.yaml @@ -0,0 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/microsoft/AgentSchema/refs/heads/main/schemas/v1.0/ContainerAgent.yaml +kind: hosted +name: diagnostic-agent-python-invocations +protocols: + - protocol: invocations + version: 1.0.0 +resources: + cpu: "0.25" + memory: 0.5Gi diff --git a/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/azure.yaml b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/azure.yaml new file mode 100644 index 000000000..df4b99624 --- /dev/null +++ b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/azure.yaml @@ -0,0 +1,19 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/Azure/azure-dev/main/schemas/v1.0/azure.yaml.json + +requiredVersions: + extensions: + azure.ai.agents: '>=0.1.0-preview' +name: diagnostic-agent +services: + diagnostic-agent: + project: . + host: azure.ai.agent + language: docker + config: + container: + resources: + cpu: "0.25" + memory: 0.5Gi + scale: + maxReplicas: 1 + startupCommand: python main.py diff --git a/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/main.py b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/main.py new file mode 100644 index 000000000..51c5a5c09 --- /dev/null +++ b/samples/python/hosted-agents/bring-your-own/invocations/diagnostic-agent/main.py @@ -0,0 +1,603 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Diagnostic Agent — network and environment diagnostics from inside a hosted-agent +runtime sandbox. 
+ +On each invocation, runs a configurable set of probes against caller-supplied +hostnames and returns a single structured JSON response. Designed to answer: + + "What can the runtime inside the delegated agent subnet actually reach?" + +Typical use case (the question that motivated this image): + + "From inside agent-subnet-*, does nslookup .azurecr.io resolve + to a private IP? Does curl -v https://.azurecr.io/v2/ return + a 401? If not, where does it break?" + +The agent is deliberately stdlib-only for the probe code itself +(``socket``, ``ssl``, ``urllib``, ``http.client``) — the network is the very +thing being diagnosed, so the probes must not depend on an import-time package +fetch. + +Required environment variables: none (intentional). The agent does not call +LLMs and does not require a Foundry project endpoint. + +POST body contract (all fields optional):: + + { + "hosts": [".azurecr.io", ".westus2.data.azurecr.io"], + "public_hosts": ["https://www.microsoft.com/", + "https://management.azure.com/", + "https://login.microsoftonline.com/"], + "include_env_dump": true, + "include_container_info": true, + "tcp_timeout_sec": 5, + "http_timeout_sec": 10 + } + +If the body is empty, a default profile runs: container info + env dump ++ a small fixed list of public Azure endpoints. No private hosts +are probed unless explicitly requested. + +The response is **always HTTP 200**. Every probe — and every top-level +section of the handler — is wrapped in its own try/except and reports +failure via a ``status`` / ``err`` / ``msg`` / ``hint`` block inside the +response JSON. Even a crash in the handler itself returns 200 with an +``error`` block, so the caller (which often cannot read non-2xx bodies) +always gets actionable diagnostic data. 
+""" + +from __future__ import annotations + +import http.client +import ipaddress +import json +import logging +import os +import socket +import ssl +import sys +import time +import traceback +import urllib.parse +from datetime import datetime, timezone +from typing import Any + +from starlette.requests import Request +from starlette.responses import JSONResponse + +from azure.ai.agentserver.invocations import InvocationAgentServerHost + +# Emit all logs to stdout so they show up in the hosted-agent log stream. +# basicConfig is a no-op if the root logger already has handlers (e.g. when +# the host configures uvicorn logging first), so also force the level. +logging.basicConfig( + level=os.environ.get("DEBUG_AGENT_LOG_LEVEL", "INFO"), + format="%(asctime)s %(levelname)s %(name)s %(message)s", + stream=sys.stdout, +) +logging.getLogger().setLevel(os.environ.get("DEBUG_AGENT_LOG_LEVEL", "INFO")) +logger = logging.getLogger("diagnostic_agent") + +# Environment-variable allowlist for the env dump. Captures only metadata +# that's useful for triaging (region, hosting fabric, project endpoint) and +# nothing that could leak credentials. Anything not on this list is omitted. 
_ENV_ALLOWLIST_PREFIXES = (
    "FOUNDRY_",
    "AZURE_",
    "KUBERNETES_",
    "POD_",
    "NODE_",
    "HOSTNAME",
    "REGION",
    "LOCATION",
)
# An allowlisted variable whose NAME contains any of these substrings has its
# value masked before it is returned (see ``_redact_env_value``).
_ENV_REDACT_SUBSTRINGS = (
    "KEY",
    "SECRET",
    "PASSWORD",
    "TOKEN",
    "CONNECTION_STRING",
    "SAS",
)

# URLs probed when the request body does not carry a ``public_hosts`` field.
_DEFAULT_PUBLIC_HOSTS = [
    "https://www.microsoft.com/",
    "https://management.azure.com/metadata/endpoints?api-version=2020-09-01",
    "https://login.microsoftonline.com/common/v2.0/.well-known/openid-configuration",
]


# ── helpers ──────────────────────────────────────────────────────────────────


def _is_private_ip(ip_str: str) -> bool:
    """Return ``ipaddress``'s ``is_private`` flag for ``ip_str``.

    Strings that do not parse as an IP address yield ``False`` instead of
    raising.
    """
    try:
        return ipaddress.ip_address(ip_str).is_private
    except ValueError:
        return False


def _redact_env_value(name: str, value: str) -> str:
    """Return ``value``, or a fixed marker when ``name`` looks credential-like.

    The match is a case-insensitive substring test of ``name`` against
    ``_ENV_REDACT_SUBSTRINGS``.
    """
    upper = name.upper()
    if any(s in upper for s in _ENV_REDACT_SUBSTRINGS):
        # An explicit marker (rather than "") keeps a masked value
        # distinguishable from a variable that is genuinely empty.
        return "[redacted]"
    return value


def _read_text(path: str, max_bytes: int = 4096) -> str | None:
    """Read at most ``max_bytes`` from ``path`` and decode as UTF-8.

    Undecodable bytes are replaced. Returns ``None`` when the file cannot be
    opened or read (``OSError``).
    """
    try:
        with open(path, "rb") as f:
            return f.read(max_bytes).decode("utf-8", errors="replace")
    except OSError:
        return None


def _default_route() -> str | None:
    """Parse /proc/net/route to find the default gateway.

    Returns ``"<a.b.c.d> via <iface>"`` for the first row whose destination
    column is ``00000000``, or ``None`` when the file is unreadable, has no
    such row, or the gateway column cannot be parsed.
    """
    text = _read_text("/proc/net/route")
    if not text:
        return None
    for line in text.splitlines()[1:]:  # first line is the column header
        cols = line.split()
        # Iface Destination Gateway Flags ...
        if len(cols) >= 3 and cols[1] == "00000000":
            try:
                gw_hex = cols[2]
                # The hex gateway is decoded last byte first.
                octets = [int(gw_hex[i : i + 2], 16) for i in (6, 4, 2, 0)]
                return f"{octets[0]}.{octets[1]}.{octets[2]}.{octets[3]} via {cols[0]}"
            except (ValueError, IndexError):
                return None
    return None


def _resolvers() -> list[str]:
    """Return the ``nameserver`` entries listed in /etc/resolv.conf.

    Returns an empty list when the file is missing or empty.
    """
    text = _read_text("/etc/resolv.conf")
    if not text:
        return []
    out: list[str] = []
    for line in text.splitlines():
        parts = line.split()
        if len(parts) >= 2 and parts[0] == "nameserver":
            out.append(parts[1])
    return out


def _as_list(value: Any) -> list[Any]:
    """Normalise a ``hosts``-style request field to a list.

    ``None`` and blank strings become ``[]``; a bare string becomes a
    one-element list (so it is never iterated character by character); lists
    and tuples are copied; any other value is wrapped so the per-item probe
    reports it as a single failure.
    """
    if value is None:
        return []
    if isinstance(value, str):
        stripped = value.strip()
        return [stripped] if stripped else []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


# ── individual probes ────────────────────────────────────────────────────────


def probe_container_info() -> dict[str, Any]:
    """Report hostname, its resolved IP, default route and DNS resolvers.

    Never raises: any unexpected failure is returned as a ``status: FAIL``
    block.
    """
    try:
        hostname = socket.gethostname()
        try:
            ip = socket.gethostbyname(hostname)
        except OSError as e:
            # Surface why the lookup failed instead of returning a blank field.
            ip = f"[unresolved: {type(e).__name__}: {e}]"
        return {
            "status": "ok",
            "hostname": hostname,
            "ip": ip,
            "default_route": _default_route(),
            "resolvers": _resolvers(),
        }
    except Exception as e:  # noqa: BLE001 — diagnostic; never let a probe kill the response
        return {
            "status": "FAIL",
            "err": type(e).__name__,
            "msg": str(e)[:300],
            "hint": "Reading container hostname / /proc state failed unexpectedly.",
        }


def probe_env_dump() -> dict[str, Any]:
    """Return allowlisted environment variables, sorted by name.

    Only names starting with one of ``_ENV_ALLOWLIST_PREFIXES`` are included;
    each value passes through ``_redact_env_value``. Never raises.
    """
    try:
        values: dict[str, str] = {}
        for k, v in sorted(os.environ.items()):
            # str.startswith accepts a tuple of candidate prefixes.
            if k.startswith(_ENV_ALLOWLIST_PREFIXES):
                values[k] = _redact_env_value(k, v)
        return {"status": "ok", "values": values}
    except Exception as e:  # noqa: BLE001
        return {
            "status": "FAIL",
            "err": type(e).__name__,
            "msg": str(e)[:300],
            "hint": "Iterating os.environ failed unexpectedly.",
        }


def probe_dns(host: str) -> dict[str, Any]:
    """Resolve ``host`` (port 443, stream sockets) and classify the addresses.

    Returns the sorted unique IPs plus ``any_private`` / ``all_private``
    flags. A ``socket.gaierror`` is reported as a ``status: FAIL`` block;
    other exceptions propagate to the caller.
    """
    try:
        infos = socket.getaddrinfo(host, 443, type=socket.SOCK_STREAM)
        ips = sorted({info[4][0] for info in infos})
        private_flags = [_is_private_ip(ip) for ip in ips]
        any_private = any(private_flags)
        # Guard with bool(ips): all() over an empty list is vacuously True.
        all_private = bool(ips) and all(private_flags)
        result: dict[str, Any] = {
            "status": "ok",
            "ips": ips,
            "any_private": any_private,
            "all_private": all_private,
        }
        # For ``privatelink.*``-style names, public IPs are a smell
        if not all_private and "privatelink" in host:
            result["hint"] = (
                "Resolved to a non-RFC1918 address; the privatelink zone may not be "
                "linked to this VNet, or the link points at the wrong VNet."
            )
        return result
    except socket.gaierror as e:
        return {
            "status": "FAIL",
            "err": "gaierror",
            "msg": str(e),
            "hint": "DNS lookup failed. Resolver may not have the zone, or DNS traffic is blocked.",
        }


def probe_tcp(ip: str, port: int, timeout_sec: int) -> dict[str, Any]:
    """Attempt a TCP connect to ``ip:port`` and report the outcome and latency.

    Timeouts, refusals and other ``OSError``s are each mapped to a
    ``status: FAIL`` block with a hint. Every result carries ``ms``, the
    elapsed time of the attempt.
    """
    t0 = time.perf_counter()

    def _elapsed_ms() -> float:
        return round((time.perf_counter() - t0) * 1000, 1)

    try:
        with socket.create_connection((ip, port), timeout=timeout_sec):
            return {
                "status": "ok",
                "ip": ip,
                "port": port,
                "ms": _elapsed_ms(),
            }
    except socket.timeout:
        return {
            "status": "FAIL",
            "ip": ip,
            "port": port,
            "ms": _elapsed_ms(),
            "err": "timeout",
            "hint": "TCP SYN silently dropped. Likely a network security rule, routing issue, or firewall drop.",
        }
    except ConnectionRefusedError:
        return {
            "status": "FAIL",
            "ip": ip,
            "port": port,
            "ms": _elapsed_ms(),
            "err": "refused",
            "hint": "Connection refused. PE may be in Disconnected state, or an upstream device is sending RST.",
        }
    except OSError as e:
        return {
            "status": "FAIL",
            "ip": ip,
            "port": port,
            "ms": _elapsed_ms(),
            "err": type(e).__name__,
            "msg": str(e)[:200],
            "hint": "OS-level network error (no route, host unreachable). Check UDR / VNet peering.",
        }


def probe_tls(host: str, ip: str, port: int, timeout_sec: int) -> dict[str, Any]:
    """Connect to ``ip:port`` and complete a verified TLS handshake for ``host``.

    ``host`` is used as the SNI / verification name while the TCP connection
    targets ``ip``. On success returns protocol version, cipher name and a
    summary of the peer certificate (subject, issuer, up to 10 DNS SANs).
    Certificate, TLS and socket errors are returned as ``status: FAIL``.
    """
    t0 = time.perf_counter()
    ctx = ssl.create_default_context()
    try:
        with socket.create_connection((ip, port), timeout=timeout_sec) as raw:
            with ctx.wrap_socket(raw, server_hostname=host) as tls:
                cert = tls.getpeercert()
                # Each entry is a tuple of (key, value) pairs; only the first
                # pair of each entry is rendered.
                subject = ", ".join(
                    "=".join(p[0]) for p in cert.get("subject", []) if p
                )
                issuer = ", ".join(
                    "=".join(p[0]) for p in cert.get("issuer", []) if p
                )
                sans = [v for k, v in cert.get("subjectAltName", []) if k == "DNS"]
                cipher = tls.cipher()
                return {
                    "status": "ok",
                    "ms": round((time.perf_counter() - t0) * 1000, 1),
                    "version": tls.version(),
                    "cipher": cipher[0] if cipher else None,
                    "cert_subject": subject,
                    "cert_issuer": issuer,
                    "cert_sans": sans[:10],
                }
    # Order matters: SSLCertVerificationError is an SSLError, which is an OSError.
    except ssl.SSLCertVerificationError as e:
        return {
            "status": "FAIL",
            "err": "SSLCertVerificationError",
            "msg": str(e)[:300],
            "hint": "Cert verify failed. A firewall is likely doing TLS interception — bypass *.azurecr.io / *.azure.com.",
        }
    except ssl.SSLError as e:
        return {
            "status": "FAIL",
            "err": "SSLError",
            "msg": str(e)[:300],
            "hint": "TLS handshake failed mid-stream. A network middlebox may be breaking SNI; ensure TLS passthrough.",
        }
    except (socket.timeout, OSError) as e:
        return {
            "status": "FAIL",
            "err": type(e).__name__,
            "msg": str(e)[:200],
            "hint": "TCP succeeded but TLS phase failed. Could be a network device reset or a transient issue.",
        }


def probe_http_get(
    url: str, host_header: str | None, http_timeout_sec: int
) -> dict[str, Any]:
    """Plain HTTPS GET. Reports status, headers, body preview. Never sends auth.

    Non-``https`` URLs are rejected without any network activity. At most
    2048 bytes of the body are read and the first 400 decoded characters are
    returned. Any exception during the request is returned as a
    ``status: FAIL`` block; the connection is always closed.
    """
    parsed = urllib.parse.urlparse(url)
    if parsed.scheme != "https":
        return {"status": "FAIL", "err": "scheme", "hint": "Only HTTPS supported."}
    conn = None
    try:
        ctx = ssl.create_default_context()
        conn = http.client.HTTPSConnection(
            parsed.hostname,
            parsed.port or 443,
            timeout=http_timeout_sec,
            context=ctx,
        )
        path = parsed.path or "/"
        if parsed.query:
            path = f"{path}?{parsed.query}"
        headers = {"User-Agent": "foundry-diagnostic-agent/1.0", "Accept": "*/*"}
        if host_header:
            headers["Host"] = host_header
        t0 = time.perf_counter()
        conn.request("GET", path, headers=headers)
        resp = conn.getresponse()
        body = resp.read(2048)
        elapsed = round((time.perf_counter() - t0) * 1000, 1)
        # Surface a small subset of headers that are useful for triage.
        useful_headers = {
            k.lower(): v
            for k, v in resp.getheaders()
            if k.lower()
            in (
                "www-authenticate",
                "server",
                "content-type",
                "docker-distribution-api-version",
                "x-ms-request-id",
                "x-ms-correlation-request-id",
            )
        }
        return {
            "status": "ok",
            "url": url,
            "code": resp.status,
            "reason": resp.reason,
            "ms": elapsed,
            "headers": useful_headers,
            "body_preview": body.decode("utf-8", errors="replace")[:400],
        }
    except Exception as e:  # noqa: BLE001
        return {
            "status": "FAIL",
            "url": url,
            "err": type(e).__name__,
            "msg": str(e)[:300],
            "hint": "HTTPS request failed. See per-layer hints in TCP/TLS probes for the same host.",
        }
    finally:
        # The body is only partially read, so the socket would otherwise stay
        # open until garbage collection.
        if conn is not None:
            conn.close()


def probe_host(host: str, tcp_timeout_sec: int, http_timeout_sec: int) -> dict[str, Any]:
    """Composite probe: DNS -> TCP/443 -> TLS/443 -> HTTPS GET /v2/ (or /).

    Stops at the first failing layer, so the result contains only the layers
    that were attempted. TCP and TLS use the first resolved IP; the TLS step
    reuses ``tcp_timeout_sec``.
    """
    result: dict[str, Any] = {"host": host}
    dns = probe_dns(host)
    result["dns"] = dns
    if dns["status"] != "ok" or not dns.get("ips"):
        return result
    ip = dns["ips"][0]
    result["tcp_443"] = probe_tcp(ip, 443, tcp_timeout_sec)
    if result["tcp_443"]["status"] != "ok":
        return result
    result["tls_443"] = probe_tls(host, ip, 443, tcp_timeout_sec)
    if result["tls_443"]["status"] != "ok":
        return result
    # For ACR-shaped hosts, /v2/ is the canonical reachability test and returns
    # 401 with a useful WWW-Authenticate header. For everything else hit /.
    path = "/v2/" if host.endswith(".azurecr.io") or ".data.azurecr.io" in host else "/"
    result["http_get"] = probe_http_get(
        f"https://{host}{path}", host_header=None, http_timeout_sec=http_timeout_sec
    )
    return result


# ── handler ──────────────────────────────────────────────────────────────────

app = InvocationAgentServerHost()


def _parse_body(body: bytes) -> dict[str, Any]:
    """Turn the raw request body into a probe spec dict.

    * empty body -> ``{}``
    * JSON object -> that object
    * any other valid JSON value -> ``{}``
    * anything that is not valid JSON (including bytes that are not valid
      UTF-8) -> ``{"hosts": [<stripped text>]}``, or ``{}`` if the text is
      blank
    """
    if not body:
        return {}
    try:
        data = json.loads(body)
        return data if isinstance(data, dict) else {}
    except ValueError:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError that json.loads raises for undecodable bytes.
        # Plain-text body (e.g. from the Foundry portal chat UI) — treat the
        # text as a single hostname so users can paste an FQDN and get answers.
        text = body.decode("utf-8", errors="replace").strip()
        if text:
            return {"hosts": [text]}
        return {}


@app.invoke_handler
async def handle_invoke(request: Request) -> JSONResponse:
    """Run the requested diagnostic sections and return them as one JSON document.

    The response ``status`` is ``ok`` when every section ran, ``partial``
    when at least one section raised, and ``handler_error`` when the handler
    itself failed; in the last case whatever was collected in ``checks`` is
    still returned alongside an ``error`` block. The handler does not set a
    non-default HTTP status code on any path.

    Probes use blocking sockets, so each section runs in the event loop's
    default executor to keep the loop free while they wait.
    """
    import asyncio  # function-scope import: only this handler needs it

    session_id = getattr(request.state, "session_id", None)
    invocation_id = getattr(request.state, "invocation_id", None)
    t_start = time.perf_counter()
    checks: dict[str, Any] = {}
    section_errors: list[dict[str, Any]] = []

    async def _run_section(name: str, fn) -> None:
        """Run a probe section under its own try/except so one failure cannot
        suppress sibling diagnostics."""
        try:
            # Off-load the blocking probe; an exception raised in the worker
            # thread is re-raised here by the await.
            checks[name] = await asyncio.get_running_loop().run_in_executor(None, fn)
        except Exception as e:  # noqa: BLE001 — diagnostic; surface everything
            tb = traceback.format_exc()
            logger.error(
                "section FAIL name=%s err=%s msg=%s\n%s",
                name,
                type(e).__name__,
                str(e)[:500],
                tb,
            )
            checks[name] = {
                "status": "FAIL",
                "err": type(e).__name__,
                "msg": str(e)[:500],
                "traceback": tb,
            }
            section_errors.append({"section": name, "err": type(e).__name__})

    try:
        body = await request.body()
        spec = _parse_body(body)

        tcp_timeout_sec = int(spec.get("tcp_timeout_sec") or 5)
        http_timeout_sec = int(spec.get("http_timeout_sec") or 10)

        # A bare string is accepted as a single entry rather than being
        # iterated one character at a time.
        hosts = _as_list(spec.get("hosts"))
        public_hosts = spec.get("public_hosts")
        if public_hosts is None:
            public_hosts = _DEFAULT_PUBLIC_HOSTS
        else:
            public_hosts = _as_list(public_hosts)

        include_env = spec.get("include_env_dump", True)
        include_container = spec.get("include_container_info", True)

        logger.info(
            "invoke start invocation=%s session=%s body_len=%d hosts=%d public=%d",
            invocation_id,
            session_id,
            len(body),
            len(hosts),
            len(public_hosts),
        )
        logger.debug("invoke spec=%s", spec)

        if include_container:
            await _run_section("container", probe_container_info)
            logger.info(
                "probe container status=%s ip=%s",
                checks["container"].get("status"),
                checks["container"].get("ip"),
            )
        if include_env:
            await _run_section("env", probe_env_dump)
            logger.info(
                "probe env status=%s keys=%d",
                checks["env"].get("status"),
                len(checks["env"].get("values") or {}),
            )
        if hosts:
            def _hosts_section() -> list[dict[str, Any]]:
                results: list[dict[str, Any]] = []
                for h in hosts:
                    try:
                        results.append(
                            probe_host(h, tcp_timeout_sec, http_timeout_sec)
                        )
                    except Exception as e:  # noqa: BLE001
                        results.append(
                            {
                                "host": h,
                                "status": "FAIL",
                                "err": type(e).__name__,
                                "msg": str(e)[:300],
                            }
                        )
                return results

            await _run_section("hosts", _hosts_section)
            # checks["hosts"] is a dict (not a list) when the section itself failed.
            for r in checks["hosts"] if isinstance(checks.get("hosts"), list) else []:
                logger.info(
                    "probe host %s dns=%s tcp=%s tls=%s http=%s",
                    r.get("host"),
                    (r.get("dns") or {}).get("status"),
                    (r.get("tcp_443") or {}).get("status"),
                    (r.get("tls_443") or {}).get("status"),
                    (r.get("http_get") or {}).get("code")
                    or (r.get("http_get") or {}).get("status"),
                )
        if public_hosts:
            def _public_section() -> list[dict[str, Any]]:
                results: list[dict[str, Any]] = []
                for u in public_hosts:
                    try:
                        results.append(probe_http_get(u, None, http_timeout_sec))
                    except Exception as e:  # noqa: BLE001
                        results.append(
                            {
                                "url": u,
                                "status": "FAIL",
                                "err": type(e).__name__,
                                "msg": str(e)[:300],
                            }
                        )
                return results

            await _run_section("public_hosts", _public_section)
            for r in (
                checks["public_hosts"]
                if isinstance(checks.get("public_hosts"), list)
                else []
            ):
                logger.info(
                    "probe public %s code=%s status=%s",
                    r.get("url"),
                    r.get("code"),
                    r.get("status"),
                )

        elapsed_ms = round((time.perf_counter() - t_start) * 1000, 1)
        logger.info(
            "invoke ok invocation=%s session=%s ms=%s checks=%s section_errors=%d",
            invocation_id,
            session_id,
            elapsed_ms,
            list(checks.keys()),
            len(section_errors),
        )

        return JSONResponse(
            {
                "status": "ok" if not section_errors else "partial",
                "agent_session_id": session_id,
                "invocation_id": invocation_id,
                "timestamp_utc": datetime.now(timezone.utc).isoformat(),
                "elapsed_ms": elapsed_ms,
                "section_errors": section_errors,
                "checks": checks,
            }
        )
    except Exception as e:  # noqa: BLE001 — last-chance; still return 200 with details
        elapsed_ms = round((time.perf_counter() - t_start) * 1000, 1)
        tb = traceback.format_exc()
        logger.error(
            "invoke FAIL invocation=%s session=%s ms=%s err=%s msg=%s\n%s",
            invocation_id,
            session_id,
            elapsed_ms,
            type(e).__name__,
            str(e)[:500],
            tb,
        )
        return JSONResponse(
            {
                "status": "handler_error",
                "agent_session_id": session_id,
                "invocation_id": invocation_id,
                "timestamp_utc": datetime.now(timezone.utc).isoformat(),
                "elapsed_ms": elapsed_ms,
                "checks": checks,
                "error": {
                    "type": type(e).__name__,
                    "message": str(e)[:500],
                    "traceback": tb,
                },
            }
        )


if __name__ == "__main__":
    app.run()
parts.path.rstrip("/").rsplit("/", 1)[-1] qs = urlencode({ - "project_name": project, - "agent_name": agent, "api-version": api_version, "agent_session_id": session_id, }) + path = ( + f"/api/projects/{quote(project, safe='')}" + f"/agents/{quote(agent, safe='')}" + "/endpoint/protocols/invocations_ws" + ) return urlunsplit(( "wss" if parts.scheme in ("https", "wss") else "ws", parts.netloc, - "/api/projects/agents/endpoint/protocols/invocations_ws", + path, qs, "", )) diff --git a/samples/python/hosted-agents/bring-your-own/invocations_ws/duplex-live-agent/e2e_local.py b/samples/python/hosted-agents/bring-your-own/invocations_ws/duplex-live-agent/e2e_local.py index 93961c9a7..5ad5ff20d 100644 --- a/samples/python/hosted-agents/bring-your-own/invocations_ws/duplex-live-agent/e2e_local.py +++ b/samples/python/hosted-agents/bring-your-own/invocations_ws/duplex-live-agent/e2e_local.py @@ -30,7 +30,7 @@ import subprocess import sys import time -from urllib.parse import urlencode, urlsplit, urlunsplit +from urllib.parse import quote, urlencode, urlsplit, urlunsplit import websockets @@ -39,15 +39,18 @@ def _foundry_url(project_endpoint: str, agent: str, session_id: str, api_version parts = urlsplit(project_endpoint) project = parts.path.rstrip("/").rsplit("/", 1)[-1] qs = urlencode({ - "project_name": project, - "agent_name": agent, "api-version": api_version, "agent_session_id": session_id, }) + path = ( + f"/api/projects/{quote(project, safe='')}" + f"/agents/{quote(agent, safe='')}" + "/endpoint/protocols/invocations_ws" + ) return urlunsplit(( "wss" if parts.scheme in ("https", "wss") else "ws", parts.netloc, - "/api/projects/agents/endpoint/protocols/invocations_ws", + path, qs, "", )) diff --git a/samples/python/hosted-agents/bring-your-own/invocations_ws/hello-world/README.md b/samples/python/hosted-agents/bring-your-own/invocations_ws/hello-world/README.md index 2d57ec747..36f611e18 100644 --- 
a/samples/python/hosted-agents/bring-your-own/invocations_ws/hello-world/README.md +++ b/samples/python/hosted-agents/bring-your-own/invocations_ws/hello-world/README.md @@ -197,10 +197,8 @@ data-plane** WebSocket URL (`e2e_local.py` builds this for you from `--foundry` + `--agent`): ``` -wss://.services.ai.azure.com/api/projects/agents/endpoint/protocols/invocations_ws +wss://.services.ai.azure.com/api/projects//agents//endpoint/protocols/invocations_ws ?api-version=v1 - &project_name= - &agent_name= &agent_session_id= ``` @@ -209,10 +207,10 @@ Where the segments come from: | Part | Value | |------|-------| | `` | AI Services account host — the same host as your Foundry project endpoint (`https://.services.ai.azure.com/api/projects/`). | -| `/api/projects/agents/endpoint/protocols/invocations_ws` | Fixed data-plane route — `agents` and `endpoint` are literal path segments, not your agent name. The actual agent is picked via the `agent_name` query parameter. | +| `/api/projects//agents//endpoint/protocols/invocations_ws` | Data-plane route; project and agent are URL-encoded path segments. | | `api-version=v1` | Foundry data-plane API version. | -| `project_name=` | The last segment of your project endpoint path. | -| `agent_name=` | Matches the agent `name` in [`agent.manifest.yaml`](agent.manifest.yaml) — `hello-world`. | +| `` | The last segment of your project endpoint path. | +| `` | Matches the agent `name` in [`agent.manifest.yaml`](agent.manifest.yaml) — `hello-world`. | | `agent_session_id=` | A caller-generated string that identifies the conversation. Reuse the same id to resume; use a fresh one (e.g. a UUID) to start a new session. 
| Every request must also include `Authorization: Bearer ` diff --git a/samples/python/hosted-agents/bring-your-own/invocations_ws/hello-world/chat_client/proxy.py b/samples/python/hosted-agents/bring-your-own/invocations_ws/hello-world/chat_client/proxy.py index b562d4891..5f2f87f29 100644 --- a/samples/python/hosted-agents/bring-your-own/invocations_ws/hello-world/chat_client/proxy.py +++ b/samples/python/hosted-agents/bring-your-own/invocations_ws/hello-world/chat_client/proxy.py @@ -52,7 +52,7 @@ import sys import uuid from http import HTTPStatus -from urllib.parse import urlencode, urlsplit, urlunsplit +from urllib.parse import quote, urlencode, urlsplit, urlunsplit import websockets from websockets.asyncio.server import serve @@ -66,15 +66,18 @@ def _foundry_url(project_endpoint: str, agent: str, session_id: str, api_version parts = urlsplit(project_endpoint) project = parts.path.rstrip("/").rsplit("/", 1)[-1] qs = urlencode({ - "project_name": project, - "agent_name": agent, "api-version": api_version, "agent_session_id": session_id, }) + path = ( + f"/api/projects/{quote(project, safe='')}" + f"/agents/{quote(agent, safe='')}" + "/endpoint/protocols/invocations_ws" + ) return urlunsplit(( "wss" if parts.scheme in ("https", "wss") else "ws", parts.netloc, - "/api/projects/agents/endpoint/protocols/invocations_ws", + path, qs, "", )) diff --git a/samples/python/hosted-agents/bring-your-own/invocations_ws/hello-world/e2e_local.py b/samples/python/hosted-agents/bring-your-own/invocations_ws/hello-world/e2e_local.py index 93961c9a7..5ad5ff20d 100644 --- a/samples/python/hosted-agents/bring-your-own/invocations_ws/hello-world/e2e_local.py +++ b/samples/python/hosted-agents/bring-your-own/invocations_ws/hello-world/e2e_local.py @@ -30,7 +30,7 @@ import subprocess import sys import time -from urllib.parse import urlencode, urlsplit, urlunsplit +from urllib.parse import quote, urlencode, urlsplit, urlunsplit import websockets @@ -39,15 +39,18 @@ def 
_foundry_url(project_endpoint: str, agent: str, session_id: str, api_version parts = urlsplit(project_endpoint) project = parts.path.rstrip("/").rsplit("/", 1)[-1] qs = urlencode({ - "project_name": project, - "agent_name": agent, "api-version": api_version, "agent_session_id": session_id, }) + path = ( + f"/api/projects/{quote(project, safe='')}" + f"/agents/{quote(agent, safe='')}" + "/endpoint/protocols/invocations_ws" + ) return urlunsplit(( "wss" if parts.scheme in ("https", "wss") else "ws", parts.netloc, - "/api/projects/agents/endpoint/protocols/invocations_ws", + path, qs, "", )) diff --git a/samples/python/hosted-agents/bring-your-own/invocations_ws/livekit-server/chat_client/upstream.py b/samples/python/hosted-agents/bring-your-own/invocations_ws/livekit-server/chat_client/upstream.py index e5a8c993d..e49d33d75 100644 --- a/samples/python/hosted-agents/bring-your-own/invocations_ws/livekit-server/chat_client/upstream.py +++ b/samples/python/hosted-agents/bring-your-own/invocations_ws/livekit-server/chat_client/upstream.py @@ -13,7 +13,7 @@ import os import subprocess import time -from urllib.parse import urlencode, urlsplit, urlunsplit +from urllib.parse import quote, urlencode, urlsplit, urlunsplit logger = logging.getLogger("chat-client.upstream") @@ -41,15 +41,18 @@ def _build_url(agent_name: str, session_id: str) -> str: scheme = "wss" if parts.scheme in ("https", "wss") else "ws" project = parts.path.rstrip("/").rsplit("/", 1)[-1] qs = urlencode({ - "project_name": project, - "agent_name": agent_name, "api-version": _api_version(), "agent_session_id": session_id, }) + path = ( + f"/api/projects/{quote(project, safe='')}" + f"/agents/{quote(agent_name, safe='')}" + "/endpoint/protocols/invocations_ws" + ) return urlunsplit(( scheme, parts.netloc, - "/api/projects/agents/endpoint/protocols/invocations_ws", + path, qs, "", )) diff --git a/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-webrtc/README.md 
b/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-webrtc/README.md index 6e08aaf26..4a4688141 100644 --- a/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-webrtc/README.md +++ b/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-webrtc/README.md @@ -146,10 +146,8 @@ The portal builds the upstream URL as: ``` wss://{account}.services.ai.azure.com - /api/projects/agents/endpoint/protocols/invocations_ws - ?project_name={project} - &agent_name={PIPECAT_WEBRTC_AGENT_NAME} - &api-version={API_VERSION} + /api/projects/{project}/agents/{PIPECAT_WEBRTC_AGENT_NAME}/endpoint/protocols/invocations_ws + ?api-version={API_VERSION} &agent_session_id={generated-per-connection} ``` diff --git a/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-webrtc/chat_client/upstream.py b/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-webrtc/chat_client/upstream.py index d0625bd0b..0557c220a 100644 --- a/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-webrtc/chat_client/upstream.py +++ b/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-webrtc/chat_client/upstream.py @@ -13,7 +13,7 @@ import os import subprocess import time -from urllib.parse import urlencode, urlsplit, urlunsplit +from urllib.parse import quote, urlencode, urlsplit, urlunsplit logger = logging.getLogger("chat-client.upstream") @@ -41,15 +41,18 @@ def _build_url(agent_name: str, session_id: str) -> str: scheme = "wss" if parts.scheme in ("https", "wss") else "ws" project = parts.path.rstrip("/").rsplit("/", 1)[-1] qs = urlencode({ - "project_name": project, - "agent_name": agent_name, "api-version": _api_version(), "agent_session_id": session_id, }) + path = ( + f"/api/projects/{quote(project, safe='')}" + f"/agents/{quote(agent_name, safe='')}" + "/endpoint/protocols/invocations_ws" + ) return urlunsplit(( scheme, parts.netloc, - "/api/projects/agents/endpoint/protocols/invocations_ws", + path, qs, 
"", )) diff --git a/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-ws-server/README.md b/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-ws-server/README.md index 56a2b65bd..a420f9c44 100644 --- a/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-ws-server/README.md +++ b/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-ws-server/README.md @@ -88,10 +88,8 @@ The portal builds the upstream URL as: ``` wss://{account}.services.ai.azure.com - /api/projects/agents/endpoint/protocols/invocations_ws - ?project_name={project} - &agent_name={PIPECAT_WEBSOCKET_AGENT_NAME} - &api-version={API_VERSION} + /api/projects/{project}/agents/{PIPECAT_WEBSOCKET_AGENT_NAME}/endpoint/protocols/invocations_ws + ?api-version={API_VERSION} &agent_session_id={generated-per-connection} ``` diff --git a/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-ws-server/chat_client/upstream.py b/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-ws-server/chat_client/upstream.py index da3d4aabb..08f590cea 100644 --- a/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-ws-server/chat_client/upstream.py +++ b/samples/python/hosted-agents/bring-your-own/invocations_ws/pipecat-ws-server/chat_client/upstream.py @@ -13,7 +13,7 @@ import os import subprocess import time -from urllib.parse import urlencode, urlsplit, urlunsplit +from urllib.parse import quote, urlencode, urlsplit, urlunsplit logger = logging.getLogger("chat-client.upstream") @@ -41,15 +41,18 @@ def _build_url(agent_name: str, session_id: str) -> str: scheme = "wss" if parts.scheme in ("https", "wss") else "ws" project = parts.path.rstrip("/").rsplit("/", 1)[-1] qs = urlencode({ - "project_name": project, - "agent_name": agent_name, "api-version": _api_version(), "agent_session_id": session_id, }) + path = ( + f"/api/projects/{quote(project, safe='')}" + f"/agents/{quote(agent_name, safe='')}" + 
"/endpoint/protocols/invocations_ws" + ) return urlunsplit(( scheme, parts.netloc, - "/api/projects/agents/endpoint/protocols/invocations_ws", + path, qs, "", ))