microsoft · sharadhisrikanth28 · Jun 18, 2026 · Jun 18, 2026 · Jun 25, 2026 · Jun 25, 2026
diff --git a/evals/azure-advisor/eval.yaml b/evals/azure-advisor/eval.yaml
@@ -0,0 +1,305 @@
+# Vally eval config for the azure-advisor skill.
+#
+# Replaces the legacy Jest trigger tests (tests/azure-advisor/triggers.test.ts),
+# which are deprecated per tests/README.md. This suite expresses the skill's
+# scenarios as Vally stimuli graded against a real LLM agent run.
+#
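+# Coverage (per tests/AGENTS.md):
+# - Routing stimuli measure skill-invocation rate across the documented WHEN
+#   trigger phrases (5 runs each, 80% threshold), with early termination once the
+#   skill is invoked to keep runs cheap.
+# - Boundary (negative) routing stimuli assert that prompts for adjacent services
+#   do not invoke the skill.
+# - Tool-call trajectory stimuli check that an `advisor_` MCP tool is actually
+#   called, terminating early once it is.
+# - Response-quality stimuli (runs: 1) check that the assistant output mentions
+#   Advisor recommendations and does not surface fatal errors.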
+
+# Suite identity. The description enumerates every stimulus group defined under
+# `stimuli` below so the summary matches what the suite actually runs.
+name: azure-advisor-integration-eval
+description: |
+  Integration evaluation for the azure-advisor skill. Verifies routing for Advisor
+  review, recommendation-check, health-check, and audit prompts via skill-invocation
+  rate (5 runs, 80% threshold), boundary prompts for adjacent services that must not
+  invoke the skill, tool-call trajectory checks that an advisor_ tool is reached, and
+  response-quality checks for Advisor recommendation output.
+
+tags:
+  type: integration
+  skill: azure-advisor
+
+# Suite-wide defaults. A stimulus may carry its own `config` block; the
+# response-quality stimulus does so to set runs: 1.
+config:
+  runs: 5
+  timeout: "10m"
+  executor: integration-test-agent-runner
+  model: claude-sonnet-4.6
+
+# Pass threshold, paired with `runs` above as "5 runs, 80% threshold".
+scoring:
+  threshold: 0.8
+
+stimuli:
+  # ═══════════════════════════════════════════
+  # Skill routing prompts
+  # ═══════════════════════════════════════════
+
+  # ── run-advisor-review (smoke) ──
+  # Positive routing check (smoke tier). The prompt mirrors the skill's
+  # "run an advisor review" WHEN trigger phrase.
+  - name: "Run an Advisor review"
+    prompt: "Run an Azure Advisor review of my subscription"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: smoke
+      cost: llm
+      area: routing
+      # Stop conditions, JSON-encoded inside a string tag: the azure-advisor skill
+      # being called, or a cap of 3 tool calls. Presumably whichever fires first
+      # ends the run — TODO confirm with the runner.
+      earlyTerminate: '[{"type":"skill-call","skill":"azure-advisor"},{"type":"tool-call-count","count":3}]'
+    graders:
+      # The azure-advisor skill must have been invoked.
+      - type: skill-invocation
+        config:
+          required:
+            - azure-advisor
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ── check-recommendations ──
+  # Positive routing check (full tier). The prompt mirrors the skill's
+  # "check my Azure advisor recommendations" WHEN trigger phrase.
+  - name: "Check Advisor recommendations"
+    prompt: "Check my Azure Advisor recommendations"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: full
+      cost: llm
+      area: routing
+      # Stop conditions, JSON-encoded inside a string tag: the azure-advisor skill
+      # being called, or a cap of 3 tool calls. Presumably whichever fires first
+      # ends the run — TODO confirm with the runner.
+      earlyTerminate: '[{"type":"skill-call","skill":"azure-advisor"},{"type":"tool-call-count","count":3}]'
+    graders:
+      # The azure-advisor skill must have been invoked.
+      - type: skill-invocation
+        config:
+          required:
+            - azure-advisor
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ── what-does-advisor-say ──
+  # Positive routing check (full tier). The prompt mirrors the skill's
+  # "what does Advisor say about my subscription" WHEN trigger phrase.
+  - name: "What does Advisor say about my subscription"
+    prompt: "What does Advisor say about my Azure subscription?"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: full
+      cost: llm
+      area: routing
+      # Stop conditions, JSON-encoded inside a string tag: the azure-advisor skill
+      # being called, or a cap of 3 tool calls. Presumably whichever fires first
+      # ends the run — TODO confirm with the runner.
+      earlyTerminate: '[{"type":"skill-call","skill":"azure-advisor"},{"type":"tool-call-count","count":3}]'
+    graders:
+      # The azure-advisor skill must have been invoked.
+      - type: skill-invocation
+        config:
+          required:
+            - azure-advisor
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ── advisor-health-check ──
+  # Positive routing check (full tier). The prompt mirrors the skill's
+  # "give me an advisor health check" WHEN trigger phrase.
+  - name: "Azure Advisor health check"
+    prompt: "Give me an Azure Advisor health check"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: full
+      cost: llm
+      area: routing
+      # Stop conditions, JSON-encoded inside a string tag: the azure-advisor skill
+      # being called, or a cap of 3 tool calls. Presumably whichever fires first
+      # ends the run — TODO confirm with the runner.
+      earlyTerminate: '[{"type":"skill-call","skill":"azure-advisor"},{"type":"tool-call-count","count":3}]'
+    graders:
+      # The azure-advisor skill must have been invoked.
+      - type: skill-invocation
+        config:
+          required:
+            - azure-advisor
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ── audit-with-advisor ──
+  # Positive routing check (full tier). The prompt mirrors the skill's
+  # "audit my Azure resources with Advisor" WHEN trigger phrase.
+  - name: "Audit resources with Advisor"
+    prompt: "Audit my Azure resources with Advisor"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: full
+      cost: llm
+      area: routing
+      # Stop conditions, JSON-encoded inside a string tag: the azure-advisor skill
+      # being called, or a cap of 3 tool calls. Presumably whichever fires first
+      # ends the run — TODO confirm with the runner.
+      earlyTerminate: '[{"type":"skill-call","skill":"azure-advisor"},{"type":"tool-call-count","count":3}]'
+    graders:
+      # The azure-advisor skill must have been invoked.
+      - type: skill-invocation
+        config:
+          required:
+            - azure-advisor
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ── summarize-advisor-findings ──
+  # Positive routing check (full tier) for the skill's "summarize advisor
+  # findings" WHEN trigger phrase, the one documented phrase that otherwise has
+  # no routing stimulus. Same tags, stop conditions and graders as the sibling
+  # routing stimuli.
+  - name: "Summarize Advisor findings"
+    prompt: "Summarize my Azure Advisor findings"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: full
+      cost: llm
+      area: routing
+      earlyTerminate: '[{"type":"skill-call","skill":"azure-advisor"},{"type":"tool-call-count","count":3}]'
+    graders:
+      - type: skill-invocation
+        config:
+          required:
+            - azure-advisor
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ═══════════════════════════════════════════
+  # Boundary (negative) routing — adjacent services from
+  # the skill's DO NOT USE FOR section must NOT route to azure-advisor.
+  # `disallowed` asserts the skill stays out of these requests.
+  # ═══════════════════════════════════════════
+
+  # ── boundary-cost ──
+  # Negative routing check (smoke tier): cost analysis is billing, which the
+  # skill description hands off to azure-cost.
+  # NOTE(review): there is no earlyTerminate tag here, so each run presumably
+  # plays out in full under the suite defaults — confirm that cost is acceptable.
+  - name: "Cost query does not route to advisor"
+    prompt: "Analyze my Azure subscription costs and spending trends"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: smoke
+      cost: llm
+      area: routing
+    graders:
+      # The azure-advisor skill must NOT be invoked for this prompt.
+      - type: skill-invocation
+        config:
+          disallowed:
+            - azure-advisor
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ── boundary-diagnostics ──
+  # Negative routing check (full tier): troubleshooting a failing app is a
+  # non-Advisor issue, which the skill description hands off to azure-diagnostics.
+  # NOTE(review): there is no earlyTerminate tag here, so each run presumably
+  # plays out in full under the suite defaults — confirm that cost is acceptable.
+  - name: "Diagnostics query does not route to advisor"
+    prompt: "My App Service keeps returning 500 errors, help me troubleshoot it"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: full
+      cost: llm
+      area: routing
+    graders:
+      # The azure-advisor skill must NOT be invoked for this prompt.
+      - type: skill-invocation
+        config:
+          disallowed:
+            - azure-advisor
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ── boundary-rbac ──
+  # Negative routing check (full tier): granting access changes resources, which
+  # the skill description lists under DO NOT USE FOR.
+  # NOTE(review): the prompt asks for a write operation and there is no
+  # earlyTerminate tag — confirm the runner cannot change real role assignments.
+  - name: "RBAC query does not route to advisor"
+    prompt: "Grant my team Reader access to the production resource group"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: full
+      cost: llm
+      area: routing
+    graders:
+      # The azure-advisor skill must NOT be invoked for this prompt.
+      - type: skill-invocation
+        config:
+          disallowed:
+            - azure-advisor
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ── boundary-keyvault ──
+  # Negative routing check (full tier): storing a secret changes resources, which
+  # the skill description lists under DO NOT USE FOR.
+  # NOTE(review): the prompt asks for a write operation and there is no
+  # earlyTerminate tag — confirm the runner cannot write to a real Key Vault.
+  - name: "Key Vault query does not route to advisor"
+    prompt: "Store a new secret in my Azure Key Vault"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: full
+      cost: llm
+      area: routing
+    graders:
+      # The azure-advisor skill must NOT be invoked for this prompt.
+      - type: skill-invocation
+        config:
+          disallowed:
+            - azure-advisor
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ── boundary-appservice ──
+  # Negative routing check (full tier): deploying an app changes resources, which
+  # the skill description lists under DO NOT USE FOR.
+  # NOTE(review): the prompt asks for a deployment and there is no earlyTerminate
+  # tag — confirm the runner cannot deploy to a real App Service.
+  - name: "App Service deploy does not route to advisor"
+    prompt: "Deploy my web app to Azure App Service"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: full
+      cost: llm
+      area: routing
+    graders:
+      # The azure-advisor skill must NOT be invoked for this prompt.
+      - type: skill-invocation
+        config:
+          disallowed:
+            - azure-advisor
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ═══════════════════════════════════════════
+  # Tool-call trajectory — validate behavior up to the point the agent
+  # invokes an Advisor MCP tool. Early-terminates as soon as an `advisor_`
+  # tool is called (or after a small tool-call cap as a safety net), then the
+  # `tool-calls` grader asserts an `advisor_` tool was actually executed.
+  # ═══════════════════════════════════════════
+
+  # ── review-calls-advisor-tool ──
+  # Behavior check (smoke tier): reuses the "run an Advisor review" prompt but
+  # lets the run continue past skill invocation until an Advisor tool is called.
+  - name: "Advisor review reaches an advisor tool call"
+    prompt: "Run an Azure Advisor review of my subscription"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: smoke
+      cost: llm
+      area: behavior
+      # Stop conditions, JSON-encoded inside a string tag: a tool call whose name
+      # matches "advisor_" (any arguments), or a safety cap of 8 tool calls.
+      earlyTerminate: '[{"type":"tool-call-match","toolPattern":"advisor_","argsPattern":".*"},{"type":"tool-call-count","count":8}]'
+    graders:
+      # The azure-advisor skill must have been invoked.
+      - type: skill-invocation
+        config:
+          required:
+            - azure-advisor
+      # An `advisor_` tool must actually have been executed.
+      # NOTE(review): real tool names carry a server prefix (e.g.
+      # `azure-mcp-advisor_recommendation_list`), so this presumably relies on
+      # substring/pattern matching of `name` — confirm it is not an exact match.
+      - type: tool-calls
+        config:
+          required:
+            - name: "advisor_"
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ── check-recommendations-calls-advisor-tool ──
+  # Behavior check (full tier): reuses the "check my Azure Advisor
+  # recommendations" prompt but lets the run continue past skill invocation
+  # until an Advisor tool is called.
+  - name: "Recommendation check reaches an advisor tool call"
+    prompt: "Check my Azure Advisor recommendations"
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: full
+      cost: llm
+      area: behavior
+      # Stop conditions, JSON-encoded inside a string tag: a tool call whose name
+      # matches "advisor_" (any arguments), or a safety cap of 8 tool calls.
+      earlyTerminate: '[{"type":"tool-call-match","toolPattern":"advisor_","argsPattern":".*"},{"type":"tool-call-count","count":8}]'
+    graders:
+      # The azure-advisor skill must have been invoked.
+      - type: skill-invocation
+        config:
+          required:
+            - azure-advisor
+      # An `advisor_` tool must actually have been executed.
+      # NOTE(review): real tool names carry a server prefix (e.g.
+      # `azure-mcp-advisor_recommendation_list`), so this presumably relies on
+      # substring/pattern matching of `name` — confirm it is not an exact match.
+      - type: tool-calls
+        config:
+          required:
+            - name: "advisor_"
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
+
+  # ═══════════════════════════════════════════
+  # Response quality tests
+  # ═══════════════════════════════════════════
+
+  # ── review-mentions-recommendations ──
+  # Response-quality check (full tier): a single run, with no early termination,
+  # whose final output must talk about recommendations.
+  - name: "Advisor review mentions recommendations"
+    prompt: "Run an Azure Advisor review of my subscription"
+    # Overrides the suite-wide runs: 5 — one complete run is enough for a
+    # content check.
+    config:
+      runs: 1
+    tags:
+      type: integration
+      skill: azure-advisor
+      tier: full
+      cost: llm
+      area: response-quality
+    graders:
+      # Require the word "recommendation" (case-insensitive). "advisor" alone is
+      # not accepted: the prompt already contains it, so an output that merely
+      # echoes the request would pass without mentioning any recommendations.
+      - type: output-matches
+        config:
+          pattern: "(?i)recommendation"
+      # The run must finish rather than time out or abort.
+      # NOTE(review): exact semantics of `completed` are the grader's — confirm.
+      - type: completed
+      # Case-insensitive guard against crash text leaking into the output.
+      - type: output-not-matches
+        config:
+          pattern: "(?i)fatal error|unhandled exception|stack trace"
@@ -0,0 +1,44 @@
+---
+name: azure-advisor
+description: "Azure Advisor reviews resources and provides recommendations using Azure MCP advisor_* tools. WHEN: \"run an advisor review\", \"check my Azure advisor recommendations\", \"summarize advisor findings\", \"what does Advisor say about my subscription\", \"give me an advisor health check\", \"audit my Azure resources with Advisor\". USE FOR: read-only catalog, recommendations, and IaC fixes. DO NOT USE FOR: changing resources, billing (use azure-cost), or non-Advisor issues (use azure-diagnostics)."
+license: MIT
+metadata:
+  author: Microsoft
+  version: "0.0.0-placeholder"
+---
+
+# Azure Advisor Skill
+
+Azure Advisor is a **product area** with multiple capabilities. This skill routes a
+user's intent to the right capability and runs it using whichever `advisor_*` MCP tools
+the connected Azure MCP server exposes. Routing is by *capability* (catalog,
+recommendations, summary, IaC fix), not by hard-coded tool names, so the skill stays
+useful as new advisor tools land.
+
+## Pre-Execution Requirements
+
+Inspect the available `advisor_*` MCP tools and their parameters before running a
+capability. Match a tool when its name **contains** `advisor_` (i.e. `*advisor_*`), not
+only when it *starts with* it — MCP clients prepend a server-name prefix (e.g.
+`azure-mcp-advisor_recommendation_list`). Match by capability description, not by a fixed
+name list — see the shared [Capability Routing](references/capability-routing.md) reference.
+
+## Shared References
+
+These product-area references are reused by **every** capability below. Read the
+relevant one before acting:
+
+| Reference | Purpose |
+|-----------|---------|
+| [Capability Routing](references/capability-routing.md) | Resolve which `advisor_*` MCP tool to call for each capability (catalog, recommendations, summary, IaC fix). |
+| [Subscription Discovery](references/subscription-discovery.md) | Resolve a single target subscription, or enumerate and classify all subscriptions by environment, without hardcoding. |
+| [Resource Scope Discovery](references/resource-discovery.md) | Narrow a review to the resources defined in this repo (resource group / type / id) without hardcoding. |
+
+## Capabilities
+
+Route the user's request to the matching capability. **Use these instead of the main
+skill when they match the task:**
+
+| Capability | When to Use | Reference |
+|-----------|-------------|-----------|
+| **review** | Run a holistic, read-only Advisor sweep across one subscription — or **all** subscriptions classified by environment (dev/staging/prod) — probing the catalog, pulling active recommendations, aggregating by category/impact, spotlighting high-impact items, and proposing IaC fix snippets. | [review](review/review.md) |
@@ -0,0 +1,37 @@
+# Shared Reference — Advisor Capability Routing
+
+> **Shared across all `azure-advisor` capabilities.** Any capability that needs to
+> pick an `advisor_*` MCP tool should link here instead of re-defining the table.
+> Match tools by *capability description*, never by hard-coded name.
+
+The connected Azure MCP server may expose a changing set of `advisor_*` tools. At each
+step, resolve which tool to invoke using the capability table below.
+
+## Tool name matching
+
+Match a tool when its name **contains** `advisor_` (i.e. `*advisor_*`) — **not** only
+when it *starts with* `advisor_`. MCP clients prepend the **server name** to every tool,
+so the same Advisor tool surfaces under a different prefix depending on the host:
+
+| Client | Example tool name |
+|---|---|
+| Copilot CLI | `azure-mcp-advisor_recommendation_list` |
+| Other hosts | `<server-prefix>advisor_recommendation_list` |
+
+A strict *starts-with* check would reject `azure-mcp-advisor_recommendation_list` and the
+skill would wrongly report "no Advisor tools found". Always substring-match on `advisor_`.
+
+## Capability table
+
+| Capability | Look for an `advisor_*` tool whose description says... | Required input |
+|---|---|---|
+| **Metadata / catalog** | "list recommendation types / categories / impact levels / supported values" | tenant context (no subscription needed) |
+| **Active recommendations** | "list Advisor recommendations in a subscription" | subscription (resource group optional, filters optional) |
+| **Aggregation / summary** | "summarize / group / aggregate Advisor recommendations" | subscription + group-by field |
+| **IaC remediation** | "apply Advisor recommendations to IaC / ARM / Bicep / Terraform" | a resource type identifier |
+
+## Resolution rules
+
+- If a step's capability has **no matching tool**, skip it and note that in the chat
+  output (e.g. "no aggregation tool available, presenting raw list").
+- If **multiple tools** match, prefer the one with the more specific description.
+- **Never** substitute a tool whose name does **not** contain `advisor_` for a missing
+  capability — report the gap and skip instead.