Azure · zhoxing-ms · Sep 15, 2025 · Sep 4, 2025 · Sep 4, 2025 · Sep 4, 2025
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -62,6 +62,8 @@
 
 /src/aks-preview/ @andyzhangx @andyliuliming @fumingzhang
 
+/src/aks-agent/ @nilo19 @mainerd
+
 /src/bastion/ @aavalang
 
 /src/vm-repair/ @haagha

@@ -12,6 +12,17 @@ To release a new version, please select a new version number (usually plus 1 to
 Pending
 +++++++
 
+1.0.0b2
++++++++
+
+- Add MCP integration for `az aks agent` with aks-mcp binary management and local server lifecycle (download, version validation, start/stop, health checks).
+- Introduce dual-mode operation: MCP mode (enhanced) and Traditional mode (built-in toolsets), with mode-specific system prompts.
+- Implement smart toolset refresh strategy with persisted mode state to avoid unnecessary refresh on repeated runs.
+- Add `--no-aks-mcp` flag to force Traditional mode when desired.
+- Add `--status` flag to `az aks agent` to display MCP binary availability/version, server health, and overall mode/readiness.
+- Add structured error handling with user-friendly messages and actionable suggestions for MCP/binary/server/config errors.
+- Port and adapt comprehensive unit tests covering binary manager, MCP manager, configuration generation/validation, status models/collection, error handling, user feedback, parameters, smart refresh, MCP integration, and status command.
+
 1.0.0b1
 +++++++
 * Add interactive AI-powered debugging tool `az aks agent`.
@@ -8,3 +8,11 @@
 CONST_AGENT_NAME = "AKS AGENT"
 CONST_AGENT_NAME_ENV_KEY = "AGENT_NAME"
 CONST_AGENT_CONFIG_FILE_NAME = "aksAgent.yaml"
+
+# MCP Integration Constants (ported from previous change)
+CONST_MCP_BINARY_NAME = "aks-mcp"
+CONST_MCP_DEFAULT_PORT = 8003
+CONST_MCP_DEFAULT_URL = "http://localhost:8003/sse"
+CONST_MCP_MIN_VERSION = "0.0.7"
+CONST_MCP_GITHUB_REPO = "Azure/aks-mcp"
+CONST_MCP_BINARY_DIR = "bin"
@@ -26,7 +26,14 @@
           short-summary: Name of the resource group.
         - name: --model
           type: string
-          short-summary: Model to use for the LLM.
+          short-summary: Specify the LLM provider and model or deployment to use for the AI assistant.
+          long-summary: |-
+            The --model parameter determines which large language model (LLM) and provider will be used to analyze your cluster.
+            For OpenAI, use the model name directly (e.g., gpt-4o).
+            For Azure OpenAI, use `azure/<deployment name>` (e.g., azure/gpt-4.1).
+            Each provider may require different environment variables and model naming conventions.
+            For a full list of supported providers, model patterns, and required environment variables, see https://docs.litellm.ai/docs/providers.
+            Note: For Azure OpenAI, it is recommended to give the deployment the same name as the underlying model (e.g., a deployment named gpt-4.1 for the gpt-4.1 model) until https://github.com/BerriAI/litellm/issues/13950 is resolved.
         - name: --api-key
           type: string
           short-summary: API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY).
@@ -48,36 +55,30 @@
         - name: --refresh-toolsets
           type: bool
           short-summary: Refresh the toolsets status.
+        - name: --status
+          type: bool
+          short-summary: Show AKS agent configuration and status information.
+        - name: --no-aks-mcp
+          type: bool
+          short-summary: Disable AKS MCP integration and use traditional toolsets.
 
     examples:
         - name: Ask about pod issues in the cluster with Azure OpenAI
           text: |-
             export AZURE_API_BASE="https://my-azureopenai-service.openai.azure.com/"
             export AZURE_API_VERSION="2025-01-01-preview"
             export AZURE_API_KEY="sk-xxx"
-            az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment
+            az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model azure/gpt-4.1
         - name: Ask about pod issues in the cluster with OpenAI
           text: |-
             export OPENAI_API_KEY="sk-xxx"
             az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model gpt-4o
-        - name: Run in interactive mode without a question
-          text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment --api-key "sk-xxx"
-        - name: Run in non-interactive batch mode
-          text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
-        - name: Show detailed tool output during analysis
-          text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/my-gpt4.1-deployment
-        - name: Use custom configuration file
-          text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/my-gpt4.1-deployment
-        - name: Run agent with no echo of the original question
-          text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/my-gpt4.1-deployment
-        - name: Refresh toolsets to get the latest available tools
-          text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/my-gpt4.1-deployment
         - name: Run agent with config file
           text: |
-            az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml
+            az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --name MyManagedCluster --resource-group MyResourceGroup
             Here is an example of config file:
             ```json
-            model: "gpt-4o"
+            model: "azure/gpt-4.1"
             api_key: "..."
             # define a list of mcp servers, mcp server can be defined
             mcp_servers:
@@ -103,4 +104,30 @@
               aks/core:
                 enabled: false
               ```
+        - name: Run in interactive mode without a question
+          text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/gpt-4.1 --api-key "sk-xxx"
+        - name: Run in non-interactive batch mode
+          text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/gpt-4.1
+        - name: Show detailed tool output during analysis
+          text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/gpt-4.1
+        - name: Use custom configuration file
+          text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/gpt-4.1
+        - name: Run agent with no echo of the original question
+          text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/gpt-4.1
+        - name: Refresh toolsets to get the latest available tools
+          text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/gpt-4.1
+        - name: Show agent status (MCP readiness)
+          text: az aks agent --status
+        - name: Run in interactive mode without a question
+          text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment --api-key "sk-xxx"
+        - name: Run in non-interactive batch mode
+          text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
+        - name: Show detailed tool output during analysis
+          text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/my-gpt4.1-deployment
+        - name: Use custom configuration file
+          text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/my-gpt4.1-deployment
+        - name: Run agent with no echo of the original question
+          text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/my-gpt4.1-deployment
+        - name: Refresh toolsets to get the latest available tools
+          text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/my-gpt4.1-deployment
 """
@@ -17,7 +17,9 @@ def load_arguments(self, _):
     with self.argument_context("aks agent") as c:
         c.positional(
             "prompt",
+            nargs='?',
             help="Ask any question and answer using available tools.",
+            required=False,
         )
         c.argument(
             "resource_group_name",
@@ -47,12 +49,12 @@ def load_arguments(self, _):
         )
         c.argument(
             "model",
-            help="The model to use for the LLM.",
+            help="Specify the LLM provider and model or deployment to use for the AI assistant.",
             required=False,
             type=str,
         )
         c.argument(
-            "api-key",
+            "api_key",
             help="API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY)",
             required=False,
             type=str,
@@ -77,3 +79,15 @@ def load_arguments(self, _):
             help="Refresh the toolsets status.",
             action="store_true",
         )
+        c.argument(
+            "status",
+            options_list=["--status"],
+            action="store_true",
+            help="Show AKS agent configuration and status information.",
+        )
+        c.argument(
+            "no_aks_mcp",
+            options_list=["--no-aks-mcp"],
+            help="Disable AKS MCP integration and use traditional toolsets.",
+            action="store_true",
+        )