Skip to content

Commit 3f57b5f

Browse files
committed
Merge branch 'main' into mitchzhu/azlosguard
2 parents 83c6eb3 + 3b3cd48 commit 3f57b5f

159 files changed

Lines changed: 58064 additions & 15904 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/CODEOWNERS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@
6262

6363
/src/aks-preview/ @andyzhangx @andyliuliming @fumingzhang
6464

65+
/src/aks-agent/ @nilo19 @mainred
66+
6567
/src/bastion/ @aavalang
6668

6769
/src/vm-repair/ @haagha
@@ -333,3 +335,5 @@
333335
/src/amlfs/ @Aman-Jain-14 @amajai @mawhite @brpanask @tibanyas
334336

335337
/src/storage-discovery/ @shanefujs @calvinhzy
338+
339+
/src/aks-agent/ @feiskyer @mainred @nilo19
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
name: Trigger ADO OneBranch Extension Release Pipeline
2+
3+
# Run this workflow every time a commit gets pushed to main
4+
# This triggers the ADO OneBranch Extension Release Pipeline
5+
on:
6+
push:
7+
branches:
8+
- main
9+
10+
permissions:
11+
contents: read
12+
id-token: write
13+
14+
jobs:
15+
build:
16+
name: Trigger Extension Release Pipeline
17+
runs-on: ubuntu-latest
18+
steps:
19+
- name: Harden Runner
20+
uses: step-security/harden-runner@63c24ba6bd7ba022e95695ff85de572c04a18142 # v2.7.0
21+
with:
22+
egress-policy: audit
23+
- name: Azure login
24+
uses: azure/login@v2
25+
with:
26+
client-id: ${{ secrets.ADO_SP_ClientID }}
27+
tenant-id: ${{ secrets.ADO_SP_TenantID }}
28+
allow-no-subscriptions: true
29+
- name: Trigger ADO Pipeline and Wait for Completion
30+
uses: azure/cli@v2
31+
env:
32+
ado-org: ${{secrets.ADO_ORGANIZATION}}
33+
ado-project: ${{secrets.ADO_PROJECT}}
34+
ado-pipeline-id: 396380
35+
commit-id: ${{ github.sha }}
36+
with:
37+
inlineScript: |
38+
# Trigger the pipeline and capture the build ID
39+
echo "Triggering ADO pipeline..."
40+
BUILD_RESULT=$(az pipelines build queue \
41+
--definition-id ${{ env.ado-pipeline-id }} \
42+
--organization ${{ env.ado-org }} \
43+
--project ${{ env.ado-project }} \
44+
--variables commit_id=${{ env.commit-id }} \
45+
--output json)
46+
47+
BUILD_ID=$(echo $BUILD_RESULT | jq -r '.id')
48+
echo "Pipeline triggered with Build ID: $BUILD_ID"
49+
50+
if [ "$BUILD_ID" = "null" ] || [ -z "$BUILD_ID" ]; then
51+
echo "Failed to get build ID from pipeline trigger"
52+
exit 1
53+
fi
54+
55+
# Wait for the build to complete
56+
echo "Waiting for build $BUILD_ID to complete..."
57+
while true; do
58+
BUILD_JSON=$(az pipelines build show \
59+
--id $BUILD_ID \
60+
--organization ${{ env.ado-org }} \
61+
--project ${{ env.ado-project }} \
62+
--output json)
63+
64+
BUILD_STATUS=$(echo "$BUILD_JSON" | jq -r '.status')
65+
BUILD_RESULT_STATUS=$(echo "$BUILD_JSON" | jq -r '.result // "none"')
66+
67+
echo "Current status: $BUILD_STATUS, Result: $BUILD_RESULT_STATUS"
68+
69+
# Check if build is completed
70+
if [ "$BUILD_STATUS" = "completed" ]; then
71+
echo "Build completed with result: $BUILD_RESULT_STATUS"
72+
73+
# Check if the build was successful
74+
if [ "$BUILD_RESULT_STATUS" = "succeeded" ]; then
75+
echo "✅ ADO pipeline build succeeded!"
76+
exit 0
77+
elif [ "$BUILD_RESULT_STATUS" = "partiallySucceeded" ]; then
78+
echo "⚠️ ADO pipeline build partially succeeded"
79+
exit 1
80+
else
81+
echo "❌ ADO pipeline build failed with result: $BUILD_RESULT_STATUS"
82+
exit 1
83+
fi
84+
fi
85+
86+
# Check for other terminal states
87+
if [ "$BUILD_STATUS" = "cancelling" ] || [ "$BUILD_STATUS" = "cancelled" ]; then
88+
echo "❌ ADO pipeline build was cancelled"
89+
exit 1
90+
fi
91+
92+
# Wait 30 seconds before checking again
93+
echo "Build still running... waiting 30 seconds"
94+
sleep 30
95+
done

src/aks-agent/HISTORY.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,18 @@ To release a new version, please select a new version number (usually plus 1 to
1111

1212
Pending
1313
+++++++
14+
* Don't print the version check in the bottom toolbar
15+
16+
1.0.0b2
17+
+++++++
18+
19+
* Add MCP integration for `az aks agent` with aks-mcp binary management and local server lifecycle (download, version validation, start/stop, health checks).
20+
* Introduce dual-mode operation: MCP mode (enhanced) and Traditional mode (built-in toolsets), with mode-specific system prompts.
21+
* Implement smart toolset refresh strategy with persisted mode state to avoid unnecessary refresh on repeated runs.
22+
* Add `--no-aks-mcp` flag to force Traditional mode when desired.
23+
* Add `az aks agent status` command to display MCP binary availability/version, server health, and overall mode/readiness.
24+
* Add structured error handling with user-friendly messages and actionable suggestions for MCP/binary/server/config errors.
25+
* Port and adapt comprehensive unit tests covering binary manager, MCP manager, configuration generation/validation, status models/collection, error handling, user feedback, parameters, smart refresh, MCP integration, and status command.
1426

1527
1.0.0b1
1628
+++++++

src/aks-agent/azext_aks_agent/_consts.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,11 @@
88
CONST_AGENT_NAME = "AKS AGENT"
99
CONST_AGENT_NAME_ENV_KEY = "AGENT_NAME"
1010
CONST_AGENT_CONFIG_FILE_NAME = "aksAgent.yaml"
11+
12+
# MCP Integration Constants (ported from previous change)
13+
CONST_MCP_BINARY_NAME = "aks-mcp"
14+
CONST_MCP_DEFAULT_PORT = 8003
15+
CONST_MCP_DEFAULT_URL = "http://localhost:8003/sse"
16+
CONST_MCP_MIN_VERSION = "0.0.7"
17+
CONST_MCP_GITHUB_REPO = "Azure/aks-mcp"
18+
CONST_MCP_BINARY_DIR = "bin"

src/aks-agent/azext_aks_agent/_help.py

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,14 @@
2626
short-summary: Name of the resource group.
2727
- name: --model
2828
type: string
29-
short-summary: Model to use for the LLM.
29+
short-summary: Specify the LLM provider and model or deployment to use for the AI assistant.
30+
long-summary: |-
31+
The --model parameter determines which large language model (LLM) and provider will be used to analyze your cluster.
32+
For OpenAI, use the model name directly (e.g., gpt-4o).
33+
For Azure OpenAI, use `azure/<deployment name>` (e.g., azure/gpt-4.1).
34+
Each provider may require different environment variables and model naming conventions.
35+
For a full list of supported providers, model patterns, and required environment variables, see https://docs.litellm.ai/docs/providers.
36+
Note: For Azure OpenAI, it is recommended to set the deployment name as the model name until https://github.com/BerriAI/litellm/issues/13950 is resolved.
3037
- name: --api-key
3138
type: string
3239
short-summary: API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY).
@@ -48,36 +55,30 @@
4855
- name: --refresh-toolsets
4956
type: bool
5057
short-summary: Refresh the toolsets status.
58+
- name: --status
59+
type: bool
60+
short-summary: Show AKS agent configuration and status information.
61+
- name: --no-aks-mcp
62+
type: bool
63+
short-summary: Disable AKS MCP integration and use traditional toolsets.
5164
5265
examples:
5366
- name: Ask about pod issues in the cluster with Azure OpenAI
5467
text: |-
5568
export AZURE_API_BASE="https://my-azureopenai-service.openai.azure.com/"
5669
export AZURE_API_VERSION="2025-01-01-preview"
5770
export AZURE_API_KEY="sk-xxx"
58-
az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment
71+
az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model azure/gpt-4.1
5972
- name: Ask about pod issues in the cluster with OpenAI
6073
text: |-
6174
export OPENAI_API_KEY="sk-xxx"
6275
az aks agent "Why are my pods not starting?" --name MyManagedCluster --resource-group MyResourceGroup --model gpt-4o
63-
- name: Run in interactive mode without a question
64-
text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment --api-key "sk-xxx"
65-
- name: Run in non-interactive batch mode
66-
text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
67-
- name: Show detailed tool output during analysis
68-
text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/my-gpt4.1-deployment
69-
- name: Use custom configuration file
70-
text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/my-gpt4.1-deployment
71-
- name: Run agent with no echo of the original question
72-
text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/my-gpt4.1-deployment
73-
- name: Refresh toolsets to get the latest available tools
74-
text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/my-gpt4.1-deployment
7576
- name: Run agent with config file
7677
text: |
77-
az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml
78+
az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --name MyManagedCluster --resource-group MyResourceGroup
7879
Here is an example of config file:
7980
```yaml
80-
model: "gpt-4o"
81+
model: "azure/gpt-4.1"
8182
api_key: "..."
8283
# define a list of mcp servers, mcp server can be defined
8384
mcp_servers:
@@ -103,4 +104,30 @@
103104
aks/core:
104105
enabled: false
105106
```
107+
- name: Run in interactive mode without a question
108+
text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/gpt-4.1 --api-key "sk-xxx"
109+
- name: Run in non-interactive batch mode
110+
text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/gpt-4.1
111+
- name: Show detailed tool output during analysis
112+
text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/gpt-4.1
113+
- name: Use custom configuration file
114+
text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/gpt-4.1
115+
- name: Run agent with no echo of the original question
116+
text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/gpt-4.1
117+
- name: Refresh toolsets to get the latest available tools
118+
text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/gpt-4.1
119+
- name: Show agent status (MCP readiness)
120+
text: az aks agent --status
121+
- name: Run in interactive mode without a question
122+
text: az aks agent "Check the pod status in my cluster" --name MyManagedCluster --resource-group MyResourceGroup --model azure/my-gpt4.1-deployment --api-key "sk-xxx"
123+
- name: Run in non-interactive batch mode
124+
text: az aks agent "Diagnose networking issues" --no-interactive --max-steps 15 --model azure/my-gpt4.1-deployment
125+
- name: Show detailed tool output during analysis
126+
text: az aks agent "Why is my service workload unavailable in namespace workload-ns?" --show-tool-output --model azure/my-gpt4.1-deployment
127+
- name: Use custom configuration file
128+
text: az aks agent "Check kubernetes pod resource usage" --config-file /path/to/custom.yaml --model azure/my-gpt4.1-deployment
129+
- name: Run agent with no echo of the original question
130+
text: az aks agent "What is the status of my cluster?" --no-echo-request --model azure/my-gpt4.1-deployment
131+
- name: Refresh toolsets to get the latest available tools
132+
text: az aks agent "What is the status of my cluster?" --refresh-toolsets --model azure/my-gpt4.1-deployment
106133
"""

src/aks-agent/azext_aks_agent/_params.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ def load_arguments(self, _):
1717
with self.argument_context("aks agent") as c:
1818
c.positional(
1919
"prompt",
20+
nargs='?',
2021
help="Ask any question and answer using available tools.",
22+
required=False,
2123
)
2224
c.argument(
2325
"resource_group_name",
@@ -47,12 +49,12 @@ def load_arguments(self, _):
4749
)
4850
c.argument(
4951
"model",
50-
help="The model to use for the LLM.",
52+
help="Specify the LLM provider and model or deployment to use for the AI assistant.",
5153
required=False,
5254
type=str,
5355
)
5456
c.argument(
55-
"api-key",
57+
"api_key",
5658
help="API key to use for the LLM (if not given, uses environment variables AZURE_API_KEY, OPENAI_API_KEY)",
5759
required=False,
5860
type=str,
@@ -77,3 +79,15 @@ def load_arguments(self, _):
7779
help="Refresh the toolsets status.",
7880
action="store_true",
7981
)
82+
c.argument(
83+
"status",
84+
options_list=["--status"],
85+
action="store_true",
86+
help="Show AKS agent configuration and status information.",
87+
)
88+
c.argument(
89+
"no_aks_mcp",
90+
options_list=["--no-aks-mcp"],
91+
help="Disable AKS MCP integration and use traditional toolsets.",
92+
action="store_true",
93+
)

0 commit comments

Comments
 (0)