-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathagentic_release.py
More file actions
68 lines (59 loc) · 2.66 KB
/
agentic_release.py
File metadata and controls
68 lines (59 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os, time, requests, subprocess
from crewai import Agent, Task, Crew, LLM
from crewai.tools import tool
from phoenix.otel import register
from openinference.instrumentation.openai import OpenAIInstrumentor
# Import-time side effect: configure Phoenix OTel tracing for the
# "manifest-driven-sre" project, exporting to the given localhost endpoint.
register(project_name="manifest-driven-sre", endpoint="http://localhost:6006/v1/traces")
# Enable the OpenInference OpenAI instrumentor.
# NOTE(review): the LLM configured below is an Ollama model — confirm that
# this instrumentor actually captures those calls.
OpenAIInstrumentor().instrument()
class K8sOrchestrator:
    """Container for the tools handed to the release agent.

    Both members are wrapped by ``@tool`` and deliberately take no ``self``:
    they are read as attributes (``k8s.manage_manifest``) and passed to the
    agent's ``tools`` list. Their docstrings are left terse and unchanged
    because the tool wrapper exposes them to the LLM as the tool description.
    """

    @tool("Kubernetes Manifest Tool")
    def manage_manifest(action: str, file_path: str):
        """Action: 'apply' or 'delete'. file_path: path to the YAML file."""
        # The caller is an LLM: only allow the two documented verbs instead of
        # forwarding an arbitrary kubectl subcommand.
        verb = str(action).strip().lower()
        if verb not in ("apply", "delete"):
            return f"ERROR: unsupported action {action!r}; expected 'apply' or 'delete'."

        cmd = ["kubectl", verb, "-f", file_path]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        except subprocess.CalledProcessError as e:
            # str(e) only carries the exit code; kubectl's explanation is on
            # stderr, so surface it to the agent.
            return f"ERROR: {e}. {(e.stderr or '').strip()}"
        except Exception as e:
            # e.g. kubectl not installed. Returned as text (not raised) so the
            # agent can react to the failure.
            return f"ERROR: {e}"
        return f"SUCCESS: {verb} on {file_path}. {result.stdout}"

    @tool("Health Probe Tool")
    def run_probes():
        """Hits the canary endpoint http://localhost:5003/health exactly 5 times."""
        results = []
        for i in range(5):
            try:
                res = requests.get("http://localhost:5003/health", timeout=2)
            except requests.RequestException:
                # Only genuine transport failures are reported as such.
                results.append(f"Probe {i+1}: Connection Failed")
            else:
                # A non-JSON body (common on 5xx error pages) must not hide
                # the status code: that code is what drives the rollback.
                try:
                    data = res.json()
                except ValueError:
                    data = None
                node = data.get('node') if isinstance(data, dict) else None
                results.append(f"Probe {i+1}: Node: {node} | Status: {res.status_code}")
            # Space the probes one second apart.
            time.sleep(1)
        return "\n".join(results)
def start_autonomous_rollout():
    """Build a single-agent crew that applies and verifies the canary, then run it.

    The crew has two sequentially listed tasks: apply 'k8s/canary.yaml', then
    probe the health endpoint and delete the manifest if any probe reports a
    500. Returns whatever ``Crew.kickoff()`` returns.
    """
    model = LLM(model="ollama/tinyllama", base_url="http://localhost:11434", temperature=0.1)
    orchestrator = K8sOrchestrator()

    persona = (
        "You are a high-level orchestrator. You do not write code; you manage YAML files. "
        "Your only metric for success is the HTTP status of the /health endpoint. "
        "If any probe returns 500, delete the canary manifest immediately."
    )
    release_manager = Agent(
        role="Autonomous Release Manager",
        goal="Orchestrate canary rollouts using K8s manifests and rollback on failure.",
        backstory=persona,
        tools=[orchestrator.manage_manifest, orchestrator.run_probes],
        llm=model,
    )

    # Step 1: roll the canary out.
    apply_task = Task(
        description="Apply the canary manifest: 'k8s/canary.yaml'.",
        agent=release_manager,
        expected_output="Confirmation of manifest application.",
    )

    # Step 2: verify it, with the apply step's output supplied as context.
    verify_instructions = (
        "Wait 15s for the pod to pull the image, then run 'Health Probe Tool'. "
        "If any 500 status appears, 'delete' the 'k8s/canary.yaml' manifest. "
        "If all are 200, conclude that the release is stable."
    )
    verify_task = Task(
        description=verify_instructions,
        agent=release_manager,
        context=[apply_task],
        expected_output="Final RCA and Rollout Decision.",
    )

    crew = Crew(agents=[release_manager], tasks=[apply_task, verify_task])
    return crew.kickoff()
if __name__ == "__main__":
    # Script entry point: run the rollout and echo the crew's final output.
    outcome = start_autonomous_rollout()
    print(outcome)