Skip to content

Commit 80b18a6

Browse files
authored
Remove Holmes workload health actions (#1998)
Signed-off-by: Codex <codex@openai.com>
1 parent 7c38202 commit 80b18a6

4 files changed

Lines changed: 0 additions & 211 deletions

File tree

helm/robusta/values.yaml

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -91,11 +91,9 @@ lightActions:
9191
- prometheus_all_available_metrics
9292
- prometheus_get_series
9393
- prometheus_get_label_names
94-
- holmes_workload_health
9594
- holmes_conversation
9695
- holmes_issue_chat
9796
- holmes_chat
98-
- holmes_workload_chat
9997
- list_pods
10098
- kubectl_describe
10199
- fetch_resource_yaml

src/robusta/core/model/base_params.py

Lines changed: 0 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -232,44 +232,6 @@ class HolmesConversationParams(HolmesParams):
232232
include_tool_call_results: bool = True
233233

234234

235-
class HolmesWorkloadHealthParams(HolmesParams):
236-
"""
237-
:var ask: Override question to ask holmes
238-
:var resource: The resource related to this investigation. A resource has a `name` and `kind`, and may have `namespace` and `node`
239-
:var alert_history: fetch historical alert data on the resource
240-
:var alert_history_since_hours: Timespan of historic data to use in hours. 24 by default.
241-
:var stored_instrucitons: Use remote instructions specified for the workload.
242-
:var instructions: List of extra instructions to supply.
243-
:var silent_healthy: Does not create findings in the case of healthy workload.
244-
245-
:example ask: What are all the issues in my cluster right now?
246-
"""
247-
248-
ask: Optional[str]
249-
resource: Optional[ResourceInfo] = ResourceInfo()
250-
alert_history: bool = True
251-
alert_history_since_hours: float = 24
252-
stored_instrucitons: bool = True
253-
instructions: List[str] = []
254-
include_tool_calls: bool = True
255-
include_tool_call_results: bool = True
256-
silent_healthy: bool = False
257-
258-
259-
class HolmesWorkloadHealthChatParams(HolmesParams):
260-
"""
261-
:var ask: User's prompt for holmes
262-
:var workload_health_result: Result from the workload health check
263-
:var resource: The resource related to the initial investigation
264-
:var conversation_history: List of previous user prompts and responses.
265-
"""
266-
267-
ask: str
268-
workload_health_result: HolmesInvestigationResult
269-
resource: ResourceInfo
270-
conversation_history: Optional[list[dict]] = None
271-
272-
273235
class NamespacedResourcesParams(ActionParams):
274236
"""
275237
:var name: Resource name

src/robusta/core/playbooks/internal/ai_integration.py

Lines changed: 0 additions & 166 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,6 @@
1212
HolmesChatParams,
1313
HolmesConversationParams,
1414
HolmesIssueChatParams,
15-
HolmesWorkloadHealthChatParams,
16-
HolmesWorkloadHealthParams,
1715
ResourceInfo,
1816
)
1917
from robusta.core.model.events import ExecutionBaseEvent
@@ -35,7 +33,6 @@
3533
HolmesRequest,
3634
HolmesResult,
3735
HolmesResultsBlock,
38-
HolmesWorkloadHealthRequest,
3936
)
4037
from robusta.core.reporting.utils import convert_svg_to_png
4138
from robusta.core.stream.utils import (
@@ -44,10 +41,6 @@
4441
parse_sse_data,
4542
StreamEvents,
4643
)
47-
from robusta.core.schedule.model import FixedDelayRepeat
48-
from robusta.integrations.kubernetes.autogenerated.events import (
49-
KubernetesAnyChangeEvent,
50-
)
5144
from robusta.integrations.prometheus.utils import HolmesDiscovery
5245
from robusta.utils.error_codes import ActionException, ErrorCodes
5346

@@ -173,71 +166,6 @@ def ask_holmes(event: ExecutionBaseEvent, params: AIInvestigateParams):
173166
handle_holmes_error(e)
174167

175168

176-
@action
177-
def holmes_workload_health(
178-
event: ExecutionBaseEvent, params: HolmesWorkloadHealthParams
179-
):
180-
holmes_url = HolmesDiscovery.find_holmes_url(params.holmes_url)
181-
if not holmes_url:
182-
raise ActionException(
183-
ErrorCodes.HOLMES_DISCOVERY_FAILED,
184-
"Robusta couldn't connect to the Holmes client.",
185-
)
186-
187-
params.resource.cluster = event.get_context().cluster_name
188-
189-
try:
190-
result = requests.post(
191-
f"{holmes_url}/api/workload_health_check", data=params.json()
192-
)
193-
result.raise_for_status()
194-
195-
holmes_result = HolmesResult(**json.loads(result.text))
196-
197-
healthy = True
198-
try:
199-
analysis = json.loads(holmes_result.analysis)
200-
healthy = analysis.get("workload_healthy")
201-
except Exception:
202-
logging.exception(
203-
"Error in holmes response format, analysis did not return the expected json format."
204-
)
205-
pass
206-
207-
if params.silent_healthy and healthy:
208-
return
209-
210-
finding = Finding(
211-
title=f"AI Health check of {params.resource}",
212-
aggregation_key="HolmesHealthCheck",
213-
subject=FindingSubject(
214-
name=params.resource.name if params.resource else "",
215-
namespace=params.resource.namespace if params.resource else "",
216-
subject_type=(
217-
FindingSubjectType.from_kind(params.resource.kind)
218-
if params.resource
219-
else FindingSubjectType.TYPE_NONE
220-
),
221-
node=params.resource.node if params.resource else "",
222-
container=params.resource.container if params.resource else "",
223-
),
224-
finding_type=FindingType.AI_ANALYSIS,
225-
failure=False,
226-
)
227-
finding.add_enrichment(
228-
[HolmesResultsBlock(holmes_result=holmes_result)],
229-
enrichment_type=EnrichmentType.ai_analysis,
230-
)
231-
232-
event.add_finding(finding)
233-
except Exception as e:
234-
logging.exception(
235-
f"Failed to get holmes analysis for {params.resource}, {params.ask}",
236-
exc_info=True,
237-
)
238-
handle_holmes_error(e)
239-
240-
241169
def build_conversation_title(params: HolmesConversationParams) -> str:
242170
return (
243171
f"{params.resource}, {params.ask} for issue '{params.context.robusta_issue_id}'"
@@ -315,42 +243,6 @@ def holmes_conversation(event: ExecutionBaseEvent, params: HolmesConversationPar
315243
handle_holmes_error(e)
316244

317245

318-
class DelayedHealthCheckParams(HolmesWorkloadHealthParams):
319-
delay_seconds: int = 120
320-
321-
322-
@action
323-
def delayed_health_check(
324-
event: KubernetesAnyChangeEvent, action_params: DelayedHealthCheckParams
325-
):
326-
"""
327-
runs a holmes workload health action with a delay
328-
"""
329-
metadata = event.obj and event.obj.metadata
330-
331-
if not action_params.ask:
332-
action_params.ask = f"help me diagnose an issue with a workload {metadata.namespace}/{event.obj.kind}/{metadata.name} running in my Kubernetes cluster. Can you assist with identifying potential issues and pinpoint the root cause."
333-
334-
action_params.resource = ResourceInfo(
335-
name=metadata.name, namespace=metadata.namespace, kind=event.obj.kind
336-
)
337-
338-
logging.info(
339-
f"Scheduling health check. {metadata.name} delays: {action_params.delay_seconds}"
340-
)
341-
event.get_scheduler().schedule_action(
342-
action_func=holmes_workload_health,
343-
task_id=f"health_check_{metadata.name}_{metadata.namespace}",
344-
scheduling_params=FixedDelayRepeat(
345-
repeat=1, seconds_delay=action_params.delay_seconds
346-
),
347-
named_sinks=event.named_sinks,
348-
action_params=action_params,
349-
replace_existing=True,
350-
standalone_task=True,
351-
)
352-
353-
354246
@action
355247
def holmes_issue_chat(event: ExecutionBaseEvent, params: HolmesIssueChatParams):
356248
holmes_url = HolmesDiscovery.find_holmes_url(params.holmes_url)
@@ -493,64 +385,6 @@ def holmes_chat(event: ExecutionBaseEvent, params: HolmesChatParams):
493385
handle_holmes_error(e)
494386

495387

496-
@action
497-
def holmes_workload_chat(
498-
event: ExecutionBaseEvent, params: HolmesWorkloadHealthChatParams
499-
):
500-
holmes_url = HolmesDiscovery.find_holmes_url(params.holmes_url)
501-
if not holmes_url:
502-
raise ActionException(
503-
ErrorCodes.HOLMES_DISCOVERY_FAILED,
504-
"Robusta couldn't connect to the Holmes client.",
505-
)
506-
507-
try:
508-
holmes_req = HolmesWorkloadHealthRequest(
509-
ask=params.ask,
510-
conversation_history=params.conversation_history,
511-
workload_health_result=params.workload_health_result,
512-
resource=params.resource,
513-
model=params.model,
514-
)
515-
result = requests.post(
516-
f"{holmes_url}/api/workload_health_chat", data=holmes_req.json()
517-
)
518-
result.raise_for_status()
519-
520-
holmes_result = HolmesChatResult(**json.loads(result.text))
521-
522-
finding = Finding(
523-
title=f"AI Chat for Health Check of {params.resource}",
524-
aggregation_key="HolmesWorkloadConversationResult",
525-
subject=FindingSubject(
526-
name=params.resource.name if params.resource else "",
527-
namespace=params.resource.namespace if params.resource else "",
528-
subject_type=(
529-
FindingSubjectType.from_kind(params.resource.kind)
530-
if params.resource
531-
else FindingSubjectType.TYPE_NONE
532-
),
533-
node=params.resource.node if params.resource else "",
534-
container=params.resource.container if params.resource else "",
535-
),
536-
finding_type=FindingType.AI_ANALYSIS,
537-
failure=False,
538-
)
539-
finding.add_enrichment(
540-
[HolmesChatResultsBlock(holmes_result=holmes_result)],
541-
enrichment_type=EnrichmentType.ai_analysis,
542-
)
543-
544-
event.add_finding(finding)
545-
546-
except Exception as e:
547-
logging.exception(
548-
f"Failed to get holmes chat for health check of {params.resource}",
549-
exc_info=True,
550-
)
551-
handle_holmes_error(e)
552-
553-
554388
def stream_and_render_graphs(url, holmes_req, event):
555389
with requests.post(
556390
url,

src/robusta/core/reporting/holmes.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -93,8 +93,3 @@ class HolmesChatResult(BaseModel):
9393

9494
class HolmesChatResultsBlock(BaseBlock):
9595
holmes_result: Optional[HolmesChatResult]
96-
97-
98-
class HolmesWorkloadHealthRequest(HolmesChatRequest):
99-
workload_health_result: HolmesInvestigationResult
100-
resource: ResourceInfo

0 commit comments

Comments
 (0)