Skip to content

Commit 5c28656

Browse files
committed
Merge branch 'master' into ROB-746-get-resource-yaml
2 parents 85f68ca + e3c320d commit 5c28656

10 files changed

Lines changed: 240 additions & 31 deletions

File tree

docs/configuration/holmesgpt/builtin_toolsets.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Builtin Toolsets
99
toolsets/argocd
1010
toolsets/aws
1111
toolsets/confluence
12+
toolsets/datetime
1213
toolsets/docker
1314
toolsets/grafanaloki
1415
toolsets/grafanatempo
@@ -17,6 +18,7 @@ Builtin Toolsets
1718
toolsets/kafka
1819
toolsets/kubernetes
1920
toolsets/opensearch
21+
toolsets/prometheus
2022
toolsets/robusta
2123
toolsets/slab
2224

@@ -50,6 +52,11 @@ by the user by providing credentials or API keys to external systems.
5052
:link: toolsets/confluence
5153
:link-type: doc
5254

55+
.. grid-item-card:: :octicon:`cpu;1em;` Datetime
56+
:class-card: sd-bg-light sd-bg-text-light
57+
:link: toolsets/datetime
58+
:link-type: doc
59+
5360
.. grid-item-card:: :octicon:`cpu;1em;` Docker
5461
:class-card: sd-bg-light sd-bg-text-light
5562
:link: toolsets/docker
@@ -79,11 +86,17 @@ by the user by providing credentials or API keys to external systems.
7986
:class-card: sd-bg-light sd-bg-text-light
8087
:link: toolsets/kubernetes
8188
:link-type: doc
89+
8290
.. grid-item-card:: :octicon:`cpu;1em;` OpenSearch
8391
:class-card: sd-bg-light sd-bg-text-light
8492
:link: toolsets/opensearch
8593
:link-type: doc
8694

95+
.. grid-item-card:: :octicon:`cpu;1em;` Prometheus
96+
:class-card: sd-bg-light sd-bg-text-light
97+
:link: toolsets/prometheus
98+
:link-type: doc
99+
87100
.. grid-item-card:: :octicon:`cpu;1em;` Robusta
88101
:class-card: sd-bg-light sd-bg-text-light
89102
:link: toolsets/robusta
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
Datetime :checkmark:`_`
2+
=======================
3+
.. include:: ./_toolset_enabled_by_default.inc.rst
4+
5+
By enabling this toolset, HolmesGPT will be able to get the current UTC date and time.
6+
This feature works well with other toolsets. For example, the :doc:`prometheus <prometheus>`
7+
toolset needs ``start`` and ``end`` time parameters to properly create and execute
8+
PromQL queries.
9+
10+
Configuration
11+
-------------
12+
13+
.. code-block:: yaml
14+
15+
holmes:
16+
toolsets:
17+
datetime:
18+
enabled: true
19+
20+
.. include:: ./_toolset_configuration.inc.rst
21+
22+
Capabilities
23+
------------
24+
.. include:: ./_toolset_capabilities.inc.rst
25+
26+
.. list-table::
27+
:header-rows: 1
28+
:widths: 30 70
29+
30+
* - Tool Name
31+
- Description
32+
* - get_current_time
33+
- Return current time information. Useful for building queries that require time information
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
Prometheus
2+
==========
3+
4+
By enabling this toolset, HolmesGPT will be able to generate graphs from prometheus metrics as well as help you write and
5+
validate prometheus queries.
6+
7+
There is also an option for Holmes to analyze prometheus metrics. When enabled, HolmesGPT can detect memory leak patterns,
8+
CPU throttling, high latency for your APIs, etc. The configuration field to enable prometheus metrics analysis is
9+
``tool_calls_return_data``.
10+
11+
Configuration
12+
-------------
13+
14+
.. md-tab-set::
15+
16+
.. md-tab-item:: Robusta Helm Chart
17+
18+
.. code-block:: yaml
19+
20+
holmes:
21+
toolsets:
22+
prometheus/metrics:
23+
enabled: true
24+
config:
25+
prometheus_url: ...
26+
metrics_labels_time_window_hrs: 48 # default value
27+
metrics_labels_cache_duration_hrs: 12 # default value
28+
fetch_labels_with_labels_api: false # default value
29+
fetch_metadata_with_series_api: false # default value
30+
tool_calls_return_data: false # default value
31+
headers:
32+
Authorization: "Basic <base_64_encoded_string>"
33+
34+
35+
.. include:: ./_toolset_configuration.inc.rst
36+
37+
.. md-tab-item:: Holmes CLI
38+
39+
Add the following to **~/.holmes/config.yaml**, creating the file if it doesn't exist:
40+
41+
.. code-block:: yaml
42+
43+
toolsets:
44+
prometheus/metrics:
45+
enabled: true
46+
config:
47+
prometheus_url: ...
48+
metrics_labels_time_window_hrs: 48 # default value
49+
metrics_labels_cache_duration_hrs: 12 # default value
50+
fetch_labels_with_labels_api: false # default value
51+
fetch_metadata_with_series_api: false # default value
52+
tool_calls_return_data: false # default value
53+
headers:
54+
Authorization: "Basic <base_64_encoded_string>"
55+
56+
It is also possible to set the ``PROMETHEUS_URL`` environment variable instead of the above ``prometheus_url`` config key.
57+
58+
Prior to generating a PromQL query, HolmesGPT tends to list the available metrics. This is done to ensure the metrics used
59+
in PromQL are actually available.
60+
61+
Below is the full list of options for this toolset:
62+
63+
- **metrics_labels_time_window_hrs** Represents the time window, in hours, over which labels are fetched. This avoids fetching obsolete labels. Set it to ``null`` to let HolmesGPT fetch labels regardless of when they were generated.
64+
- **metrics_labels_cache_duration_hrs** How long are labels cached, in hours. Set it to ``null`` to disable caching.
65+
- **fetch_labels_with_labels_api** Uses prometheus `labels API <https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names>`_ to fetch labels instead of the `series API <https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers>`_. In some cases setting to True can improve the performance of the toolset, however there will be an increased number of HTTP calls to prometheus. You can experiment with both as they are functionally identical.
66+
- **fetch_metadata_with_series_api** Uses the `series API <https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers>`_ instead of the `metadata API <https://prometheus.io/docs/prometheus/latest/querying/api/#querying-metric-metadata>`_. You should only set this value to `true` if the metadata API is disabled or not working. HolmesGPT's ability to select the right metric will be negatively impacted because the series API does not return key metadata like the metrics/series description or their type (gauge, histogram, etc.).
67+
- **tool_calls_return_data** Experimental. If true, the prometheus data will be available to HolmesGPT. In some cases, HolmesGPT will be able to detect memory leaks or other anomalies. This is disabled by default to reduce the likelihood of reaching the input token limit.
68+
- **headers** Extra headers to pass to all prometheus http requests. Use this to pass authentication. Prometheus `supports basic authentication <https://prometheus.io/docs/guides/basic-auth/>`_.
69+
70+
Capabilities
71+
------------
72+
.. include:: ./_toolset_capabilities.inc.rst
73+
74+
.. list-table::
75+
:header-rows: 1
76+
:widths: 30 70
77+
78+
* - Tool Name
79+
- Description
80+
* - list_available_metrics
81+
- List all the available metrics to query from prometheus, including their types (counter, gauge, histogram, summary) and available labels.
82+
* - execute_prometheus_instant_query
83+
- Execute an instant PromQL query
84+
* - execute_prometheus_range_query
85+
- Execute a PromQL range query

src/robusta/core/model/base_params.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ class AIInvestigateParams(HolmesParams):
111111
ask: Optional[str]
112112
context: Optional[Dict[str, Any]]
113113
sections: Optional[Dict[str, str]] = None
114+
stream: bool = False
114115

115116

116117
class HolmesToolsResult(BaseModel):

src/robusta/core/model/events.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from collections import defaultdict
55
from dataclasses import dataclass, field
66
from enum import Enum
7-
from typing import Any, Dict, List, Optional
7+
from typing import Any, Dict, List, Optional, Callable
88

99
from pydantic import BaseModel
1010

@@ -59,6 +59,7 @@ class ExecutionBaseEvent:
5959
_scheduler: Optional[PlaybooksScheduler] = None
6060
_context: Optional[ExecutionContext] = None
6161
_event_emitter: Optional[EventEmitter] = None
62+
_ws: Optional[Callable[[str], None]] = None
6263

6364
def set_context(self, context: ExecutionContext):
6465
self._context = context

src/robusta/core/playbooks/internal/ai_integration.py

Lines changed: 35 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -62,35 +62,43 @@ def ask_holmes(event: ExecutionBaseEvent, params: AIInvestigateParams):
6262
include_tool_call_results=True,
6363
sections=params.sections
6464
)
65-
result = requests.post(f"{holmes_url}/api/investigate", data=holmes_req.json())
66-
result.raise_for_status()
6765

68-
holmes_result = HolmesResult(**json.loads(result.text))
69-
title_suffix = (
70-
f" on {params.resource.name}"
71-
if params.resource and params.resource.name and params.resource.name.lower() != "unresolved"
72-
else ""
73-
)
74-
75-
kind = params.resource.kind if params.resource else None
76-
finding = Finding(
77-
title=f"AI Analysis of {investigation__title}{title_suffix}",
78-
aggregation_key="HolmesInvestigationResult",
79-
subject=FindingSubject(
80-
name=params.resource.name if params.resource else "",
81-
namespace=params.resource.namespace if params.resource else "",
82-
subject_type=FindingSubjectType.from_kind(kind) if kind else FindingSubjectType.TYPE_NONE,
83-
node=params.resource.node if params.resource else "",
84-
container=params.resource.container if params.resource else "",
85-
),
86-
finding_type=FindingType.AI_ANALYSIS,
87-
failure=False,
88-
)
89-
finding.add_enrichment(
90-
[HolmesResultsBlock(holmes_result=holmes_result)], enrichment_type=EnrichmentType.ai_analysis
91-
)
66+
if params.stream:
67+
with requests.post(f"{holmes_url}/api/stream/investigate", data=holmes_req.json(), stream=True) as resp:
68+
for line in resp.iter_content(chunk_size=None, decode_unicode=True): # Avoid streaming chunks from holmes. send them as they arrive.
69+
event.ws(data=line)
70+
return
9271

93-
event.add_finding(finding)
72+
else:
73+
result = requests.post(f"{holmes_url}/api/investigate", data=holmes_req.json())
74+
result.raise_for_status()
75+
76+
holmes_result = HolmesResult(**json.loads(result.text))
77+
title_suffix = (
78+
f" on {params.resource.name}"
79+
if params.resource and params.resource.name and params.resource.name.lower() != "unresolved"
80+
else ""
81+
)
82+
83+
kind = params.resource.kind if params.resource else None
84+
finding = Finding(
85+
title=f"AI Analysis of {investigation__title}{title_suffix}",
86+
aggregation_key="HolmesInvestigationResult",
87+
subject=FindingSubject(
88+
name=params.resource.name if params.resource else "",
89+
namespace=params.resource.namespace if params.resource else "",
90+
subject_type=FindingSubjectType.from_kind(kind) if kind else FindingSubjectType.TYPE_NONE,
91+
node=params.resource.node if params.resource else "",
92+
container=params.resource.container if params.resource else "",
93+
),
94+
finding_type=FindingType.AI_ANALYSIS,
95+
failure=False,
96+
)
97+
finding.add_enrichment(
98+
[HolmesResultsBlock(holmes_result=holmes_result)], enrichment_type=EnrichmentType.ai_analysis
99+
)
100+
101+
event.add_finding(finding)
94102

95103
except Exception as e:
96104
logging.exception(

src/robusta/core/playbooks/playbooks_event_handler.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from abc import ABC, abstractmethod
2-
from typing import Any, Dict, List, Optional
2+
from typing import Any, Callable, Dict, List, Optional
33

44
from robusta.core.model.events import ExecutionBaseEvent
55
from robusta.core.playbooks.base_trigger import TriggerEvent
@@ -39,6 +39,13 @@ def run_external_action(
3939
"""Execute an external action"""
4040
pass
4141

42+
@abstractmethod
43+
def run_external_stream_action(
44+
self, action_name: str, action_params: Optional[dict], stream: Callable[str, Optional[str]]
45+
) -> Optional[Dict[str, Any]]:
46+
"""Execute an external stream action"""
47+
pass
48+
4249
@abstractmethod
4350
def get_global_config(self) -> dict:
4451
"""Return runner global config"""

src/robusta/core/playbooks/playbooks_event_handler_impl.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ def __run_playbook_actions(
197197
start_time = time.time()
198198
source: str = (
199199
"manual_action"
200-
if any(name == SYNC_RESPONSE_SINK for name in getattr(execution_event, "named_sinks", []))
200+
if any(name == SYNC_RESPONSE_SINK for name in (execution_event.named_sinks or []))
201201
else ""
202202
)
203203
self.__prepare_execution_event(execution_event)
@@ -368,3 +368,37 @@ def handle_sigint(self, sig, frame):
368368

369369
self.set_cluster_active(False)
370370
sys.exit(0)
371+
372+
def run_external_stream_action(
373+
self, action_name: str, action_params: Optional[dict], ws
374+
) -> Optional[Dict[str, Any]]:
375+
action_def = self.registry.get_actions().get_action(action_name)
376+
if not action_def:
377+
return self.__error_resp(f"External action not found {action_name}", ErrorCodes.ACTION_NOT_FOUND.value)
378+
379+
if not action_def.from_params_func:
380+
return self.__error_resp(
381+
f"Action {action_name} cannot run using external event", ErrorCodes.NOT_EXTERNAL_ACTION.value
382+
)
383+
384+
try:
385+
instantiation_params = action_def.from_params_parameter_class(**action_params)
386+
except Exception:
387+
return self.__error_resp(
388+
f"Failed to create execution instance for"
389+
f" {action_name} {action_def.from_params_parameter_class}"
390+
f" {action_params} {traceback.format_exc()}",
391+
ErrorCodes.EVENT_PARAMS_INSTANTIATION_FAILED.value,
392+
)
393+
394+
execution_event = action_def.from_params_func(instantiation_params)
395+
if not execution_event:
396+
return self.__error_resp(
397+
f"Failed to create execution event for {action_name} {action_params}",
398+
ErrorCodes.EVENT_INSTANTIATION_FAILED.value,
399+
)
400+
401+
execution_event.ws = ws
402+
playbook_action = PlaybookAction(action_name=action_name, action_params=action_params)
403+
404+
return self.__run_playbook_actions(execution_event, [playbook_action])

src/robusta/core/reporting/action_requests.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class ExternalActionRequest(BaseModel):
2727
partial_auth_b: str = "" # Auth for public key auth protocol option - should be added by the relay
2828
request_id: str = "" # If specified, should return a sync response using the specified request_id
2929
no_sinks: bool = False # Indicates not to send to sinks at all. The request body has a sink list,
30+
stream: bool = False
3031
# however an empty sink list means using the server default sinks
3132

3233

src/robusta/integrations/receiver.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,12 @@ def stop(self):
136136
def __sync_response(cls, status_code: int, request_id: str, data) -> Dict:
137137
return {"action": "response", "request_id": request_id, "status_code": status_code, "data": data}
138138

139+
def __stream_response(self, request_id: str, data: str):
140+
self.ws.send(data=json.dumps({"action": "stream", "request_id": request_id, "data": data}))
141+
142+
def __close_stream_response(self, request_id: str, data: str):
143+
self.ws.send(data=json.dumps({"action": "stream", "request_id": request_id, "data": data, "close": True}))
144+
139145
def __exec_external_request(self, action_request: ExternalActionRequest, validate_timestamp: bool):
140146
logging.debug(f"Callback `{action_request.body.action_name}` {to_safe_str(action_request.body.action_params)}")
141147
sync_response = action_request.request_id != "" # if request_id is set, we need to write back the response
@@ -175,6 +181,23 @@ def __exec_external_request(self, action_request: ExternalActionRequest, validat
175181
http_code = 200 if response.get("success") else 500
176182
self.ws.send(data=json.dumps(self.__sync_response(http_code, action_request.request_id, response)))
177183

184+
def __exec_external_stream_request(self, action_request: ExternalActionRequest, validate_timestamp: bool):
185+
logging.debug(f"Callback `{action_request.body.action_name}` {to_safe_str(action_request.body.action_params)}")
186+
187+
validation_response = self.__validate_request(action_request, validate_timestamp)
188+
if validation_response.http_code != 200:
189+
req_json = action_request.json(exclude={"body"})
190+
body_json = action_request.body.json(exclude={"action_params"}) # action params already printed above
191+
logging.error(f"Failed to validate action request {req_json} {body_json}")
192+
self.__close_stream_response(action_request.request_id, validation_response.dict(exclude={"http_code"}))
193+
return
194+
195+
res = self.event_handler.run_external_stream_action(action_request.body.action_name,
196+
action_request.body.action_params,
197+
lambda data: self.__stream_response(request_id=action_request.request_id, data=data))
198+
res = "" if res.get("success") else json.dumps(res)
199+
self.__close_stream_response(action_request.request_id, res)
200+
178201
def _process_action(self, action: ExternalActionRequest, validate_timestamp: bool) -> None:
179202
self._executor.submit(self._process_action_sync, action, validate_timestamp)
180203

@@ -189,7 +212,10 @@ def _process_action_sync(self, action: ExternalActionRequest, validate_timestamp
189212
else:
190213
ctx = nullcontext()
191214
with ctx:
192-
self.__exec_external_request(action, validate_timestamp)
215+
if action.stream:
216+
self.__exec_external_stream_request(action, validate_timestamp)
217+
else:
218+
self.__exec_external_request(action, validate_timestamp)
193219
except Exception:
194220
logging.error(
195221
f"Failed to run incoming event {self._stringify_incoming_event(action.dict())}",

0 commit comments

Comments
 (0)