Skip to content

Commit 4803456

Browse files
authored
Merge branch 'master' into doc-improvements-prometheus-holmes
2 parents ed4f45e + f223c59 commit 4803456

10 files changed

Lines changed: 323 additions & 21 deletions

File tree

docs/_static/custom.css

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,37 @@ h3 {
298298
margin-bottom: 20px !important;
299299
}
300300

301+
/* Back to Top Button */
302+
.back-to-top {
303+
position: fixed;
304+
bottom: 30px;
305+
right: 30px;
306+
width: 50px;
307+
height: 50px;
308+
background-color: var(--md-primary-fg-color);
309+
color: white;
310+
border-radius: 50%;
311+
text-align: center;
312+
line-height: 50px;
313+
font-size: 20px;
314+
text-decoration: none;
315+
opacity: 0;
316+
visibility: hidden;
317+
z-index: 1000;
318+
transition: opacity 0.3s, visibility 0.3s;
319+
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.3);
320+
cursor: pointer;
321+
}
322+
323+
.back-to-top.visible {
324+
opacity: 0.7;
325+
visibility: visible;
326+
}
327+
328+
.back-to-top:hover {
329+
opacity: 1;
330+
}
331+
301332
.sd-sphinx-override p {
302333
margin-bottom: 0px;
303334
}

docs/_templates/base.html

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
{% extends '!base.html' %}
22
{% block site_meta %}
33
{{ super() }}
4+
<!-- Back to Top Button -->
5+
<a href="#" class="back-to-top" aria-label="Back to top">
6+
<span></span>
7+
</a>
48
<!-- PostHog and Segment -->
59
<script>
610
!function(t,e){var o,n,p,r;e.__SV||(window.posthog=e,e._i=[],e.init=function(i,s,a){function g(t,e){var o=e.split(".");2==o.length&&(t=t[o[0]],e=o[1]),t[e]=function(){t.push([e].concat(Array.prototype.slice.call(arguments,0)))}}(p=t.createElement("script")).type="text/javascript",p.async=!0,p.src=s.api_host+"/static/array.js",(r=t.getElementsByTagName("script")[0]).parentNode.insertBefore(p,r);var u=e;for(void 0!==a?u=e[a]=[]:a="posthog",u.people=u.people||[],u.toString=function(t){var e="posthog";return"posthog"!==a&&(e+="."+a),t||(e+=" (stub)"),e},u.people.toString=function(){return u.toString(1)+".people (stub)"},o="capture identify alias people.set people.set_once set_config register register_once unregister opt_out_capturing has_opted_out_capturing opt_in_capturing reset isFeatureEnabled onFeatureFlags".split(" "),n=0;n<o.length;n++)g(u,o[n]);e._i.push([i,s,a])},e.__SV=1)}(document,window.posthog||[]);
@@ -27,6 +31,26 @@
2731
document.getElementById('__palette_1').onclick = () => refreshRobustaLogo(true);
2832
document.getElementById('__palette_2').onclick = () => refreshRobustaLogo(false);
2933

34+
// Back to top button functionality
35+
const backToTopButton = document.querySelector('.back-to-top');
36+
37+
// Show button when page is scrolled down
38+
window.addEventListener('scroll', function() {
39+
if (window.pageYOffset > 300) {
40+
backToTopButton.classList.add('visible');
41+
} else {
42+
backToTopButton.classList.remove('visible');
43+
}
44+
});
45+
46+
// Smooth scroll to top when button is clicked
47+
backToTopButton.addEventListener('click', function(e) {
48+
e.preventDefault();
49+
window.scrollTo({
50+
top: 0,
51+
behavior: 'smooth'
52+
});
53+
});
3054
}, false);
3155
</script>
3256
{% endblock %}

docs/configuration/holmesgpt/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Robusta can integrate with `Holmes GPT <https://github.com/robusta-dev/holmesgpt
1818

1919
When available, AI based investigations can be launched in one of two ways:
2020

21-
1. Click the ``Ask Holmes`` button in Slack. The AI investigation will be sent back as a new message.
21+
1. Click the ``Ask HolmesGPT`` button in Slack. The AI investigation will be sent back as a new message.
2222

2323
.. image:: /images/robusta-holmes-investigation.png
2424
:width: 600px

docs/configuration/holmesgpt/toolsets/coralogix_logs.rst

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,6 @@ Configuration
4141
api_key: <your coralogix API key>
4242
domain: eu2.coralogix.com # Your Coralogix domain
4343
team_hostname: my-team # Your team's hostname in coralogix, without the domain part
44-
labels:
45-
pod: "kubernetes.pod_name"
46-
namespace: "kubernetes.namespace_name"
47-
app: "kubernetes.labels.app"
4844
4945
kubernetes/logs:
5046
enabled: false # Disable HolmesGPT's default logging mechanism
@@ -65,19 +61,61 @@ Configuration
6561
api_key: <your coralogix API key>
6662
domain: eu2.coralogix.com # Your Coralogix domain
6763
team_hostname: my-team # Your team's hostname in coralogix
68-
labels:
69-
pod: "kubernetes.pod_name"
70-
namespace: "kubernetes.namespace_name"
71-
app: "kubernetes.labels.app"
7264
7365
kubernetes/logs:
7466
enabled: false # Disable HolmesGPT's default logging mechanism
7567
68+
Advanced Configuration
69+
^^^^^^^^^^^^^^^^^^^^^^
70+
71+
**Frequent logs and archive**
72+
73+
By default, HolmesGPT fetches logs from the `Frequent search <https://coralogix.com/docs/user-guides/account-management/tco-optimizer/logs/#frequent-search-data-high-priority>`_
74+
tier and only fetches logs from the `Archive` tier if the frequent search returns no results.
75+
76+
This behaviour can be customised using the ``logs_retrieval_methodology`` configuration field:
77+
78+
.. code-block:: yaml
79+
80+
toolsets:
81+
coralogix/logs:
82+
enabled: true
83+
config:
84+
# Possible values are:
85+
# - FREQUENT_SEARCH_ONLY
86+
# - ARCHIVE_ONLY
87+
# - ARCHIVE_FALLBACK <- default value
88+
# - FREQUENT_SEARCH_FALLBACK
89+
# - BOTH_FREQUENT_SEARCH_AND_ARCHIVE
90+
logs_retrieval_methodology: ARCHIVE_FALLBACK # default value
91+
...
92+
93+
Here is a description of each possible log retrieval methodology:
94+
95+
- **FREQUENT_SEARCH_ONLY** Always fetch logs using a frequent search.
96+
- **ARCHIVE_ONLY** Always fetch logs using the archive.
97+
- **ARCHIVE_FALLBACK** Use a frequent search first. If there are no results, fall back to searching archived logs. **This is the default behaviour.**
98+
- **FREQUENT_SEARCH_FALLBACK** Search logs in the archive first. If there are no results, fall back to searching the frequent logs.
99+
- **BOTH_FREQUENT_SEARCH_AND_ARCHIVE** Always use both the frequent search and the archive to fetch logs. The result contains merged data which is deduplicated and sorted by timestamp.
76100

77101
**Search labels**
78102

79-
You can tweak the labels used by the toolset to identify kubernetes resources. This is only needed if your
80-
logs settings for ``pod``, ``namespace``, and ``app`` differ from the defaults in the example above.
103+
You can tweak the labels used by the toolset to identify kubernetes resources. This is **optional** and only needed if your
104+
log settings for ``pod``, ``namespace``, ``application`` and ``subsystem`` differ from the defaults in the example below.
105+
106+
.. code-block:: yaml
107+
108+
toolsets:
109+
coralogix/logs:
110+
enabled: true
111+
config:
112+
labels: # OPTIONAL: tweak the filters used by HolmesGPT if your coralogix configuration is non standard
113+
namespace: "kubernetes.namespace_name"
114+
pod: "kubernetes.pod_name"
115+
application: "coralogix.metadata.applicationName"
116+
subsystem: "coralogix.metadata.subsystemName"
117+
...
118+
81119
82120
You can verify what labels to use by attempting to run a query in the coralogix ui:
83121

@@ -111,5 +149,5 @@ Capabilities
111149

112150
* - Tool Name
113151
- Description
114-
* - coralogix_fetch_logs
115-
- Retrieve logs from Coralogix
152+
* - fetch_coralogix_logs_for_resource
153+
- Retrieve logs using coralogix

docs/configuration/holmesgpt/toolsets/robusta.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ Robusta :checkmark:`_`
33
.. include:: ./_toolset_enabled_by_default.inc.rst
44

55
By enabling this toolset, HolmesGPT will be able to fetch alerts metadata. It allows HolmesGPT to fetch information
6-
about specific issues when chatting using "Ask Holmes". This toolset is not necessary for Root Cause Analysis.
6+
about specific issues when chatting using "Ask HolmesGPT". This toolset is not necessary for Root Cause Analysis.
77

88
Configuration
99
-------------

docs/configuration/sinks/Opsgenie.rst

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ We can add Prometheus alert labels into Opsgenie alert extra details by setting
1414
Configuring the OpsGenie sink
1515
------------------------------------------------
1616

17-
.. admonition:: Add this to your generated_values.yaml
17+
The OpsGenie sink supports static and dynamic team routing, optional fallback teams, tagging, and enrichment with Kubernetes labels.
18+
19+
.. admonition:: Add this to your `generated_values.yaml`
1820

1921
.. code-block:: yaml
2022
@@ -23,13 +25,32 @@ Configuring the OpsGenie sink
2325
name: ops_genie_sink
2426
api_key: OpsGenie integration API key # configured from OpsGenie team integration
2527
teams:
26-
- "noc"
27-
- "sre"
28+
- "noc" # Static team
29+
- "$labels.team" # Dynamic routing based on alert labels or annotations.
30+
# For example, if an alert subject has `team=infra`, it routes to the "infra" team.
31+
# Use $labels.<label_name> or $annotations.<annotation_name> as placeholders.
32+
default_team: "oncall" # Optional fallback team for Dynamic team routing
2833
tags:
2934
- "prod a"
3035
extra_details_labels: false # optional, default is false
3136
32-
Save the file and run
37+
In this example:
38+
39+
- Alerts will always be routed to the **"noc"** team, because it is a static team name.
40+
- If the alert includes a **"team" label**, it will also be routed to the value of that label.
41+
- If the **"team" label is missing**, the alert will be routed to the **"oncall"** team as a fallback.
42+
- The tag **"prod a"** will be included with every alert.
43+
- Kubernetes labels **will not be added** to alert details, as ``extra_details_labels`` is set to ``false``.
44+
45+
.. note::
46+
47+
- Dynamic team routing using label-based templates (e.g., ``$labels.team``) is only supported on **OpsGenie Standard** or **Enterprise** plans.
48+
If you are on a Free or Essentials plan, these templates will not function as expected.
49+
50+
- The API key must be from a **Global API Integration** for dynamic routing to work.
51+
If you use a **team-specific API key**, alerts will be routed **only to that team**.
52+
53+
Save the file and apply the configuration:
3354

3455
.. code-block:: bash
3556
:name: cb-add-opsgenie-sink

src/robusta/core/sinks/opsgenie/opsgenie_sink.py

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import opsgenie_sdk
55

66
from robusta.core.reporting.base import Enrichment, Finding, FindingSeverity
7+
from robusta.core.sinks.common.channel_transformer import ChannelTransformer
78
from robusta.core.sinks.opsgenie.opsgenie_sink_params import OpsGenieSinkConfigWrapper
89
from robusta.core.sinks.sink_base import SinkBase
910
from robusta.core.sinks.transformer import Transformer
@@ -22,9 +23,18 @@ def __init__(self, sink_config: OpsGenieSinkConfigWrapper, registry):
2223

2324
self.api_key = sink_config.opsgenie_sink.api_key
2425
self.teams = sink_config.opsgenie_sink.teams
26+
self.default_team = sink_config.opsgenie_sink.default_team
2527
self.tags = sink_config.opsgenie_sink.tags
2628
self.extra_details_labels = sink_config.opsgenie_sink.extra_details_labels
2729

30+
# Check for dangerous configuration
31+
has_templates = any("$" in team for team in self.teams)
32+
if has_templates and not self.default_team:
33+
logging.warning(
34+
"OpsGenie sink is configured with templated team names but no default_team specified. "
35+
"Alerts may fail to route if the required label or annotation is missing."
36+
)
37+
2838
opsgenie_sdk.configuration.Configuration.set_default(None)
2939
self.conf = opsgenie_sdk.configuration.Configuration()
3040
self.conf.api_key["Authorization"] = self.api_key
@@ -73,17 +83,55 @@ def __ack_alert(self, fingerprint: str, user: str, note: str):
7383
except opsgenie_sdk.ApiException as err:
7484
logging.error(f"Error acking opsGenie alert {fingerprint} {err}", exc_info=True)
7585

86+
def __get_teams(self, finding: Finding) -> List[str]:
87+
"""Get the list of teams for the alert, resolving any templates in the team names."""
88+
teams = []
89+
for team_template in self.teams:
90+
try:
91+
if "$" in team_template:
92+
# Only process templates that contain variables
93+
team = ChannelTransformer.template(
94+
team_template,
95+
self.default_team, # Use default_team as fallback
96+
self.cluster_name,
97+
finding.subject.labels,
98+
finding.subject.annotations,
99+
)
100+
if team and team not in teams: # Only add non-null, de-duped teams
101+
teams.append(team)
102+
else:
103+
# Use static team name directly
104+
if team_template not in teams: # Only add de-duped teams
105+
teams.append(team_template)
106+
except Exception as e:
107+
logging.warning(
108+
f"Failed to process team template {team_template} for alert subject {finding.service_key}: {e}"
109+
)
110+
continue
111+
112+
# If no teams were resolved and we have a default team, use it
113+
if not teams and self.default_team and self.default_team not in teams:
114+
teams.append(self.default_team)
115+
elif not teams and self.teams: # dynamic routing failed and no default team configured
116+
logging.warning(f"No valid teams resolved for finding {finding.title}. Alert may not be routed properly.")
117+
118+
return teams
119+
76120
def __open_alert(self, finding: Finding, platform_enabled: bool):
77121
description = self.__to_description(finding, platform_enabled)
78122
details = self.__to_details(finding)
79123
tags = self.tags.copy()
80124
tags.insert(0, self.cluster_name)
125+
126+
# Get teams based on templates
127+
teams = self.__get_teams(finding)
128+
81129
body = opsgenie_sdk.CreateAlertPayload(
82130
source="Robusta",
83131
message=finding.title,
84132
description=description,
85133
alias=finding.fingerprint,
86-
responders=[{"name": team, "type": "team"} for team in self.teams],
134+
responders=[{"name": team, "type": "team"} for team in teams],
87135
details=details,
88136
tags=tags,
89137
entity=finding.service_key,

src/robusta/core/sinks/opsgenie/opsgenie_sink_params.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
class OpsGenieSinkParams(SinkBaseParams):
88
api_key: str
99
teams: List[str] = []
10+
default_team: Optional[str] = None
1011
tags: List[str] = []
1112
host: Optional[str] = None # NOTE: If None, the default value will be used from opsgenie_sdk
1213
extra_details_labels: Optional[bool] = False

src/robusta/integrations/slack/sender.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ def __create_holmes_callback(self, finding: Finding) -> CallbackBlock:
353353

354354
return CallbackBlock(
355355
{
356-
"Ask Holmes": CallbackChoice(
356+
"Ask HolmesGPT": CallbackChoice(
357357
action=ask_holmes,
358358
action_params=AIInvestigateParams(
359359
resource=resource, investigation_type="issue", ask="Why is this alert firing?", context=context

0 commit comments

Comments
 (0)