Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 37 additions & 7 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,13 @@

Comment thread
albarker-rh marked this conversation as resolved.
TEAM_MARKERS = {
"chaos": ["chaos", "deprecated_api"],
"virt": ["virt", "deprecated_api"],
"network": ["network", "deprecated_api"],
"storage": ["storage", "deprecated_api"],
"iuo": ["install_upgrade_operators", "deprecated_api"],
"observability": ["observability", "deprecated_api"],
"infrastructure": ["infrastructure", "deprecated_api"],
"data_protection": ["data_protection", "deprecated_api"],
"virt": ["virt", "deprecated_api", "post_test_alerts"],
"network": ["network", "deprecated_api", "post_test_alerts"],
"storage": ["storage", "deprecated_api", "post_test_alerts"],
"iuo": ["install_upgrade_operators", "deprecated_api", "post_test_alerts"],
"observability": ["observability", "deprecated_api", "post_test_alerts"],
"infrastructure": ["infrastructure", "deprecated_api", "post_test_alerts"],
"data_protection": ["data_protection", "deprecated_api", "post_test_alerts"],
}
NAMESPACE_COLLECTION = {
"storage": [NamespacesNames.OPENSHIFT_STORAGE],
Expand Down Expand Up @@ -129,6 +129,7 @@ def pytest_addoption(parser):
ci_group = parser.getgroup(name="CI")
component_sanity_group = parser.getgroup(name="ComponentSanity")
ai_insights_group = parser.getgroup(name="ai-job-insight")
post_test_alerts_group = parser.getgroup(name="PostTestAlerts")

# Upgrade addoption
install_upgrade_group.addoption(
Expand Down Expand Up @@ -298,6 +299,13 @@ def pytest_addoption(parser):
action="store_true",
)

# Post test alerts group
post_test_alerts_group.addoption(
"--skip-post-test-alerts",
help="By default test_no_critical_alerts_after_tests will always run, pass this flag to skip it",
action="store_true",
)

# LeftoversCollector group
leftovers_collector.addoption(
"--leftovers-collector",
Expand Down Expand Up @@ -550,6 +558,20 @@ def filter_deprecated_api_tests(items: list[Item], config: Config) -> list[Item]
return items


def filter_post_test_alerts_tests(items: list[Item], config: Config) -> list[Item]:
# filter out post test alerts tests, if explicitly asked or if running upgrade/install tests
if (
config.getoption("--skip-post-test-alerts")
or config.getoption("--install")
or config.getoption("--upgrade")
or config.getoption("--upgrade_custom")
):
discard_tests, items_to_return = remove_tests_from_list(items=items, filter_str="post_test_alerts")
config.hook.pytest_deselected(items=discard_tests)
return items_to_return
return items


def filter_sno_only_tests(items: list[Item], config: Config) -> list[Item]:
if config.getoption("-m") and "sno" not in config.getoption("-m"):
discard_tests, items_to_return = remove_tests_from_list(items=items, filter_str="single_node_tests")
Expand All @@ -570,12 +592,19 @@ def remove_tests_from_list(items: list[Item], filter_str: str) -> tuple[list[Ite


def pytest_configure(config):
config._test_execution_start_time = datetime.datetime.now(tz=datetime.UTC)

# test_deprecation_audit_logs should always run regardless the path that passed to pytest.
deprecation_tests_dir_path = "tests/deprecated_api"
file_or_dir = config.option.file_or_dir
if file_or_dir and deprecation_tests_dir_path not in file_or_dir and file_or_dir != ["tests"]:
config.option.file_or_dir.append(deprecation_tests_dir_path)

# post_test_alerts tests should always run regardless the path that passed to pytest.
post_test_alerts_dir_path = "tests/post_test_alerts"
if file_or_dir and post_test_alerts_dir_path not in file_or_dir and file_or_dir != ["tests"]:
config.option.file_or_dir.append(post_test_alerts_dir_path)

if conformance_storage_class := config.getoption("conformance_storage_class"):
py_config["storage_class_matrix"] = StorageClassConfig(
name=conformance_storage_class
Expand Down Expand Up @@ -634,6 +663,7 @@ def pytest_collection_modifyitems(session, config, items):
if discard:
config.hook.pytest_deselected(items=discard)
items[:] = filter_deprecated_api_tests(items=items, config=config)
items[:] = filter_post_test_alerts_tests(items=items, config=config)
items[:] = filter_sno_only_tests(items=items, config=config)
items[:] = mark_nmstate_dependent_tests(items=items)

Expand Down
Empty file.
55 changes: 55 additions & 0 deletions tests/post_test_alerts/test_post_test_alerts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""
Post-test alerts verification.

Verifies that critical CNV alerts were not triggered during test execution.

Jira: https://redhat.atlassian.net/browse/CNV-80353
"""
Comment thread
coderabbitai[bot] marked this conversation as resolved.

import datetime
import logging

import pytest

LOGGER = logging.getLogger(__name__)

POST_TEST_CRITICAL_ALERTS = [

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why those alerts specifically? did you investigate customer tickets?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, i looked at customer tickets and worked with claude to identify which alerts could be potential issues in the future

"LowVirtControllersCount",
"LowVirtAPICount",
"KubeVirtCRModified",
"VirtControllerRESTErrorsHigh",
"VirtHandlerRESTErrorsHigh",
"HCOOperatorConditionsUnhealthy",
]

ALERTS_REGEX = "|".join(POST_TEST_CRITICAL_ALERTS)


@pytest.mark.s390x
@pytest.mark.polarion("CNV-16276")
@pytest.mark.order("last")
def test_no_critical_alerts_after_tests(prometheus, request):
"""

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test is not following the jira ticket description. more specifically:

The test should verify that these alrest were not triggered during tests execution.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it, fixed the test

Test that critical CNV alerts were not triggered during test execution.

Preconditions:
- Prometheus is accessible on the cluster
- Test execution completed

Steps:
1. Query Prometheus for alerts that fired at any point during test execution
2. Check that none of the critical alerts were triggered

Expected:
- None of the critical alerts fired during test execution
"""
start_time = request.config._test_execution_start_time
duration_seconds = int((datetime.datetime.now(tz=datetime.UTC) - start_time).total_seconds())
Comment on lines +46 to +47

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

LOW: Clamp the lookback duration to at least 1 second.

At Line [47], int(...total_seconds()) can truncate to 0 on short runs, creating a zero-length lookback and potentially missing alerts from the run window. Clamp it to a minimum of 1 second.

Suggested fix
-    duration_seconds = int((datetime.datetime.now(tz=datetime.UTC) - start_time).total_seconds())
+    duration_seconds = max(1, int((datetime.datetime.now(tz=datetime.UTC) - start_time).total_seconds()))
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
start_time = request.config._test_execution_start_time
duration_seconds = int((datetime.datetime.now(tz=datetime.UTC) - start_time).total_seconds())
start_time = request.config._test_execution_start_time
duration_seconds = max(1, int((datetime.datetime.now(tz=datetime.UTC) - start_time).total_seconds()))
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@tests/post_test_alerts/test_post_test_alerts.py` around lines 46 - 47, The
duration_seconds calculation on line 47 can truncate to zero on very short test
runs, which causes the lookback window for alerts to be zero-length and miss
valid alerts. Modify the calculation where duration_seconds is assigned to clamp
the result to a minimum value of 1 second, ensuring there is always at least a
1-second lookback window even for short test executions.

LOGGER.info(
f"Checking {len(POST_TEST_CRITICAL_ALERTS)} critical alerts"
f" were not triggered during test execution (last {duration_seconds}s)"
)
query = f'ALERTS{{alertname=~"{ALERTS_REGEX}", alertstate="firing"}}[{duration_seconds}s]'
results = prometheus.query_sampler(query=query)
fired_alerts = {result["metric"]["alertname"]: result for result in results}
assert not fired_alerts, f"Critical alerts fired during test execution.\n{fired_alerts}"