Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions integration_tests/tests/adapter_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,14 @@ def _serialize_value(val: Any) -> Any:
* Everything else is returned unchanged.
"""
if isinstance(val, Decimal):
# Match the Jinja macro: normalize, then int or float
# Match the Jinja macro: normalize, then int or float.
# Note: for special values (Infinity, NaN), as_tuple().exponent is a
# string ('F' or 'n'), not an int — convert those directly to float.
normalized = val.normalize()
if normalized.as_tuple().exponent >= 0:
exponent = normalized.as_tuple().exponent
if isinstance(exponent, str):
return float(normalized)
if exponent >= 0:
return int(normalized)
return float(normalized)
if isinstance(val, (datetime, date, time)):
Expand Down
80 changes: 80 additions & 0 deletions integration_tests/tests/test_dimension_anomalies.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,3 +315,83 @@ def test_anomaly_in_detection_period(
)

assert test_result["status"] == expected_status


def test_dimension_anomalies_alert_description_few_failures(
    test_id: str, dbt_project: DbtProject
):
    """Check that a small number of failing dimension values are each described.

    Three dimension values spike on the detection day; the test must fail, the
    description must mention every one of them, and it must not contain the
    count-summary wording.
    """
    yesterday = datetime.utcnow().date() - timedelta(1)
    detection_date, *history_dates = generate_dates(base_date=yesterday)

    # Every hero gets 1 row per training day and 10 rows on the detection day.
    anomalous_dimensions = ["Batman", "Superman", "Spiderman"]

    spike_day = detection_date.strftime(DATE_FORMAT)
    data: List[Dict[str, Any]] = []
    for hero in anomalous_dimensions:
        data.extend(
            {TIMESTAMP_COLUMN: spike_day, "superhero": hero} for _ in range(10)
        )
    for history_date in history_dates:
        history_day = history_date.strftime(DATE_FORMAT)
        data.extend(
            {TIMESTAMP_COLUMN: history_day, "superhero": hero}
            for hero in anomalous_dimensions
        )

    test_args = dict(
        timestamp_column=TIMESTAMP_COLUMN,
        dimensions=["superhero"],
        sensitivity=2,
    )
    test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data)
    assert test_result["status"] == "fail"

    description = test_result["test_results_description"]
    # Every spiking dimension value has to be named in the description.
    for hero in anomalous_dimensions:
        assert hero in description, f"Expected '{hero}' in description: {description}"
    # The count-summary wording must be absent here.
    assert "dimension values are anomalous" not in description


def test_dimension_anomalies_alert_description_many_failures(
    test_id: str, dbt_project: DbtProject
):
    """Check that many failing dimension values produce a count summary.

    Six dimension values spike on the detection day; the test must fail and
    the description must contain the count-summary wording.
    """
    yesterday = datetime.utcnow().date() - timedelta(1)
    detection_date, *history_dates = generate_dates(base_date=yesterday)

    # Six heroes, each with 1 row per training day and 10 rows on the detection day.
    anomalous_dimensions = [
        "Batman",
        "Superman",
        "Spiderman",
        "IronMan",
        "Thor",
        "Hulk",
    ]

    spike_day = detection_date.strftime(DATE_FORMAT)
    data: List[Dict[str, Any]] = []
    for hero in anomalous_dimensions:
        data.extend(
            {TIMESTAMP_COLUMN: spike_day, "superhero": hero} for _ in range(10)
        )
    for history_date in history_dates:
        history_day = history_date.strftime(DATE_FORMAT)
        data.extend(
            {TIMESTAMP_COLUMN: history_day, "superhero": hero}
            for hero in anomalous_dimensions
        )

    test_args = dict(
        timestamp_column=TIMESTAMP_COLUMN,
        dimensions=["superhero"],
        sensitivity=2,
    )
    test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data)
    assert test_result["status"] == "fail"

    description = test_result["test_results_description"]
    # The count-summary wording is expected when this many values fail.
    assert (
        "dimension values are anomalous" in description
    ), f"Expected summary message in description: {description}"
Original file line number Diff line number Diff line change
Expand Up @@ -74,23 +74,37 @@
and upper(column_name) = upper({{ elementary.const_as_string(column_name) }})
{%- endif %}
{%- endset -%}
{% set test_results_description %}
{% if rows_with_score %}
{{ elementary.insensitive_get_dict_value(rows_with_score[-1], 'anomaly_description') }}
{% else %}
Not enough data to calculate anomaly score.
{% endif %}
{% endset %}
{% set failures = namespace(data=0) %}
{% set filtered_anomaly_scores_rows = [] %}
{% set anomalous_rows = [] %}
{% for row in anomaly_scores_rows %}
{% if row.anomaly_score is not none %}
{% do filtered_anomaly_scores_rows.append(row) %}
{% if row.is_anomalous %}
{% set failures.data = failures.data + 1 %}
{% do anomalous_rows.append(row) %}
{% endif %}
{% endif %}
{% endfor %}
{%- set max_dimension_alerts = 5 -%}
{% set test_results_description %}
{%- if rows_with_score -%}
{%- set sample_row = rows_with_score[-1] -%}
{%- set row_dimension = elementary.insensitive_get_dict_value(sample_row, "dimension") -%}
{%- if row_dimension is not none and anomalous_rows | length > 0 -%}
{%- if anomalous_rows | length > max_dimension_alerts -%}
{%- set remaining = (anomalous_rows | length) - max_dimension_alerts -%}
{{ anomalous_rows | length }} dimension values are anomalous. Showing first {{ max_dimension_alerts }}: {% for row in anomalous_rows[:max_dimension_alerts] %}{{ elementary.insensitive_get_dict_value(row, "dimension_value") }}{% if not loop.last %}, {% endif %}{% endfor %}, and {{ remaining }} more.
{%- else -%}
{% for row in anomalous_rows %}{{ elementary.insensitive_get_dict_value(row, "anomaly_description") }}{% if not loop.last %} | {% endif %}{% endfor %}
{%- endif -%}
{%- else -%}
{{ elementary.insensitive_get_dict_value(rows_with_score[-1], "anomaly_description") }}
{%- endif -%}
{%- else -%}
Not enough data to calculate anomaly score.
{%- endif -%}
{% endset %}
{% set test_result_dict = {
"id": elementary.insensitive_get_dict_value(latest_row, "id"),
"data_issue_id": elementary.insensitive_get_dict_value(
Expand Down
Loading