Fix test result sample count differing from failure count (#933)

MikaKerman · claude · devin-ai-integration[bot] · web-flow · commit 8040a76b88f9 · 2026-03-12T14:36:37.000+02:00
* Fix test result sample count exceeding failure count for non-deterministic queries

Truncate result_rows to match dbt's failure count when re-execution of
non-deterministic queries (e.g. TABLESAMPLE) returns more rows than the
original run. Also add a description note when sample_percent is used.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Remove test_not_null_sampled macro and Change 2 tests, validate sample_percent value

Remove the custom generic test macro and associated tests that were
hard to exercise through the integration test framework. Also tighten
the sample_percent check to validate it's a number between 0 and 100.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Fix leading whitespace in sampling description note

Avoid prepending a space when test_results_description is empty/None.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Itamar Hartstein <haritamar@gmail.com>
diff --git a/integration_tests/tests/test_result_rows_count.py b/integration_tests/tests/test_result_rows_count.py
@@ -0,0 +1,33 @@
+import json
+
+import pytest
+from dbt_project import DbtProject
+
+COLUMN_NAME = "some_column"
+
+
+@pytest.mark.skip_targets(["clickhouse"])
+def test_result_rows_do_not_exceed_failures(test_id: str, dbt_project: DbtProject):
+    """Stored result-row samples of a failing not_null test must not outnumber dbt's failure count."""
+    null_count = 20
+    data = [{COLUMN_NAME: None} for _ in range(null_count)]
+    test_result = dbt_project.test(
+        test_id,
+        "not_null",
+        dict(column_name=COLUMN_NAME),
+        data=data,
+        test_vars={
+            "enable_elementary_test_materialization": True,
+            "test_sample_row_count": 1000,  # presumably the sample limit; far above null_count so it would not cap the rows - confirm
+        },
+    )
+    assert test_result["status"] == "fail"
+
+    failures = int(test_result["failures"])
+    assert failures == null_count
+
+    samples = [
+        json.loads(row["result_row"])  # raises if a stored sample is not valid JSON
+        for row in dbt_project.run_query(dbt_project.samples_query(test_id))
+    ]
+    assert len(samples) <= failures
diff --git a/macros/edr/materializations/test/test.sql b/macros/edr/materializations/test/test.sql
@@ -84,6 +84,16 @@
     {% set result_rows = elementary.query_test_result_rows(
         sample_limit=sample_limit, ignore_passed_tests=true
     ) %}
+
+    {# Re-running a non-deterministic test query (e.g. TABLESAMPLE) can return more rows than dbt counted as failures; cap result_rows at dbt's failure count, leaving them untouched when that count is 0 #}
+    {% if result_rows | length > 0 %}
+        {% set test_result = elementary.get_test_result() %}
+        {% set dbt_failures = test_result.failures | int %}
+        {% if dbt_failures > 0 and result_rows | length > dbt_failures %}
+            {% set result_rows = result_rows[:dbt_failures] %}
+        {% endif %}
+    {% endif %}
+
     {% set elementary_test_results_row = elementary.get_dbt_test_result_row(
         flattened_test, result_rows
     ) %}
diff --git a/macros/edr/tests/on_run_end/handle_tests_results.sql b/macros/edr/tests/on_run_end/handle_tests_results.sql
@@ -123,12 +123,32 @@
                 {% do elementary_test_results_row.setdefault(
                     "test_results_description", result.message
                 ) %}
+                {# Append a sampling note to test_results_description when test_params is a mapping whose sample_percent is a number strictly between 0 and 100 #}
+                {% set test_params = elementary_test_results_row.get(
+                    "test_params", {}
+                ) %}
+                {% if test_params is mapping and test_params.get(
+                    "sample_percent"
+                ) is number and test_params.get(
+                    "sample_percent"
+                ) > 0 and test_params.get(
+                    "sample_percent"
+                ) < 100 %}
+                    {% set base_desc = elementary_test_results_row.get(
+                        "test_results_description"
+                    ) %}
+                    {% set note = "Note: this test uses sample_percent, so result samples may not exactly match the failure count." %}
+                    {% set new_desc = (base_desc ~ " " ~ note) if base_desc else note %}
+                    {% do elementary_test_results_row.update(
+                        {"test_results_description": new_desc}
+                    ) %}
+                {% endif %}
                 {% if render_result_rows %}
                     {% do elementary_test_results_row.update(
                         {
                             "result_rows": elementary.render_result_rows(
                                 elementary_test_results_row.result_rows
-                            )
+                            ),
                         }
                     ) %}
                 {% endif %}