Python(feat): report assertion message in report as error info for pytest plugin (#587)

alexluck-sift · web-flow · commit 2e4c9cfc67b8 · 2026-05-26T12:38:40.000-07:00
diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md
@@ -86,7 +86,7 @@ def sift_client() -> SiftClient:
 | Name | Kind | Scope | Purpose |
 |---|---|---|---|
 | `report_context` | fixture (autouse) | session | The `ReportContext` backing the run's `TestReport`. Use it to attach metadata or open ad-hoc steps. |
-| `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, and `current_step`. |
+| `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, `fail_if_measurements_failed`, and `current_step`. |
 | `_hierarchy_parents` | internal fixture (autouse) | function | Opens a parent step for each `pytest.Package`, `pytest.Module`, and `pytest.Class` ancestor of the current test. Each layer is gated independently — see [ini options](#ini-options). |
 | `_parametrize_parents` | internal fixture (autouse) | function | Opens a parent step for each `@pytest.mark.parametrize` axis (and fixture parametrization), nested inside the hierarchy parents. |
 | `client_has_connection` | fixture | session | Calls `sift_client.ping.ping()`; consulted by `report_context` at session start in online mode (the default). Override to skip the ping or use a different reachability signal. |
@@ -263,13 +263,15 @@ def test_no_fixtures_still_creates_a_step():
 def test_measure_a_single_value(step):
     """Take `step` explicitly when you want to record a measurement."""
     voltage = 4.97
-    passed = step.measure(
+    step.measure(
         name="battery_voltage",
         value=voltage,
         bounds={"min": 4.8, "max": 5.2},
         unit="V",
     )
-    assert passed, f"voltage {voltage}V out of bounds"
+    # An out-of-bounds measurement already marks the step FAILED. Call this at
+    # the end to also fail pytest, without an assertion message in error_info.
+    step.fail_if_measurements_failed()
 
 
 def test_measure_strings_and_booleans(step):
@@ -612,8 +614,8 @@ def test_only_outliers_recorded(step):
         unit="psi",
     )
     # Returns False because 99.9 is out of bounds. The step is already
-    # marked failed; raise here only if you also want pytest to fail.
-    assert all_in_bounds
+    # marked failed; call this only if you also want pytest to fail.
+    step.fail_if_measurements_failed()
 ```
 
 !!! note "`measure_all` requires at least one bound"
diff --git a/python/docs/examples/pytest_plugin_quickstart.md b/python/docs/examples/pytest_plugin_quickstart.md
@@ -136,7 +136,7 @@ TestReport (FAILED, since failures propagate up from leaves)
         │   (test_excluded: @sift_exclude, runs in pytest, NOT in tree)
         ├── test_measure_series                                      PASSED
         ├── test_failed_measurement_marks_sift_step_failed           FAILED  (pytest PASSED)
-        ├── test_assert_measurements_passed_at_end                                FAILED  (pytest FAILED)
+        ├── test_fail_if_measurements_failed_at_end                  FAILED  (pytest FAILED)
         ├── test_report_level_metadata                               PASSED
         └── TestClassStep
             ├── test_parametrize
@@ -158,12 +158,13 @@ The `with_sift` module shows two patterns for handling measurement results:
 `test_failed_measurement_marks_sift_step_failed` lets the test keep passing
 in pytest while the Sift step is `FAILED` (useful when measurements are
 diagnostic data you want to collect regardless of outcome); and
-`test_assert_measurements_passed_at_end` takes every measurement first and
-then asserts `step.measurements_passed` once at the end, so every
+`test_fail_if_measurements_failed_at_end` takes every measurement first and
+then calls `step.fail_if_measurements_failed()` once at the end, so every
 measurement still lands in the report even when one fails. The end-of-test
-assertion is the recommended pattern: asserting on an individual
-`step.measure(...)` call short-circuits on the first failure and skips
-every measurement that follows. Expected
+call is the recommended pattern: it fails via `pytest.fail` (no assertion
+noise in `error_info`), and, unlike asserting on an individual
+`step.measure(...)` call, it does not short-circuit on the first failure and
+skip every measurement that follows. Expected
 pytest output is `16 passed, 3 failed, 1 skipped`.
 
 Flip any of the `sift_*_step` / `sift_parametrize_nesting` flags in
diff --git a/python/docs/guides/pytest_plugin/pass_fail_behavior.md b/python/docs/guides/pytest_plugin/pass_fail_behavior.md
@@ -26,8 +26,10 @@ The statuses below come from `sift_client.sift_types.test_report.TestStatus`.
 | `pytest.fail("...")` from the body        | `pytest.fail("intentional failure")` | `FAILED` |
 | Uncaught non-assertion exception          | `raise ValueError("boom")`           | `ERROR`  |
 
-A non-assertion exception gets its formatted traceback recorded on
-`step.error_info.error_message`.
+An assertion failure records the concise assertion message (the exception
+line(s), no traceback frames) on `step.error_info.error_message` while still
+mapping to `FAILED`. A non-assertion exception gets its formatted traceback
+recorded on `step.error_info.error_message`.
 
 ## Hard exits
 
diff --git a/python/examples/pytest_plugin/README.md b/python/examples/pytest_plugin/README.md
@@ -75,7 +75,7 @@ TestReport (FAILED, since failures propagate up from leaves)
         │   (test_excluded: @sift_exclude, runs in pytest, NOT in tree)
         ├── test_measure_series                                      PASSED
         ├── test_failed_measurement_marks_sift_step_failed           FAILED  (pytest PASSED)
-        ├── test_assert_measurements_passed_at_end                                FAILED  (pytest FAILED)
+        ├── test_fail_if_measurements_failed_at_end                  FAILED  (pytest FAILED)
         ├── test_report_level_metadata                               PASSED
         └── TestClassStep
             ├── test_parametrize
@@ -97,12 +97,13 @@ The `with_sift` module shows two patterns for handling measurement results:
 `test_failed_measurement_marks_sift_step_failed` lets the test keep passing
 in pytest while the Sift step is `FAILED` (useful when measurements are
 diagnostic data you want to collect regardless of outcome); and
-`test_assert_measurements_passed_at_end` takes every measurement first and
-then asserts `step.measurements_passed` once at the end, so every
+`test_fail_if_measurements_failed_at_end` takes every measurement first and
+then calls `step.fail_if_measurements_failed()` once at the end, so every
 measurement still lands in the report even when one fails. The end-of-test
-assertion is the recommended pattern: asserting on an individual
-`step.measure(...)` call short-circuits on the first failure and skips
-every measurement that follows. Expected
+call is the recommended pattern: it fails via `pytest.fail` (no assertion
+noise in `error_info`), and, unlike asserting on an individual
+`step.measure(...)` call, it does not short-circuit on the first failure and
+skip every measurement that follows. Expected
 pytest output is `16 passed, 3 failed, 1 skipped`.
 
 Toggle any of the `sift_*_step` / `sift_parametrize_nesting` flags in
@@ -115,5 +116,5 @@ Toggle any of the `sift_*_step` / `sift_parametrize_nesting` flags in
 | `conftest.py` | Plugin registration via `pytest_plugins`; optional `load_dotenv()` |
 | `pytest.ini` | The four nesting flags + git metadata flag at their defaults |
 | `tests/pytest_only/test_pytest_only_demo.py` | Plain pytest tests with no Sift APIs. The plugin captures pass/fail automatically; covers functions, fixtures, parametrize, classes, plus one each of `AssertionError` (FAILED), `pytest.skip` (SKIPPED), and a raised `ValueError` (ERROR) |
-| `tests/with_sift/test_with_sift_demo.py` | `step.measure` (numeric/string/bool bounds, units, description, metadata, `channel_names`), `step.measure_avg` and `step.measure_all` for series, an out-of-bounds measurement (pytest PASSED, Sift step FAILED), the recommended `assert step.measurements_passed` end-of-test pattern that fails pytest while still recording every measurement, nested `step.substep` (with step-level `metadata=...`), `@pytest.mark.sift_exclude`, class step + class docstring → description, nested classes, stacked `@pytest.mark.parametrize`, `step.report_outcome`, and session-level metadata via `report_context.report.update({...})` |
+| `tests/with_sift/test_with_sift_demo.py` | `step.measure` (numeric/string/bool bounds, units, description, metadata, `channel_names`), `step.measure_avg` and `step.measure_all` for series, an out-of-bounds measurement (pytest PASSED, Sift step FAILED), the recommended `step.fail_if_measurements_failed()` end-of-test call that fails pytest while still recording every measurement, nested `step.substep` (with step-level `metadata=...`), `@pytest.mark.sift_exclude`, class step + class docstring → description, nested classes, stacked `@pytest.mark.parametrize`, `step.report_outcome`, and session-level metadata via `report_context.report.update({...})` |
 | `tests/{pytest_only,with_sift}/__init__.py` | Each Python package (directory with `__init__.py`) becomes a parent step in the report tree |
diff --git a/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py b/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py
@@ -94,24 +94,25 @@ def test_failed_measurement_marks_sift_step_failed(step) -> None:
     )
 
 
-def test_assert_measurements_passed_at_end(step) -> None:
-    """Recommended pattern: take every measurement first, then assert
-    ``step.measurements_passed`` once at the end.
+def test_fail_if_measurements_failed_at_end(step) -> None:
+    """Recommended pattern: take every measurement first, then call
+    ``step.fail_if_measurements_failed()`` once at the end.
 
     Asserting on individual ``step.measure(...)`` calls raises
     ``AssertionError`` on the first failure, so any measurements after the
     failing one never run and never land in the Sift report. The end-of-test
-    assertion is strictly better for diagnostic completeness: every
-    measurement is recorded, including the failures, and the aggregate
-    result is then folded into the pytest outcome.
+    call is strictly better for diagnostic completeness: every measurement is
+    recorded, including the failures, and the aggregate result is then folded
+    into the pytest outcome. It fails via ``pytest.fail`` rather than an
+    assertion, so the failed step carries no assertion noise in ``error_info``.
 
     The ``b`` measurement below is deliberately out of bounds. ``c`` still
-    runs and is recorded; only the final ``assert`` fires.
+    runs and is recorded; only the final call fails the test.
     """
     step.measure(name="a", value=1.0, bounds={"min": 0.0, "max": 2.0})
     step.measure(name="b", value=99.0, bounds={"min": 0.0, "max": 2.0})  # out of bounds
     step.measure(name="c", value=1.5, bounds={"min": 0.0, "max": 2.0})  # still recorded
-    assert step.measurements_passed, "one or more measurements out of bounds"
+    step.fail_if_measurements_failed()
 
 
 def test_report_level_metadata(step, report_context) -> None:
diff --git a/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py
@@ -27,6 +27,7 @@ class CapturedStep:
     step_path: str
     parent_step_id: str | None
     statuses: list[TestStatus] = field(default_factory=list)
+    error_messages: list[str] = field(default_factory=list)
 
 
 _PROTO_STATUS_NAMES = {
@@ -58,19 +59,23 @@ def parse_log(log_path: Path) -> dict[str, CapturedStep]:
     for request_type, response_id, json_str in iter_log_data_lines(log_path):
         payload = json.loads(json_str)
         test_step = payload.get("testStep", {})
+        error_message = test_step.get("errorInfo", {}).get("errorMessage")
         if request_type == "CreateTestStep" and response_id:
             steps[response_id] = CapturedStep(
                 step_id=response_id,
                 name=test_step.get("name", ""),
                 step_path=test_step.get("stepPath", ""),
                 parent_step_id=test_step.get("parentStepId") or None,
                 statuses=[_status(test_step.get("status"))],
+                error_messages=[error_message] if error_message else [],
             )
         elif request_type == "UpdateTestStep":
             step_id = test_step.get("testStepId")
             new_status = test_step.get("status")
             if step_id and step_id in steps and new_status is not None:
                 steps[step_id].statuses.append(_status(new_status))
+                if error_message:
+                    steps[step_id].error_messages.append(error_message)
     return steps
 
 
@@ -117,6 +122,11 @@ def final_status(name: str) -> TestStatus | None:
     return step.statuses[-1] if step and step.statuses else None
 
 
+def final_error_message(name: str) -> str | None:
+    step = test_step(name)
+    return step.error_messages[-1] if step and step.error_messages else None
+
+
 def load_steps(log_path: Path) -> list[dict]:
     """Load the offline log as a list of step records keyed by hierarchy fields.
 
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py
@@ -91,6 +91,12 @@ def test_x():
         """,
     )
     assert capture.final_status("test_x") == TestStatus.FAILED
+    # The concise assertion message is recorded on error_info for the UI, but
+    # without the full traceback frames.
+    message = capture.final_error_message("test_x")
+    assert message is not None
+    assert "assert 1 == 2" in message
+    assert "Traceback (most recent call last)" not in message
 
 
 def test_generic_exception_maps_to_error(inner):
@@ -131,6 +137,34 @@ def test_x():
     assert capture.final_status("test_x") == TestStatus.FAILED
 
 
+def test_fail_if_measurements_failed_fails_without_error_info(inner):
+    # An out-of-bounds measurement plus step.fail_if_measurements_failed()
+    # fails the test via pytest.fail, so the step is FAILED with no assertion
+    # message in error_info (the reason this helper exists over `assert`).
+    _run(
+        inner,
+        """
+        def test_x(step):
+            step.measure(name="b", value=99.0, bounds={"min": 0.0, "max": 2.0})
+            step.fail_if_measurements_failed()
+        """,
+    )
+    assert capture.final_status("test_x") == TestStatus.FAILED
+    assert capture.final_error_message("test_x") is None
+
+
+def test_fail_if_measurements_failed_passes_when_in_bounds(inner):
+    _run(
+        inner,
+        """
+        def test_x(step):
+            step.measure(name="a", value=1.0, bounds={"min": 0.0, "max": 2.0})
+            step.fail_if_measurements_failed()
+        """,
+    )
+    assert capture.final_status("test_x") == TestStatus.PASSED
+
+
 def test_keyboard_interrupt_leaves_step_in_progress(inner):
     # Case: CALL-06
     # KeyboardInterrupt aborts the session before the call-phase makereport
@@ -174,6 +208,27 @@ def test_x(step):
     assert test_x.statuses[-1] == TestStatus.FAILED
 
 
+def test_substep_assert_failure_records_message_with_failed(inner):
+    # Case: CALL-02 (substep). A substep inherits assertion_as_fail_not_error
+    # from the autouse step (False under pytest), so a failed assertion in a
+    # substep resolves to FAILED and records the concise assertion message.
+    _run(
+        inner,
+        """
+        def test_x(step):
+            with step.substep(name="inner"):
+                assert 1 == 2
+        """,
+    )
+    inner_sub = next(iter(capture.steps_by_name("inner")), None)
+    assert inner_sub is not None
+    assert inner_sub.statuses[-1] == TestStatus.FAILED
+    assert inner_sub.error_messages
+    message = inner_sub.error_messages[-1]
+    assert "assert 1 == 2" in message
+    assert "Traceback (most recent call last)" not in message
+
+
 # ---------------------------------------------------------------------------
 # Skip paths
 # ---------------------------------------------------------------------------
diff --git a/python/lib/sift_client/_tests/util/test_test_results_utils.py b/python/lib/sift_client/_tests/util/test_test_results_utils.py
@@ -463,7 +463,11 @@ def test_bad_assert(self, report_context, step):
         assert parent_step.status == TestStatus.FAILED
         assert substep.status == TestStatus.FAILED
         assert nested_substep.status == TestStatus.FAILED
-        assert nested_substep.error_info is None
+        # The assertion-as-fail path records the concise assertion message (no
+        # traceback frames) on error_info while keeping the FAILED status.
+        assert nested_substep.error_info is not None
+        assert "AssertionError" in nested_substep.error_info.error_message
+        assert "Traceback (most recent call last)" not in nested_substep.error_info.error_message
         assert nested_substep_2.status == TestStatus.ERROR
         assert "AssertionError" in nested_substep_2.error_info.error_message
         assert sibling_substep.status == TestStatus.PASSED
diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py
@@ -14,7 +14,10 @@
 from sift_client.errors import SiftWarning
 from sift_client.sift_types.test_report import ErrorInfo, TestStatus
 from sift_client.util.test_results import ReportContext
-from sift_client.util.test_results.context_manager import format_truncated_traceback
+from sift_client.util.test_results.context_manager import (
+    format_assertion_message,
+    format_truncated_traceback,
+)
 
 
 class SiftPytestPluginWarning(SiftWarning):
@@ -588,6 +591,7 @@ def _resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None:
                 status = TestStatus.FAILED
             elif isinstance(excinfo.value, AssertionError):
                 status = TestStatus.FAILED
+                error_info = format_assertion_message(excinfo.type, excinfo.value)
             elif isinstance(excinfo.value, pytest.fail.Exception):
                 status = TestStatus.FAILED
             elif isinstance(excinfo.value, (KeyboardInterrupt, SystemExit)):
diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py