From 23fbdfd90c0b9f2e94a0bdf43fb8bb2f0673ae87 Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Fri, 15 May 2026 17:05:41 -0700 Subject: [PATCH 01/19] initial dev version --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index ddb4c7d80..403c89bf8 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "sift_stack_py" -version = "0.16.2" +version = "0.17.0.dev0" description = "Python client library for the Sift API" requires-python = ">=3.8" readme = { file = "README.md", content-type = "text/markdown" } From a3d9b3409491769785c76f516bdf5e11207e4dcb Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 19 May 2026 06:25:14 -0700 Subject: [PATCH 02/19] Python(feat): pytest plugin improvements (#567) --- python/docs/examples/pytest_plugin.md | 173 +++++-- python/lib/sift_client/_tests/conftest.py | 4 - .../_tests/pytest_plugin/__init__.py | 0 .../_tests/pytest_plugin/conftest.py | 54 +++ .../pytest_plugin/test_configuration.py | 394 ++++++++++++++++ .../_tests/pytest_plugin/test_credentials.py | 117 +++++ .../lib/sift_client/_tests/util/conftest.py | 35 +- python/lib/sift_client/pytest_plugin.py | 436 ++++++++++++++++++ .../sift_client/util/test_results/__init__.py | 120 +++-- .../util/test_results/pytest_util.py | 206 --------- python/pyproject.toml | 9 + 11 files changed, 1255 insertions(+), 293 deletions(-) create mode 100644 python/lib/sift_client/_tests/pytest_plugin/__init__.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/conftest.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_configuration.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_credentials.py create mode 100644 python/lib/sift_client/pytest_plugin.py delete mode 100644 python/lib/sift_client/util/test_results/pytest_util.py diff --git a/python/docs/examples/pytest_plugin.md 
b/python/docs/examples/pytest_plugin.md index cf56dd75e..3557dd9c7 100644 --- a/python/docs/examples/pytest_plugin.md +++ b/python/docs/examples/pytest_plugin.md @@ -9,11 +9,13 @@ This page walks through wiring the plugin into a project, the fixtures and hooks it provides, and the patterns you'll use day-to-day. !!! info "Where the plugin lives" - The plugin is part of `sift_client.util.test_results`. It is **not** - registered as a `pytest11` entry point. Projects opt in with a - `from sift_client.util.test_results import *` in their `conftest.py`. - That import is what wires up the fixtures, the CLI options, and the - `pytest_runtest_makereport` hook. + The plugin lives at `sift_client.pytest_plugin`. It is + **not** registered as a `pytest11` entry point. Projects opt in with a + `pytest_plugins` declaration in their top-level `conftest.py`. Pytest + then loads the module as a real plugin: the fixtures, CLI options, and + `pytest_runtest_makereport` hook all register through standard pytest + machinery, so `pytest --trace-config` lists it and + `pytest -p no:sift_client.pytest_plugin` disables it. ## Install @@ -33,9 +35,26 @@ The `SIFT_GRPC_URI` and `SIFT_REST_URI` are the gRPC and REST endpoints for your ## Wire the plugin into `conftest.py` -Two things are required: a session-scoped `sift_client` fixture (the plugin's -`report_context` fixture resolves it by name), and a star-import that registers -the plugin's fixtures into the conftest's namespace. +A single `pytest_plugins` declaration in your top-level `conftest.py` is all +that's required. The plugin ships a default `sift_client` fixture that reads +`SIFT_API_KEY`, `SIFT_GRPC_URI`, and `SIFT_REST_URI` from the environment. + +```python title="conftest.py" +from dotenv import load_dotenv + +load_dotenv() + +pytest_plugins = ["sift_client.pytest_plugin"] +``` + +That's the whole setup. Every test in the session will now create a step on a +single shared `TestReport`. 
+ +### Customizing the `SiftClient` + +To construct the client differently (custom TLS, timeouts, alternate +credentials, etc.), override the `sift_client` fixture in your conftest. The +plugin's default falls away in favor of your definition. ```python title="conftest.py" import os @@ -45,30 +64,23 @@ from dotenv import load_dotenv from sift_client import SiftClient, SiftConnectionConfig -# Star-import wires fixtures + hooks + CLI options into pytest collection. -from sift_client.util.test_results import * - load_dotenv() +pytest_plugins = ["sift_client.pytest_plugin"] + @pytest.fixture(scope="session") def sift_client() -> SiftClient: - grpc_url = os.getenv("SIFT_GRPC_URI") - rest_url = os.getenv("SIFT_REST_URI") - api_key = os.getenv("SIFT_API_KEY") - return SiftClient( connection_config=SiftConnectionConfig( - api_key=api_key, - grpc_url=grpc_url, - rest_url=rest_url, + api_key=os.getenv("SIFT_API_KEY"), + grpc_url=os.getenv("SIFT_GRPC_URI"), + rest_url=os.getenv("SIFT_REST_URI"), + use_ssl=False, ) ) ``` -That's the whole setup. Every test in the session will now create a step on a -single shared `TestReport`. - ## Plugin provided fixtures | Name | Kind | Scope | Purpose | @@ -86,17 +98,82 @@ single shared `TestReport`. | `--no-sift-test-results-git-metadata` | git metadata on | Skip capturing git repo/branch/commit on the report's metadata. | | `--sift-test-results-check-connection` | off | Make `report_context`, `step`, and `module_substep` no-op (yield `None`) when `client_has_connection` is `False`. Lets the same suite run locally without a Sift backend. | -These can be set permanently in `pytest.ini`: +These can be passed permanently via `addopts`: ```ini title="pytest.ini" [pytest] addopts = --sift-test-results-check-connection ``` +Or set the matching ini key directly (recommended for stable per-project +configuration). Each CLI flag has a corresponding key under +`[tool.pytest.ini_options]` in `pyproject.toml` or `[pytest]` in `pytest.ini`. 
+CLI flags, when passed, override the ini values. + +| Ini key | Type | Equivalent CLI flag | +|---|---|---| +| `sift_test_results_log_file` | string (`true` / `false` / `none` / path) | `--sift-test-results-log-file=` | +| `sift_test_results_git_metadata` | bool (default `true`) | `--no-sift-test-results-git-metadata` (sets to `false`) | +| `sift_test_results_check_connection` | bool (default `false`) | `--sift-test-results-check-connection` | +| `sift_test_results_autouse` | bool (default `true`) | _(no CLI flag; controls the marker gate below)_ | + +The default `sift_client` fixture reads its two URIs from the environment first +and falls back to ini keys when the env vars are unset. `SIFT_API_KEY` is +intentionally env-only — keep it out of source control and supply it through +`pytest-dotenv` (see [API key handling](#api-key-handling) below). The env +var wins when both are set, so URIs injected into a CI environment +continue to override values committed to `pyproject.toml`. There are no CLI +flags for credentials. + +| Ini key | Environment variable | Notes | +|---|---|---| +| _(none)_ | `SIFT_API_KEY` | Env-only. Use `.env` + `pytest-dotenv` locally; inject from your secret store in CI. | +| `sift_grpc_uri` | `SIFT_GRPC_URI` | Stable per-org gRPC endpoint; safe to commit. | +| `sift_rest_uri` | `SIFT_REST_URI` | Stable per-org REST endpoint; safe to commit. 
| + +```toml title="pyproject.toml" +[tool.pytest.ini_options] +sift_test_results_check_connection = true +sift_test_results_log_file = "false" +sift_test_results_git_metadata = false +sift_grpc_uri = "your-org.sift.example:443" +sift_rest_uri = "https://your-org.sift.example" +``` + +```ini title="pytest.ini" +[pytest] +sift_test_results_check_connection = true +sift_test_results_log_file = false +sift_test_results_git_metadata = false +sift_grpc_uri = your-org.sift.example:443 +sift_rest_uri = https://your-org.sift.example +``` + +#### API key handling + +`SIFT_API_KEY` is deliberately read from the process environment only. The +recommended workflow uses the +[`pytest-dotenv`](https://pypi.org/project/pytest-dotenv/) plugin (already a +dependency of `sift-stack-py`), which loads variables from a `.env` file +into `os.environ` before tests run. + +1. Add `.env` to `.gitignore`. +2. Drop your key into `.env` at the project root: + + ```bash title=".env" + SIFT_API_KEY=sk-...your-key... + ``` + +3. In CI, set `SIFT_API_KEY` directly via your provider's secret manager + instead of committing a `.env` file. + +`pytest-dotenv` picks the file up automatically; no `pytest_configure` +glue is needed. + !!! warning "FedRAMP / shared environments" - Pass `--sift-test-results-log-file=false` to skip the temp file + worker - pipeline. Create/update calls then run inline against the API instead of - being deferred through a subprocess. + Pass `--sift-test-results-log-file=false` (or set the ini key to `"false"`) + to skip the temp file + worker pipeline. Create/update calls then run + inline against the API instead of being deferred through a subprocess. ### Report metadata captured automatically @@ -122,6 +199,50 @@ metadata), call `report_context.report.update({...})` from any test or fixture. See [Linking a Run](#linking-a-run-to-the-report) for the same pattern applied to `run_id`. 
+## Controlling which tests produce reports + +By default, every test in the session produces a Sift step. Two markers +and one ini key let you narrow that to a specific set of tests, which is +useful when a repo holds tests that you don't want included in the Sift test report. + +| Setting | Effect | +|---------------------------------------------------------|----------------------------------------------------------------------------------------------| +| `sift_test_results_autouse = false` in `pyproject.toml` | Flip the project-wide default off. Tests no longer produce steps unless explicitly opted in. | +| `@pytest.mark.sift_include` on a test, class, or module | Force reporting on for that scope, regardless of the project default. | +| `@pytest.mark.sift_exclude` on a test, class, or module | Force reporting off for that scope, regardless of the project default. | + +Markers applied at any level (test, class, or module) are honored; `sift_exclude` beats `sift_include` when both apply. +A `pytestmark` at the class or module level applies to every test in scope. + +### Bulk-applying a marker to a directory + +To opt an entire directory in (or out) without editing each file, hook +`pytest_collection_modifyitems` in the directory's `conftest.py`: + +```python title="tests/example/conftest.py" +from pathlib import Path + +import pytest + +_HERE = Path(__file__).parent + + +def pytest_collection_modifyitems(config, items): + for item in items: + try: + item.path.relative_to(_HERE) + except ValueError: + continue + item.add_marker(pytest.mark.sift_include) +``` + +This applies `sift_include` to every test collected under `tests/example/`. +Combine with `sift_test_results_autouse = false` in `pyproject.toml` for +opting in to specific directories. + +`pytest_collection_modifyitems` receives every item in the session, not just +this directory's, so the `relative_to` filter is what scopes the marker. + ## Basic usage With the conftest in place, the simplest test needs nothing extra. 
The `step` @@ -585,7 +706,7 @@ automatic skip. ```python title="conftest.py" import pytest -from sift_client.util.test_results import * +pytest_plugins = ["sift_client.pytest_plugin"] @pytest.fixture(autouse=True) diff --git a/python/lib/sift_client/_tests/conftest.py b/python/lib/sift_client/_tests/conftest.py index 5683182e5..79b079d39 100644 --- a/python/lib/sift_client/_tests/conftest.py +++ b/python/lib/sift_client/_tests/conftest.py @@ -78,10 +78,6 @@ def ci_pytest_tag(sift_client): return tag -# Import the Sift test results fixtures the way we recommend to users. -from sift_client.util.test_results import * # noqa: F403 - - def pytest_configure(config: pytest.Config) -> None: """Enable the Sift connection-check mode for the fixtures used in this test suite since we run w/ mock client in non-integration tests.""" config.option.sift_test_results_check_connection = True diff --git a/python/lib/sift_client/_tests/pytest_plugin/__init__.py b/python/lib/sift_client/_tests/pytest_plugin/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/lib/sift_client/_tests/pytest_plugin/conftest.py b/python/lib/sift_client/_tests/pytest_plugin/conftest.py new file mode 100644 index 000000000..1fbd61e46 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/conftest.py @@ -0,0 +1,54 @@ +"""Shared helpers for the pytest-plugin test suite. + +The tests in this directory drive inner pytester sessions to exercise the +plugin's behavior in isolation. The fixtures below produce the boilerplate +conftests those inner sessions need: + +- ``write_plugin_conftest``: minimal conftest that loads the plugin +- ``write_probe_conftest``: conftest that loads the plugin and runs a probe + block inside ``pytest_configure``, useful for inspecting internal state + without running tests against a real backend + +Every test in this suite invokes the inner session via +``pytester.runpytest_subprocess(...)`` rather than ``pytester.runpytest(...)``. 
+``runpytest`` runs the inner pytest in-process, which re-imports the Sift +plugin on each test; the plugin transitively imports numpy, whose C +extensions refuse to initialize twice in one process and raise +``cannot load module more than once per process``. Spawning a subprocess +gives each inner session a fresh interpreter and sidesteps that guard. +""" + +from __future__ import annotations + +import textwrap +from typing import Callable + +import pytest + + +@pytest.fixture +def write_plugin_conftest(pytester: pytest.Pytester) -> Callable[[], None]: + """Return a callable that writes a minimal conftest loading the plugin.""" + + def _write() -> None: + pytester.makeconftest('pytest_plugins = ["sift_client.pytest_plugin"]') + + return _write + + +@pytest.fixture +def write_probe_conftest(pytester: pytest.Pytester) -> Callable[[str], None]: + """Return a callable that writes a conftest running ``probe_body`` in ``pytest_configure``. + + ``probe_body`` is python source that runs at config time with ``config`` + in scope; use ``print(...)`` calls and capture them with + ``result.stdout.fnmatch_lines``. + """ + + def _write(probe_body: str) -> None: + pytester.makeconftest( + 'pytest_plugins = ["sift_client.pytest_plugin"]\n\n' + "def pytest_configure(config):\n" + textwrap.indent(textwrap.dedent(probe_body), " ") + ) + + return _write diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py b/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py new file mode 100644 index 000000000..9b9be2d63 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py @@ -0,0 +1,394 @@ +"""Tests for the plugin's CLI/ini configuration surface. + +Covers flag parsing, ini-key resolution, CLI-over-ini precedence, the +defaults that apply when nothing is set, and the marker-based gate that +governs the autouse fixtures. Credentials are tested in +``test_credentials.py``. 
+""" + +from __future__ import annotations + +import textwrap +from typing import TYPE_CHECKING, Callable + +if TYPE_CHECKING: + from pathlib import Path + + import pytest + + +class TestIniConfiguration: + """`addini` keys configure the plugin via pyproject.toml / pytest.ini.""" + + def test_ini_log_file_none( + self, + pytester: pytest.Pytester, + write_probe_conftest: Callable[[str], None], + ) -> None: + write_probe_conftest( + """ + from sift_client.pytest_plugin import _resolve_log_file + print("RESOLVED:", _resolve_log_file(config)) + """, + ) + pytester.makepyprojecttoml( + """ + [tool.pytest.ini_options] + sift_test_results_log_file = "none" + """ + ) + pytester.makepyfile("def test_noop(): pass") + result = pytester.runpytest_subprocess("-s", "--co") + result.stdout.fnmatch_lines(["RESOLVED: None"]) + + def test_python_false_disables_log_file( + self, + pytester: pytest.Pytester, + write_probe_conftest: Callable[[str], None], + ) -> None: + """`config.option.sift_test_results_log_file = False` disables logging. + + Conftests use this pattern (see lib/sift_client/_tests/util/conftest.py) + to opt their subtree out of log-file mode. Regression test for the + resolver case where Python `False` was previously confused with `None` + and silently kept the temp-file default. 
+ """ + write_probe_conftest( + """ + config.option.sift_test_results_log_file = False + from sift_client.pytest_plugin import _resolve_log_file + print("RESOLVED:", _resolve_log_file(config)) + """, + ) + pytester.makepyfile("def test_noop(): pass") + result = pytester.runpytest_subprocess("-s", "--co") + result.stdout.fnmatch_lines(["RESOLVED: None"]) + + def test_ini_log_file_path( + self, + pytester: pytest.Pytester, + tmp_path: Path, + write_probe_conftest: Callable[[str], None], + ) -> None: + log_path = tmp_path / "sift-run.jsonl" + write_probe_conftest( + """ + from sift_client.pytest_plugin import _resolve_log_file + print("RESOLVED:", _resolve_log_file(config)) + """, + ) + pytester.makepyprojecttoml( + f""" + [tool.pytest.ini_options] + sift_test_results_log_file = "{log_path}" + """ + ) + pytester.makepyfile("def test_noop(): pass") + result = pytester.runpytest_subprocess("-s", "--co") + result.stdout.fnmatch_lines([f"RESOLVED: {log_path}"]) + + def test_ini_check_connection_true( + self, + pytester: pytest.Pytester, + write_probe_conftest: Callable[[str], None], + ) -> None: + write_probe_conftest( + """ + from sift_client.pytest_plugin import _check_connection_enabled + print("CHECK:", _check_connection_enabled(config)) + """, + ) + pytester.makepyprojecttoml( + """ + [tool.pytest.ini_options] + sift_test_results_check_connection = true + """ + ) + pytester.makepyfile("def test_noop(): pass") + result = pytester.runpytest_subprocess("-s", "--co") + result.stdout.fnmatch_lines(["CHECK: True"]) + + def test_ini_git_metadata_false( + self, + pytester: pytest.Pytester, + write_probe_conftest: Callable[[str], None], + ) -> None: + write_probe_conftest( + """ + print("INI_GIT:", config.getini("sift_test_results_git_metadata")) + """, + ) + pytester.makepyprojecttoml( + """ + [tool.pytest.ini_options] + sift_test_results_git_metadata = false + """ + ) + pytester.makepyfile("def test_noop(): pass") + result = pytester.runpytest_subprocess("-s", "--co") + 
result.stdout.fnmatch_lines(["INI_GIT: False"]) + + def test_cli_overrides_ini( + self, + pytester: pytest.Pytester, + tmp_path: Path, + write_probe_conftest: Callable[[str], None], + ) -> None: + """A CLI flag takes precedence over the matching ini key.""" + cli_path = tmp_path / "cli-wins.jsonl" + write_probe_conftest( + """ + from sift_client.pytest_plugin import _resolve_log_file + print("RESOLVED:", _resolve_log_file(config)) + """, + ) + pytester.makepyprojecttoml( + """ + [tool.pytest.ini_options] + sift_test_results_log_file = "none" + """ + ) + pytester.makepyfile("def test_noop(): pass") + result = pytester.runpytest_subprocess( + "-s", "--co", f"--sift-test-results-log-file={cli_path}" + ) + result.stdout.fnmatch_lines([f"RESOLVED: {cli_path}"]) + + def test_cli_check_connection_flag( + self, + pytester: pytest.Pytester, + write_probe_conftest: Callable[[str], None], + ) -> None: + """The ``--sift-test-results-check-connection`` CLI flag flips the resolver to True.""" + write_probe_conftest( + """ + from sift_client.pytest_plugin import _check_connection_enabled + print("CHECK:", _check_connection_enabled(config)) + """, + ) + pytester.makepyfile("def test_noop(): pass") + result = pytester.runpytest_subprocess("-s", "--co", "--sift-test-results-check-connection") + result.stdout.fnmatch_lines(["CHECK: True"]) + + def test_cli_no_git_metadata_flag( + self, + pytester: pytest.Pytester, + write_probe_conftest: Callable[[str], None], + ) -> None: + """The ``--no-sift-test-results-git-metadata`` CLI flag flips git_metadata to False. + + Guards the negation flag's ``dest`` binding: the flag name doesn't match + the ini key, so a broken ``dest`` would silently fall back to the ini + default and pass every other test in this file. 
+ """ + write_probe_conftest( + """ + print("CLI_GIT:", config.getoption("sift_test_results_git_metadata")) + """, + ) + pytester.makepyfile("def test_noop(): pass") + result = pytester.runpytest_subprocess("-s", "--co", "--no-sift-test-results-git-metadata") + result.stdout.fnmatch_lines(["CLI_GIT: False"]) + + def test_defaults_when_neither_set( + self, + pytester: pytest.Pytester, + write_probe_conftest: Callable[[str], None], + ) -> None: + write_probe_conftest( + """ + from sift_client.pytest_plugin import ( + _check_connection_enabled, + _resolve_log_file, + ) + print("RESOLVED:", _resolve_log_file(config)) + print("CHECK:", _check_connection_enabled(config)) + print("INI_GIT:", config.getini("sift_test_results_git_metadata")) + """, + ) + pytester.makepyfile("def test_noop(): pass") + result = pytester.runpytest_subprocess("-s", "--co") + result.stdout.fnmatch_lines( + [ + "RESOLVED: True", + "CHECK: False", + "INI_GIT: True", + ] + ) + + +# A session-scoped `report_context` stub for the autouse-gate tests. Overrides +# the plugin's real `report_context` so the inner pytest sessions don't try to +# talk to a Sift backend; the gate tests only need to observe whether `step` +# resolves to a real value or to None. 
+_GATE_INNER_CONFTEST = textwrap.dedent( + """ + from unittest.mock import MagicMock + + import pytest + + pytest_plugins = ["sift_client.pytest_plugin"] + + + @pytest.fixture(scope="session") + def report_context(): + yield MagicMock() + """ +) + + +class TestAutouseGate: + """`sift_include` / `sift_exclude` markers and the `sift_test_results_autouse` ini gate.""" + + def test_default_ini_true_activates(self, pytester: pytest.Pytester) -> None: + """Plugin default (ini absent) keeps the autouse fixtures active.""" + pytester.makeconftest(_GATE_INNER_CONFTEST) + pytester.makepyfile( + """ + def test_inner(step): + assert step is not None + """ + ) + result = pytester.runpytest_subprocess() + result.assert_outcomes(passed=1) + + def test_default_ini_false_skips(self, pytester: pytest.Pytester) -> None: + """`sift_test_results_autouse = false` makes the autouse fixtures no-op by default.""" + pytester.makeconftest(_GATE_INNER_CONFTEST) + pytester.makepyprojecttoml( + """ + [tool.pytest.ini_options] + sift_test_results_autouse = false + """ + ) + pytester.makepyfile( + """ + def test_inner(step): + assert step is None + """ + ) + result = pytester.runpytest_subprocess() + result.assert_outcomes(passed=1) + + def test_sift_include_marker_forces_on(self, pytester: pytest.Pytester) -> None: + """`@pytest.mark.sift_include` overrides ini-false to enable the gate.""" + pytester.makeconftest(_GATE_INNER_CONFTEST) + pytester.makepyprojecttoml( + """ + [tool.pytest.ini_options] + sift_test_results_autouse = false + """ + ) + pytester.makepyfile( + """ + import pytest + + @pytest.mark.sift_include + def test_inner(step): + assert step is not None + """ + ) + result = pytester.runpytest_subprocess() + result.assert_outcomes(passed=1) + + def test_sift_exclude_marker_forces_off(self, pytester: pytest.Pytester) -> None: + """`@pytest.mark.sift_exclude` overrides ini-true to disable the gate.""" + pytester.makeconftest(_GATE_INNER_CONFTEST) + pytester.makepyfile( + """ + import 
pytest + + @pytest.mark.sift_exclude + def test_inner(step): + assert step is None + """ + ) + result = pytester.runpytest_subprocess() + result.assert_outcomes(passed=1) + + def test_exclude_beats_include(self, pytester: pytest.Pytester) -> None: + """When both markers are present, `sift_exclude` wins (safer default).""" + pytester.makeconftest(_GATE_INNER_CONFTEST) + pytester.makepyfile( + """ + import pytest + + @pytest.mark.sift_include + @pytest.mark.sift_exclude + def test_inner(step): + assert step is None + """ + ) + result = pytester.runpytest_subprocess() + result.assert_outcomes(passed=1) + + def test_module_pytestmark_inherits(self, pytester: pytest.Pytester) -> None: + """Module-level `pytestmark = pytest.mark.sift_include` covers every test in the module.""" + pytester.makeconftest(_GATE_INNER_CONFTEST) + pytester.makepyprojecttoml( + """ + [tool.pytest.ini_options] + sift_test_results_autouse = false + """ + ) + pytester.makepyfile( + """ + import pytest + + pytestmark = pytest.mark.sift_include + + def test_inner_a(step): + assert step is not None + + def test_inner_b(step): + assert step is not None + """ + ) + result = pytester.runpytest_subprocess() + result.assert_outcomes(passed=2) + + def test_bulk_apply_via_conftest_hook(self, pytester: pytest.Pytester) -> None: + """A subtree opts in via `pytest_collection_modifyitems`; siblings stay off. + + Regression test for this repo's wiring pattern: the project default is + autouse-off, the integration subtree's conftest bulk-applies + `sift_include`, and sibling subtrees remain disabled. Verifies the + per-directory mechanism works in a single pytest invocation. 
+ """ + pytester.makeconftest(_GATE_INNER_CONFTEST) + pytester.makepyprojecttoml( + """ + [tool.pytest.ini_options] + sift_test_results_autouse = false + """ + ) + included = pytester.mkdir("included_subtree") + (included / "conftest.py").write_text( + textwrap.dedent( + """ + from pathlib import Path + + import pytest + + _HERE = Path(__file__).parent + + + def pytest_collection_modifyitems(config, items): + for item in items: + try: + item.path.relative_to(_HERE) + except ValueError: + continue + item.add_marker(pytest.mark.sift_include) + """ + ) + ) + (included / "test_included.py").write_text( + "def test_included(step):\n assert step is not None\n" + ) + untouched = pytester.mkdir("untouched_subtree") + (untouched / "test_untouched.py").write_text( + "def test_untouched(step):\n assert step is None\n" + ) + result = pytester.runpytest_subprocess() + result.assert_outcomes(passed=2) diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_credentials.py b/python/lib/sift_client/_tests/pytest_plugin/test_credentials.py new file mode 100644 index 000000000..9ee628e69 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_credentials.py @@ -0,0 +1,117 @@ +"""Tests for the default ``sift_client`` fixture's credential resolution. + +Covers the env-var-then-ini fallback for URIs, the env-only handling of +``SIFT_API_KEY``, and the error path that names missing credentials. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable + +if TYPE_CHECKING: + import pytest + + +class TestCredentials: + """The default ``sift_client`` fixture's resolution of env vars and ini keys.""" + + def test_uris_from_ini( + self, + pytester: pytest.Pytester, + monkeypatch: pytest.MonkeyPatch, + write_plugin_conftest: Callable[[], None], + ) -> None: + """The default sift_client fixture reads URI credentials from ini when env vars are unset.""" + monkeypatch.setenv("SIFT_API_KEY", "env-key") + monkeypatch.delenv("SIFT_GRPC_URI", raising=False) + monkeypatch.delenv("SIFT_REST_URI", raising=False) + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.pytest.ini_options] + sift_grpc_uri = "ini-grpc:1234" + sift_rest_uri = "https://ini-rest" + sift_test_results_check_connection = true + sift_test_results_log_file = "false" + """ + ) + pytester.makepyfile( + """ + def test_credentials_loaded(sift_client): + cfg = sift_client.grpc_client._config + assert cfg.api_key == "env-key" + assert "ini-grpc:1234" in cfg.uri + """ + ) + result = pytester.runpytest_subprocess() + result.assert_outcomes(passed=1) + + def test_env_var_overrides_ini_uri( + self, + pytester: pytest.Pytester, + monkeypatch: pytest.MonkeyPatch, + write_plugin_conftest: Callable[[], None], + ) -> None: + """When both env var and ini set a URI, the env var wins.""" + monkeypatch.setenv("SIFT_API_KEY", "env-key") + monkeypatch.setenv("SIFT_GRPC_URI", "env-grpc:9999") + monkeypatch.delenv("SIFT_REST_URI", raising=False) + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.pytest.ini_options] + sift_grpc_uri = "ini-grpc:1234" + sift_rest_uri = "https://ini-rest" + sift_test_results_check_connection = true + sift_test_results_log_file = "false" + """ + ) + pytester.makepyfile( + """ + def test_env_wins(sift_client): + assert "env-grpc:9999" in sift_client.grpc_client._config.uri + """ + ) + result = pytester.runpytest_subprocess() + 
result.assert_outcomes(passed=1) + + def test_api_key_ignored_from_ini( + self, + pytester: pytest.Pytester, + monkeypatch: pytest.MonkeyPatch, + write_plugin_conftest: Callable[[], None], + ) -> None: + """`sift_api_key` is not registered as an ini key; the fixture refuses to use it.""" + for name in ("SIFT_API_KEY", "SIFT_GRPC_URI", "SIFT_REST_URI"): + monkeypatch.delenv(name, raising=False) + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.pytest.ini_options] + sift_api_key = "should-be-ignored" + sift_grpc_uri = "ini-grpc:1234" + sift_rest_uri = "https://ini-rest" + """ + ) + pytester.makepyfile("def test_should_not_run(): pass") + result = pytester.runpytest_subprocess() + assert result.ret != 0 + combined = "\n".join(result.outlines + result.errlines) + assert "SIFT_API_KEY" in combined, combined + + def test_missing_credentials_named_in_error( + self, + pytester: pytest.Pytester, + monkeypatch: pytest.MonkeyPatch, + write_plugin_conftest: Callable[[], None], + ) -> None: + """A missing credential aborts with all missing names listed.""" + for name in ("SIFT_API_KEY", "SIFT_GRPC_URI", "SIFT_REST_URI"): + monkeypatch.delenv(name, raising=False) + write_plugin_conftest() + pytester.makepyfile("def test_should_not_run(): pass") + result = pytester.runpytest_subprocess() + assert result.ret != 0 + combined = "\n".join(result.outlines + result.errlines) + for name in ("SIFT_API_KEY", "SIFT_GRPC_URI", "SIFT_REST_URI"): + assert name in combined, combined diff --git a/python/lib/sift_client/_tests/util/conftest.py b/python/lib/sift_client/_tests/util/conftest.py index 45279cca6..2f371e69e 100644 --- a/python/lib/sift_client/_tests/util/conftest.py +++ b/python/lib/sift_client/_tests/util/conftest.py @@ -1,14 +1,35 @@ -import pytest +from pathlib import Path +import pytest -def pytest_addoption(parser: pytest.Parser) -> None: - existing_options = [opt.names() for opt in parser._anonymous.options] - # Flatten the list of lists into a single list of 
strings - flat_options = [item for sublist in existing_options for item in sublist] - if not any("--sift-test-results-log-file" in name for name in flat_options): - parser.addoption("--sift-test-results-log-file", action="store_true", default=False) +_HERE = Path(__file__).parent def pytest_configure(config: pytest.Config) -> None: """Configure the pytest configuration to disable the Sift test results log file.""" config.option.sift_test_results_log_file = False + + +def pytest_collection_modifyitems(config: pytest.Config, items: "list[pytest.Item]") -> None: + """Bulk-apply ``@pytest.mark.sift_include`` to integration tests under util/. + + The project-wide default in ``pyproject.toml`` is ``sift_test_results_autouse + = false`` so unit tests pay nothing for the globally-loaded Sift plugin. + Integration tests in this subtree still need the autouse fixtures, so this + hook flips the gate back on for any test already marked + ``@pytest.mark.integration``. Unit tests in the same directory (e.g. + ``test_cel_utils.py``) are left alone. + + ``pytest_collection_modifyitems`` receives all items in the session (pytest + does not auto-scope it to the conftest's directory), so we filter by path + explicitly. ``Path.relative_to`` is the 3.8-compatible form of the path + containment check (``Path.is_relative_to`` arrived in 3.9). 
+ """ + for item in items: + try: + item.path.relative_to(_HERE) + except ValueError: + continue + if item.get_closest_marker("integration") is None: + continue + item.add_marker(pytest.mark.sift_include) diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py new file mode 100644 index 000000000..f2699a954 --- /dev/null +++ b/python/lib/sift_client/pytest_plugin.py @@ -0,0 +1,436 @@ +from __future__ import annotations + +import os +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import TYPE_CHECKING, Any, Generator + +import pytest + +from sift_client import SiftClient, SiftConnectionConfig +from sift_client.sift_types.test_report import TestStatus +from sift_client.util.test_results import ReportContext + +if TYPE_CHECKING: + from sift_client.util.test_results.context_manager import NewStep + +REPORT_CONTEXT: ReportContext | None = None + + +@dataclass(frozen=True) +class _Option: + """A single Sift plugin setting, registered as a CLI flag and/or an ini key. + + ``ini_name`` is used as both the ini key and the CLI ``dest``, so a value + set either way lands on the same config slot. ``cli_flag=None`` makes the + option ini-only (e.g. the URI fallbacks). + """ + + ini_name: str + ini_help: str + cli_flag: str | None = None + cli_help: str | None = None + action: str | None = None + ini_type: str | None = None + ini_default: Any = None + + +_LOG_FILE = _Option( + cli_flag="--sift-test-results-log-file", + ini_name="sift_test_results_log_file", + cli_help="Path to write the Sift test result log file. " + "Use 'true' (default) to auto-create a temp file, " + "False, 'false', or 'none' to disable logging, " + "or a file path to write to a specific location.", + ini_help="Default value for --sift-test-results-log-file. 
Same values " + "accepted as the CLI flag (path, 'true', 'false', 'none').", +) + +_GIT_METADATA = _Option( + cli_flag="--no-sift-test-results-git-metadata", + ini_name="sift_test_results_git_metadata", + action="store_false", + cli_help="Exclude git metadata from the Sift test results. " + "Git metadata (repo, branch, commit) is included by default.", + ini_help="Include git repo/branch/commit in the report (true/false). " + "Defaults to true. The --no-sift-test-results-git-metadata CLI flag " + "overrides this when passed.", + ini_type="bool", + ini_default=True, +) + +_CHECK_CONNECTION = _Option( + cli_flag="--sift-test-results-check-connection", + ini_name="sift_test_results_check_connection", + action="store_true", + cli_help="Skip the sift test-result fixtures (report_context, step, module_substep) " + "when the Sift client has no connection to the server. Requires a " + "`client_has_connection` fixture to be available in the test session.", + ini_help="When true, skip the sift test-result fixtures if the client has " + "no connection (same effect as --sift-test-results-check-connection). " + "Defaults to false.", + ini_type="bool", + ini_default=False, +) + +_GRPC_URI = _Option( + ini_name="sift_grpc_uri", + ini_help="Sift gRPC endpoint URI. The default `sift_client` fixture " + "prefers the SIFT_GRPC_URI environment variable and falls back to " + "this ini value.", +) + +_REST_URI = _Option( + ini_name="sift_rest_uri", + ini_help="Sift REST endpoint URI. The default `sift_client` fixture " + "prefers the SIFT_REST_URI environment variable and falls back to " + "this ini value.", +) + +_AUTOUSE = _Option( + ini_name="sift_test_results_autouse", + ini_help="Default for the Sift autouse fixtures (report_context, step, " + "module_substep). When true (default), tests are included unless marked " + "with @pytest.mark.sift_exclude. When false, tests are skipped unless " + "marked with @pytest.mark.sift_include. 
Bulk-apply markers in a " + "directory's conftest via `pytest_collection_modifyitems`.", + ini_type="bool", + ini_default=True, +) + +_OPTIONS: tuple[_Option, ...] = ( + _LOG_FILE, + _GIT_METADATA, + _CHECK_CONNECTION, + _GRPC_URI, + _REST_URI, + _AUTOUSE, +) + + +def pytest_addoption(parser: pytest.Parser) -> None: + """Register Sift-specific command-line options and ini keys. + + Each option can be set on the command line or under ``[tool.pytest.ini_options]`` + in ``pyproject.toml`` (or ``[pytest]`` in ``pytest.ini``). CLI values take + precedence over ini values, which take precedence over the built-in default. + """ + group = parser.getgroup("sift", description="Sift test results") + for opt in _OPTIONS: + if opt.cli_flag is not None: + cli_kwargs: dict[str, Any] = { + "dest": opt.ini_name, + "default": None, + "help": opt.cli_help, + } + if opt.action is not None: + cli_kwargs["action"] = opt.action + group.addoption(opt.cli_flag, **cli_kwargs) + + ini_kwargs: dict[str, Any] = {"help": opt.ini_help, "default": opt.ini_default} + if opt.ini_type is not None: + ini_kwargs["type"] = opt.ini_type + parser.addini(opt.ini_name, **ini_kwargs) + + +def pytest_configure(config: pytest.Config) -> None: + """Register the Sift gate markers so they show up in `pytest --markers`.""" + config.addinivalue_line( + "markers", + "sift_include: force the Sift autouse fixtures to activate for this test " + "regardless of the `sift_test_results_autouse` ini default.", + ) + config.addinivalue_line( + "markers", + "sift_exclude: force the Sift autouse fixtures to skip this test " + "regardless of the `sift_test_results_autouse` ini default.", + ) + + +def _sift_enabled_for(node: pytest.Item | pytest.Collector, default: bool) -> bool: + """Resolve the Sift gate for a node: sift_exclude > sift_include > default. + + `get_closest_marker` walks the node hierarchy upward, so markers applied + at any level (function, class, module, package, session) are honored. 
+ """ + if node.get_closest_marker("sift_exclude"): + return False + if node.get_closest_marker("sift_include"): + return True + return default + + +def _module_has_included_tests(request: pytest.FixtureRequest, default: bool) -> bool: + """True when at least one test in `request`'s module is gated on. + + Used by the module-scoped `module_substep` fixture to decide whether to + activate without triggering `report_context` creation for modules where + every test is excluded. + """ + module_path = request.path + for item in request.session.items: + if item.path != module_path: + continue + if _sift_enabled_for(item, default): + return True + return False + + +def _option_or_ini(pytestconfig: pytest.Config | None, opt: _Option) -> Any: + """Resolve a Sift plugin setting from CLI > ini > None. + + The ``addoption`` registrations use ``default=None`` so we can tell whether + the CLI was actually used. When the CLI didn't set a value, fall back to + the matching ``addini`` key. + """ + if pytestconfig is None: + return None + cli = pytestconfig.getoption(opt.ini_name, default=None) + if cli is not None: + return cli + try: + return pytestconfig.getini(opt.ini_name) + except (KeyError, ValueError): + return None + + +def _resolve_log_file(pytestconfig: pytest.Config | None) -> str | Path | bool | None: + """Determine log_file value from CLI flag or ini key. + + Three signal types arrive here: + + * ``None`` — unset; nothing was passed on the CLI and the ini key is + absent. Treat as the default "use a temp file." + * Python ``False`` — an explicit disable, typically set in a conftest via + ``config.option.sift_test_results_log_file = False``. Return ``None`` so + the rest of the pipeline knows to skip logging entirely. + * A string (from CLI or ini) — interpret ``"true"`` / ``"1"`` as the temp + file default, ``"false"`` / ``"none"`` as disable, anything else as a + file path. 
+ """ + raw = _option_or_ini(pytestconfig, _LOG_FILE) + if raw is False: + return None + if not raw: + return True + lower = str(raw).lower() + if lower in ("true", "1"): + return True + if lower in ("false", "none"): + return None + return Path(raw) + + +@pytest.hookimpl(tryfirst=True, hookwrapper=True) +def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo[Any]): + """Capture pytest outcomes so assertion failures and skips land on the Sift step.""" + outcome = yield + report = outcome.get_result() + if report.outcome == "skipped": + # Skipped tests bypass the autouse `step` fixture, so we record the step manually here. + if REPORT_CONTEXT: + with REPORT_CONTEXT.new_step(name=item.name) as new_step: + new_step.current_step.update({"status": TestStatus.SKIPPED}) + setattr(item, "rep_" + report.when, call) + + +def _report_context_impl( + sift_client: SiftClient, + request: pytest.FixtureRequest, + pytestconfig: pytest.Config | None = None, +) -> Generator[ReportContext | None, None, None]: + args = request.config.invocation_params.args + test_path = Path(args[0]) if args else None + if test_path is not None and test_path.exists(): + base_name = test_path.name + test_case: Path | str = test_path + else: + base_name = "pytest " + " ".join(args) if args else "pytest" + test_case = base_name + log_file = _resolve_log_file(pytestconfig) + git_metadata = _option_or_ini(pytestconfig, _GIT_METADATA) + include_git_metadata = True if git_metadata is None else bool(git_metadata) + with ReportContext( + sift_client, + name=f"{base_name} {datetime.now(timezone.utc).isoformat()}", + test_case=str(test_case), + log_file=log_file, + include_git_metadata=include_git_metadata, + ) as context: + global REPORT_CONTEXT + REPORT_CONTEXT = context + yield context + + +def _check_connection_enabled(pytestconfig: pytest.Config | None) -> bool: + """Return True when the caller opted into the check-connection mode via CLI or ini.""" + return bool(_option_or_ini(pytestconfig, 
_CHECK_CONNECTION)) + + +def _has_sift_connection(request: pytest.FixtureRequest) -> bool: + """Resolve the `client_has_connection` fixture lazily; only called when the check is enabled.""" + return bool(request.getfixturevalue("client_has_connection")) + + +_CREDENTIAL_KEYS: tuple[tuple[str, _Option | None], ...] = ( + ("SIFT_API_KEY", None), # env-only; never read from ini to keep secrets out of source control. + ("SIFT_GRPC_URI", _GRPC_URI), + ("SIFT_REST_URI", _REST_URI), +) + + +def _resolve_credential( + pytestconfig: pytest.Config | None, env_name: str, opt: _Option | None +) -> str | None: + """Resolve a Sift credential: env var first, then ini key (if registered), else None.""" + env_value = os.getenv(env_name) + if env_value: + return env_value + if opt is None or pytestconfig is None: + return None + ini_value = pytestconfig.getini(opt.ini_name) + return ini_value if isinstance(ini_value, str) and ini_value else None + + +@pytest.fixture(scope="session") +def sift_client(pytestconfig: pytest.Config) -> SiftClient: + """Default ``SiftClient`` resolved from environment variables and ini keys. + + Each credential is read from its environment variable first. The URIs + (``SIFT_GRPC_URI``, ``SIFT_REST_URI``) additionally fall back to the + ``sift_grpc_uri`` / ``sift_rest_uri`` ini keys, since they are stable + per-org values that are safe to commit. ``SIFT_API_KEY`` is intentionally + env-only — use ``pytest-dotenv`` (already a project dependency) to load + it from a ``.env`` file kept out of version control. + + Projects that need custom construction (TLS toggles, custom timeouts, + etc.) can override this fixture by defining their own ``sift_client`` + in their ``conftest.py``; pytest fixture resolution prefers the local + definition. 
+ """ + resolved = {env: _resolve_credential(pytestconfig, env, opt) for env, opt in _CREDENTIAL_KEYS} + missing = [env for env, value in resolved.items() if not value] + if missing: + raise pytest.UsageError( + "Sift credentials missing: " + + ", ".join(missing) + + ". Set the environment variable(s) — pytest-dotenv loads them " + "from a `.env` file automatically — or set the URIs via " + "`sift_grpc_uri` / `sift_rest_uri` under `[tool.pytest.ini_options]` " + "in pyproject.toml, or override the sift_client fixture in your " + "conftest.py." + ) + # `or ""` is unreachable in practice since the `missing` check above guarantees + # non-None values + return SiftClient( + connection_config=SiftConnectionConfig( + api_key=resolved.get("SIFT_API_KEY") or "", + grpc_url=resolved.get("SIFT_GRPC_URI") or "", + rest_url=resolved.get("SIFT_REST_URI") or "", + ) + ) + + +@pytest.fixture(scope="session") +def report_context( + sift_client: SiftClient, request: pytest.FixtureRequest, pytestconfig: pytest.Config +) -> Generator[ReportContext | None, None, None]: + """Lazy session-scoped Sift ReportContext. + + The fixture is no longer autouse; it's instantiated on the first call to + ``request.getfixturevalue("report_context")``, which today happens inside + the gated ``step`` and ``module_substep`` fixtures. If every test in the + session is excluded via the marker gate, this fixture is never resolved + and no ReportContext (and no teardown subprocess) is created. + + The log file destination is controlled by ``--sift-test-results-log-file``. + Defaults to a temp file when not set. + + When ``--sift-test-results-check-connection`` is passed, this fixture will + yield ``None`` if the Sift client has no connection to the server. That mode + requires a ``client_has_connection`` fixture to be available in the session. 
+ """ + if _check_connection_enabled(pytestconfig) and not _has_sift_connection(request): + yield None + return + yield from _report_context_impl(sift_client, request, pytestconfig=pytestconfig) + + +def _step_impl( + report_context: ReportContext, request: pytest.FixtureRequest +) -> Generator[NewStep | None, None, None]: + name = str(request.node.name) + existing_docstring = request.node.obj.__doc__ or None + with report_context.new_step( + name=name, description=existing_docstring, assertion_as_fail_not_error=False + ) as new_step: + yield new_step + if hasattr(request.node, "rep_call") and request.node.rep_call.excinfo: + new_step.update_step_from_result( + request.node.rep_call.excinfo, + request.node.rep_call.excinfo.value, + request.node.rep_call.excinfo.tb, + ) + + +@pytest.fixture(autouse=True) +def step( + request: pytest.FixtureRequest, + pytestconfig: pytest.Config, +) -> Generator[NewStep | None, None, None]: + """Create an outer step for the function when the Sift gate is on. + + Resolves the gate via `_sift_enabled_for(request.node, ini_default)`: + `sift_exclude` marker forces off, `sift_include` forces on, otherwise the + `sift_test_results_autouse` ini default applies. When on, requests the + session `report_context` lazily — the first gated test in the session + triggers its creation, subsequent gated tests reuse it. + """ + default = bool(_option_or_ini(pytestconfig, _AUTOUSE)) + if not _sift_enabled_for(request.node, default): + yield None + return + rc = request.getfixturevalue("report_context") + if rc is None: + yield None + return + yield from _step_impl(rc, request) + + +@pytest.fixture(scope="module", autouse=True) +def module_substep( + request: pytest.FixtureRequest, + pytestconfig: pytest.Config, +) -> Generator[NewStep | None, None, None]: + """Create a per-module step when at least one test in the module is gated on. 
+ + Inspects the module's collected items rather than gating on a single marker, + so a module with mixed inclusion/exclusion still produces the module-level + step (individual `step` fixtures then decide per-test). When every test in + the module is excluded, the substep is skipped without requesting + `report_context`. + """ + default = bool(_option_or_ini(pytestconfig, _AUTOUSE)) + if not _module_has_included_tests(request, default): + yield None + return + rc = request.getfixturevalue("report_context") + if rc is None: + yield None + return + yield from _step_impl(rc, request) + + +@pytest.fixture(scope="session") +def client_has_connection(sift_client): + """Check if the SiftClient has a connection to the Sift server. + + Can be used to skip tests that require a connection to the Sift server, and is + consulted by the Sift fixtures when ``--sift-test-results-check-connection`` is set. + """ + try: + sift_client.ping.ping() + return True + except Exception: + return False diff --git a/python/lib/sift_client/util/test_results/__init__.py b/python/lib/sift_client/util/test_results/__init__.py index e7a82866c..ea213056e 100644 --- a/python/lib/sift_client/util/test_results/__init__.py +++ b/python/lib/sift_client/util/test_results/__init__.py @@ -49,78 +49,98 @@ def main(self): cleanup() ``` -## Pytest Fixtures +## Pytest Plugin -The report context and steps can also be accessed in pytest by importing the `report_context` and `step` fixtures. +The pytest plugin lives at `sift_client.pytest_plugin`. Opt in +from your `conftest.py`: -### How to use: -- These fixtures are set to autouse and will automatically create a report and steps for each test function. - - If you want each module(file) to be marked as a step w/ each test as a substep, import the `module_substep` fixture as well. -- The `report_context` fixture requires a fixture `sift_client` returning an `SiftClient` instance to be passed in. 
+```python +# conftest.py +pytest_plugins = ["sift_client.pytest_plugin"] +``` -Note: FedRAMP users: report_context will log test results to a temp file to avoid API calls during test execution. If this is a shared environment, you can disable logging by passing ``--sift-test-results-log-file=false``. +By default, every test in the session produces a Sift report: one +`TestReport` per session, one step per test function (`step`), and one +parent step per test file (`module_substep`). The plugin also registers a +default `sift_client` fixture that reads `SIFT_API_KEY`, `SIFT_GRPC_URI`, +and `SIFT_REST_URI` from the environment. Override it by defining your own +`sift_client` fixture in your conftest. -#### Configuration +Note: FedRAMP users: results are buffered to a temp file and uploaded by a +subprocess at session end (no API calls during the run). Disable the buffer +entirely with `--sift-test-results-log-file=false` for inline uploads. -Import the `pytest_addoption` function to add configuration options for Test Results to the commandline or add the options to your pyproject.toml file (https://docs.pytest.org/en/stable/reference/customize.html#configuration). If ommitted, will use the default values described below. +### Controlling which tests produce reports -- Git metadata: Include git metadata (repo, branch, commit) in the test results. Default is True. You can disable it by passing `--no-sift-test-results-git-metadata`. -- Log file: Write test results to a file. This happens automatically but you can configure specify a specific log file by passing `--sift-test-results-log-file=` or disable logging by passing `--sift-test-results-log-file=false`. -- Check connection: Pass `--sift-test-results-check-connection` (off by default) to make the `report_context`, `step`, and `module_substep` fixtures no-op when the Sift client has no connection to the server. Requires a `client_has_connection` fixture to be available. 
+The autouse fixtures fire for every test by default. To narrow that: -###### Example at top of your test file or in your conftest.py file: +- Set `sift_test_results_autouse = false` in `pyproject.toml` to flip the + project default off, then opt tests back in below. +- `@pytest.mark.sift_include` forces reporting on for a test, class, or + module. `@pytest.mark.sift_exclude` forces it off. Closest marker wins. + `sift_exclude` beats `sift_include` when both apply. +- `pytestmark` at the class or module level inherits to every test in scope. +- For a whole directory, apply the marker in bulk from that directory's + `conftest.py`: ```python -import pytest +# tests/integration/conftest.py +from pathlib import Path -@pytest.fixture(scope="session") -def sift_client() -> SiftClient: - grpc_url = os.getenv("SIFT_GRPC_URI", "localhost:50051") - rest_url = os.getenv("SIFT_REST_URI", "localhost:8080") - api_key = os.getenv("SIFT_API_KEY", "") +import pytest - client = SiftClient(api_key=api_key, grpc_url=grpc_url, rest_url=rest_url) +_HERE = Path(__file__).parent - return client -from sift_client.util.test_results import * +def pytest_collection_modifyitems(config, items): + for item in items: + try: + item.path.relative_to(_HERE) + except ValueError: + continue + item.add_marker(pytest.mark.sift_include) ``` -###### Then in your test file: +#### Configuration -```python -# Because step was already imported and set autouse=True, this test will automatically get a step created for it. -def test_no_includes(): - assert condition, "Example failure" - -# Passing the fixtures to the test function allows you to take measurements or create substeps. 
-def test_example(report_context, step): - # This will add a measurement to the current step for this function - step.measure(name="Example Measurement", value=test_string_value, bounds="expected_string_value") - - with report_context.new_step(name="Example Step") as substep: - example_measurement = tlm.read(channel_name) - substep.measure(name="Substep Measurement", value=example_measurement, bounds=(min=74.9, max=75.1)) +CLI options registered by the plugin: + +- `--sift-test-results-log-file`: Path to write the JSONL log file. `true` + (default) auto-creates a temp file. `false` or `none` disables logging. + Any other value is treated as a file path. +- `--no-sift-test-results-git-metadata`: Exclude git metadata (repo, branch, + commit) from the test report. Included by default. +- `--sift-test-results-check-connection`: Make `report_context`, `step`, and + `module_substep` no-op when the client has no connection. Requires a + `client_has_connection` fixture (the plugin ships a default). + +Each option has a matching ini key for per-project configuration under +``[tool.pytest.ini_options]`` in ``pyproject.toml`` (or ``[pytest]`` in +``pytest.ini``). CLI flags override ini values. The +``sift_test_results_autouse`` ini key (bool, default ``true``) sets the +project-wide default for the gate described above. The default +``sift_client`` fixture reads ``sift_grpc_uri`` and ``sift_rest_uri`` as +fallbacks when the corresponding env vars are unset (env vars win when +both are set). ``SIFT_API_KEY`` is env-only. Load it from a ``.env`` file +via the ``pytest-dotenv`` plugin or inject it via your CI secret manager. 
+ +```toml +[tool.pytest.ini_options] +sift_test_results_autouse = false +sift_test_results_log_file = "false" +sift_test_results_check_connection = true +sift_test_results_git_metadata = false +sift_grpc_uri = "your-org.sift.example:443" +sift_rest_uri = "https://your-org.sift.example" ``` + +To disable the plugin for a single run: +`pytest -p no:sift_client.pytest_plugin`. """ from .context_manager import NewStep, ReportContext -from .pytest_util import ( - client_has_connection, - module_substep, - pytest_addoption, - pytest_runtest_makereport, - report_context, - step, -) __all__ = [ "NewStep", "ReportContext", - "client_has_connection", - "module_substep", - "pytest_addoption", - "pytest_runtest_makereport", - "report_context", - "step", ] diff --git a/python/lib/sift_client/util/test_results/pytest_util.py b/python/lib/sift_client/util/test_results/pytest_util.py deleted file mode 100644 index a96a47fb3..000000000 --- a/python/lib/sift_client/util/test_results/pytest_util.py +++ /dev/null @@ -1,206 +0,0 @@ -from __future__ import annotations - -from datetime import datetime, timezone -from pathlib import Path -from typing import TYPE_CHECKING, Any, Generator - -import pytest - -from sift_client.sift_types.test_report import TestStatus -from sift_client.util.test_results import ReportContext - -if TYPE_CHECKING: - from sift_client.client import SiftClient - from sift_client.util.test_results.context_manager import NewStep - -REPORT_CONTEXT: ReportContext | None = None - - -def pytest_addoption(parser: pytest.Parser) -> None: - """Register Sift-specific command-line options.""" - parser.addoption( - "--sift-test-results-log-file", - default=None, - help="Path to write the Sift test result log file. 
" - "Use 'true' (default) to auto-create a temp file, " - "False, 'false', or 'none' to disable logging, " - "or a file path to write to a specific location.", - ) - parser.addoption( - "--no-sift-test-results-git-metadata", - action="store_false", - dest="sift_test_results_git_metadata", - default=True, - help="Exclude git metadata from the Sift test results. " - "Git metadata (repo, branch, commit) is included by default.", - ) - parser.addoption( - "--sift-test-results-check-connection", - action="store_true", - default=False, - help="Skip the sift test-result fixtures (report_context, step, module_substep) " - "when the Sift client has no connection to the server. Requires a " - "`client_has_connection` fixture to be available in the test session.", - ) - - -def _resolve_log_file(pytestconfig: pytest.Config | None) -> str | Path | bool | None: - """Determine log_file value from --sift-test-results-log-file option.""" - raw = None - if pytestconfig is not None: - raw = pytestconfig.getoption("--sift-test-results-log-file", default=None) - if raw is None: - return True - lower = str(raw).lower() - if lower in ("true", "1"): - return True - if lower in ("false", "none"): - return None - return Path(raw) - - -@pytest.hookimpl(tryfirst=True, hookwrapper=True) -def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo[Any]): - """You should import this hook to capture any AssertionErrors that occur during the test. If not included, any assert failures in a test will not automatically fail the step.""" - outcome = yield - report = outcome.get_result() - if report.outcome == "skipped": - # Skipped steps won't invoke the method/fixtures at all, so we need to manually record a step. 
- if REPORT_CONTEXT: - with REPORT_CONTEXT.new_step(name=item.name) as new_step: - new_step.current_step.update({"status": TestStatus.SKIPPED}) - setattr(item, "rep_" + report.when, call) - - -def _report_context_impl( - sift_client: SiftClient, - request: pytest.FixtureRequest, - pytestconfig: pytest.Config | None = None, -) -> Generator[ReportContext | None, None, None]: - args = request.config.invocation_params.args - test_path = Path(args[0]) if args else None - if test_path is not None and test_path.exists(): - base_name = test_path.name - test_case: Path | str = test_path - else: - base_name = "pytest " + " ".join(args) if args else "pytest" - test_case = base_name - log_file = _resolve_log_file(pytestconfig) - include_git_metadata = ( - bool(pytestconfig.getoption("sift_test_results_git_metadata", default=True)) - if pytestconfig - else True - ) - with ReportContext( - sift_client, - name=f"{base_name} {datetime.now(timezone.utc).isoformat()}", - test_case=str(test_case), - log_file=log_file, - include_git_metadata=include_git_metadata, - ) as context: - # Set a global so we can access this in pytest hooks. - global REPORT_CONTEXT - REPORT_CONTEXT = context - yield context - - -def _check_connection_enabled(pytestconfig: pytest.Config | None) -> bool: - """Return True when the caller opted into `--sift-test-results-check-connection`.""" - if pytestconfig is None: - return False - return bool(pytestconfig.getoption("sift_test_results_check_connection", default=False)) - - -def _has_sift_connection(request: pytest.FixtureRequest) -> bool: - """Resolve the `client_has_connection` fixture lazily; only called when the check is enabled.""" - return bool(request.getfixturevalue("client_has_connection")) - - -@pytest.fixture(scope="session", autouse=True) -def report_context( - sift_client: SiftClient, request: pytest.FixtureRequest, pytestconfig: pytest.Config -) -> Generator[ReportContext | None, None, None]: - """Create a report context for the session. 
- - The log file destination is controlled by ``--sift-test-results-log-file``. - Defaults to a temp file when not set. - - When ``--sift-test-results-check-connection`` is passed, this fixture will no-op - (yield None) if the Sift client has no connection to the server. That mode - requires a ``client_has_connection`` fixture to be available in the session. - """ - if _check_connection_enabled(pytestconfig) and not _has_sift_connection(request): - yield None - return - yield from _report_context_impl(sift_client, request, pytestconfig=pytestconfig) - - -def _step_impl( - report_context: ReportContext, request: pytest.FixtureRequest -) -> Generator[NewStep | None, None, None]: - name = str(request.node.name) - existing_docstring = request.node.obj.__doc__ or None - with report_context.new_step( - name=name, description=existing_docstring, assertion_as_fail_not_error=False - ) as new_step: - yield new_step - if hasattr(request.node, "rep_call") and request.node.rep_call.excinfo: - new_step.update_step_from_result( - request.node.rep_call.excinfo, - request.node.rep_call.excinfo.value, - request.node.rep_call.excinfo.tb, - ) - - -@pytest.fixture(autouse=True) -def step( - report_context: ReportContext | None, - request: pytest.FixtureRequest, - pytestconfig: pytest.Config, -) -> Generator[NewStep | None, None, None]: - """Create an outer step for the function. - - No-ops when ``--sift-test-results-check-connection`` is set and the client - has no connection (or when the session-scoped ``report_context`` resolved to None). - """ - if report_context is None or ( - _check_connection_enabled(pytestconfig) and not _has_sift_connection(request) - ): - yield None - return - yield from _step_impl(report_context, request) - - -@pytest.fixture(scope="module", autouse=True) -def module_substep( - report_context: ReportContext | None, - request: pytest.FixtureRequest, - pytestconfig: pytest.Config, -) -> Generator[NewStep | None, None, None]: - """Create a step per module. 
- - No-ops when ``--sift-test-results-check-connection`` is set and the client - has no connection (or when the session-scoped ``report_context`` resolved to None). - """ - if report_context is None or ( - _check_connection_enabled(pytestconfig) and not _has_sift_connection(request) - ): - yield None - return - yield from _step_impl(report_context, request) - - -@pytest.fixture(scope="session") -def client_has_connection(sift_client): - """Check if the SiftClient has a connection to the Sift server. - - Can be used to skip tests that require a connection to the Sift server, and is - consulted by the Sift fixtures when ``--sift-test-results-check-connection`` is set. - """ - has_connection = False - try: - sift_client.ping.ping() - has_connection = True - except Exception: - has_connection = False - return has_connection diff --git a/python/pyproject.toml b/python/pyproject.toml index 403c89bf8..79afdf464 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -409,6 +409,15 @@ select = [ env_files = [ ".env" ] +# `pytester` is registered globally because pytest 8+ disallows `pytest_plugins` +# in non-top-level conftests. Only the plugin test suite uses it; activating it +# globally is harmless since the fixture is opt-in. +addopts = "-p pytester" +# The Sift plugin is loaded for the whole project via `python/conftest.py`. +# The autouse gate defaults to off here so unit tests don't use it. The +# integration subtree (lib/sift_client/_tests/util/) opts back in via +# `pytest.mark.sift_include` applied in its conftest. 
+sift_test_results_autouse = false testpaths = [ "lib/sift_py", "lib/sift_client/_tests", From 74011c698be25bdf8322c318cdde7ab50542685d Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Thu, 21 May 2026 14:40:46 -0700 Subject: [PATCH 03/19] Python(feat): pytest graceful handling missing connection (#569) --- python/docs/examples/pytest_plugin.md | 230 ++++++++--------- .../low_level_wrappers/test_results.py | 33 ++- python/lib/sift_client/_tests/conftest.py | 12 +- .../_tests/pytest_plugin/conftest.py | 9 + .../pytest_plugin/test_configuration.py | 106 +++++--- .../_tests/pytest_plugin/test_credentials.py | 8 +- .../_tests/pytest_plugin/test_disabled.py | 183 +++++++++++++ .../_tests/pytest_plugin/test_offline.py | 135 ++++++++++ .../_tests/pytest_plugin/test_online.py | 133 ++++++++++ .../lib/sift_client/_tests/util/conftest.py | 4 +- .../_tests/util/test_report_context.py | 95 +++++++ python/lib/sift_client/client.py | 5 + python/lib/sift_client/pytest_plugin.py | 243 +++++++++++++----- .../lib/sift_client/resources/test_results.py | 23 +- .../sift_types/_mixins/simulated.py | 32 +++ .../lib/sift_client/sift_types/test_report.py | 13 +- .../sift_client/util/test_results/__init__.py | 28 +- .../sift_client/util/test_results/bounds.py | 111 ++++++-- .../util/test_results/context_manager.py | 131 ++++++---- python/pyproject.toml | 18 +- python/scripts/dev | 3 +- 21 files changed, 1203 insertions(+), 352 deletions(-) create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_disabled.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_offline.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_online.py create mode 100644 python/lib/sift_client/_tests/util/test_report_context.py create mode 100644 python/lib/sift_client/sift_types/_mixins/simulated.py diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md index 3557dd9c7..2ac298256 100644 --- a/python/docs/examples/pytest_plugin.md +++ 
b/python/docs/examples/pytest_plugin.md @@ -88,21 +88,22 @@ def sift_client() -> SiftClient: | `report_context` | fixture (autouse) | session | The `ReportContext` backing the run's `TestReport`. Use it to attach metadata or open ad-hoc steps. | | `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, and `current_step`. | | `module_substep` | fixture (autouse) | module | One step per test file with each function nested as a substep. | -| `client_has_connection` | fixture | session | Calls `sift_client.ping.ping()`; consulted only when `--sift-test-results-check-connection` is set. | +| `client_has_connection` | fixture | session | Calls `sift_client.ping.ping()`; consulted by `report_context` at session start in online mode (the default). Override to skip the ping or use a different reachability signal. | ### CLI options | Flag | Default | Effect | |---|---|---| -| `--sift-test-results-log-file=` | temp file | Where the JSONL log of create/update calls goes. With a log file set, the plugin spawns an `import-test-result-log --incremental` worker that polls the file and replays entries against Sift while the run is in flight. Pass `false` to disable the file entirely; create/update calls then go straight to the API synchronously during tests. | -| `--no-sift-test-results-git-metadata` | git metadata on | Skip capturing git repo/branch/commit on the report's metadata. | -| `--sift-test-results-check-connection` | off | Make `report_context`, `step`, and `module_substep` no-op (yield `None`) when `client_has_connection` is `False`. Lets the same suite run locally without a Sift backend. | +| `--sift-offline` | off (online) | Skip the session-start ping and don't contact Sift. All create/update calls go to the JSONL log file for later replay via `import-test-result-log`. Missing `SIFT_*` env vars are tolerated; placeholders are filled. | +| `--sift-disabled` | off | Skip Sift entirely. 
Nothing contacts the API and no log file is written; `step.measure(...)` still evaluates bounds and returns a real pass/fail boolean. Also honored via `SIFT_DISABLED=1`. Supersedes every other flag (disabled wins over offline). | +| `--sift-log-file=<path>` | temp file | Where the JSONL log of create/update calls goes. With a log file set, the plugin spawns an `import-test-result-log --incremental` worker that polls the file and replays entries against Sift while the run is in flight. Pass `false` to disable the file entirely; create/update calls then go straight to the API synchronously during tests. Passing `false` is incompatible with `--sift-offline`, since offline mode needs the log file as its sole sink. | +| `--no-sift-git-metadata` | git metadata on | Skip capturing git repo/branch/commit on the report's metadata. | These can be passed permanently via `addopts`: ```ini title="pytest.ini" [pytest] -addopts = --sift-test-results-check-connection +addopts = --sift-offline ``` Or set the matching ini key directly (recommended for stable per-project @@ -112,10 +113,11 @@ CLI flags, when passed, override the ini values. 
| Ini key | Type | Equivalent CLI flag | |---|---|---| -| `sift_test_results_log_file` | string (`true` / `false` / `none` / path) | `--sift-test-results-log-file=` | -| `sift_test_results_git_metadata` | bool (default `true`) | `--no-sift-test-results-git-metadata` (sets to `false`) | -| `sift_test_results_check_connection` | bool (default `false`) | `--sift-test-results-check-connection` | -| `sift_test_results_autouse` | bool (default `true`) | _(no CLI flag; controls the marker gate below)_ | +| `sift_log_file` | string (`true` / `false` / `none` / path) | `--sift-log-file=<path>` | +| `sift_git_metadata` | bool (default `true`) | `--no-sift-git-metadata` (sets to `false`) | +| `sift_offline` | bool (default `false`) | `--sift-offline` | +| `sift_disabled` | bool (default `false`) | `--sift-disabled` (also honors `SIFT_DISABLED` env var) | +| `sift_autouse` | bool (default `true`) | _(no CLI flag; controls the marker gate below)_ | The default `sift_client` fixture reads its two URIs from environment first and falls back to ini keys when the env vars are unset. `SIFT_API_KEY` is @@ -133,18 +135,16 @@ flags for credentials. ```toml title="pyproject.toml" [tool.pytest.ini_options] -sift_test_results_check_connection = true -sift_test_results_log_file = "false" -sift_test_results_git_metadata = false +sift_offline = true +sift_git_metadata = false sift_grpc_uri = "your-org.sift.example:443" sift_rest_uri = "https://your-org.sift.example" ``` ```ini title="pytest.ini" [pytest] -sift_test_results_check_connection = true -sift_test_results_log_file = false -sift_test_results_git_metadata = false +sift_offline = true +sift_git_metadata = false sift_grpc_uri = your-org.sift.example:443 sift_rest_uri = https://your-org.sift.example ``` @@ -171,7 +171,7 @@ into `os.environ` before tests run. glue is needed. !!! 
warning "FedRAMP / shared environments" - Pass `--sift-test-results-log-file=false` (or set the ini key to `"false"`) + Pass `--sift-log-file=false` (or set the ini key to `"false"`) to skip the temp file + worker pipeline. Create/update calls then run inline against the API instead of being deferred through a subprocess. @@ -184,7 +184,7 @@ Every report the plugin creates includes: - `system_operator`: `getpass.getuser()`. - `start_time` / `end_time`: set on session enter/exit. - `status`: starts at `IN_PROGRESS`, finalized to `PASSED` or `FAILED` on session exit (failure if any step failed or an exception escaped the session). -- `metadata.git_repo`, `metadata.git_branch`, `metadata.git_commit`: captured via `git remote get-url origin` / `git rev-parse --abbrev-ref HEAD` / `git describe --always --dirty --exclude '*'`. Suppressed by `--no-sift-test-results-git-metadata` or when not in a git repo. +- `metadata.git_repo`, `metadata.git_branch`, `metadata.git_commit`: captured via `git remote get-url origin` / `git rev-parse --abbrev-ref HEAD` / `git describe --always --dirty --exclude '*'`. Suppressed by `--no-sift-git-metadata` or when not in a git repo. Example invocations: @@ -207,7 +207,7 @@ useful when a repo holds tests that you don't want included in the Sift test rep | Setting | Effect | |---------------------------------------------------------|----------------------------------------------------------------------------------------------| -| `sift_test_results_autouse = false` in `pyproject.toml` | Flip the project-wide default off. Tests no longer produce steps unless explicitly opted in. | +| `sift_autouse = false` in `pyproject.toml` | Flip the project-wide default off. Tests no longer produce steps unless explicitly opted in. | | `@pytest.mark.sift_include` on a test, class, or module | Force reporting on for that scope, regardless of the project default. 
| | `@pytest.mark.sift_exclude` on a test, class, or module | Force reporting off for that scope, regardless of the project default. | @@ -237,7 +237,7 @@ def pytest_collection_modifyitems(config, items): ``` This applies `sift_include` to every test collected under `tests/example/`. -Combine with `sift_test_results_autouse = false` in `pyproject.toml` for +Combine with `sift_autouse = false` in `pyproject.toml` for opting in to specific directories. `pytest_collection_modifyitems` receives every item in the session, not just @@ -657,151 +657,129 @@ The `unit` argument is a free-form string label (e.g. `"V"`, `"C"`, `"psi"`). pytest # Pin the log file so you can replay it later if the import worker dies -pytest --sift-test-results-log-file=./sift-results.jsonl +pytest --sift-log-file=./sift-results.jsonl ``` -See [Running offline](#running-offline) for the same suite running with or -without a reachable Sift server. +See [Running modes](#running-modes) for the offline and disabled flags +that let the same suite run without contacting Sift, or without Sift at all. -## Running offline +## Running modes -The plugin supports two offline workflows, depending on whether you want a -Sift report at all when the test environment can't reach Sift. The first -turns the plugin into a no-op when the server is unreachable. The second -keeps the plugin running normally and writes every create/update to a local -JSONL file that you upload from a connected machine afterward. +The plugin runs in one of three modes, picked at invocation: -| Pattern | Flag | Runtime behavior | Follow-up | -|---|---|---|---| -| Skip when offline | `--sift-test-results-check-connection` | Fixtures yield `None`, no log file, no report. Pytest still reports pass/fail. | None. | -| Capture locally, upload later | `--sift-test-results-log-file=` | Plugin writes every create/update to the JSONL file. | `import-test-result-log ` from a connected machine. 
| +| Mode | Flag | Network | Log file | `step.measure(...)` | When to use | +|---|---|---|---|---|---| +| Online (default) | _(none)_ | yes (pings at session start, aborts if it fails) | optional write-through backup | real measurement against Sift | CI with Sift credentials, local dev hitting your tenant | +| Offline | `--sift-offline` | none | required (the sole sink) | real measurement queued to log | field tests, air-gapped labs, CI without network | +| Disabled | `--sift-disabled` | none | none | bounds eval; returns a real bool | local dev or CI that doesn't have (or want) Sift | -Pattern 1 suits laptop dev and CI without Sift secrets. Pattern 2 suits -field tests, vehicles on remote sites, and air-gapped labs. +Pass both flags? Disabled wins. It's the "skip Sift entirely" hammer and +supersedes everything else. -### Pattern 1: skip when offline +### Online mode (default) -`--sift-test-results-check-connection` makes the plugin ping Sift once at -session start through the `client_has_connection` fixture (which by default -calls `sift_client.ping.ping()`). On a failed ping, `report_context`, -`step`, and `module_substep` yield `None` for the rest of the session. -Pytest still runs the tests and still reports pass/fail. +`report_context` resolves `client_has_connection` at session start. The +default implementation calls `sift_client.ping.ping()`. A failed ping +aborts the whole session with `pytest.UsageError` and points at +`--sift-offline` and `--sift-disabled` as escape hatches. -```bash -pytest --sift-test-results-check-connection -``` +This is loud on purpose. A CI run that silently no-ops on a flaky network +won't get noticed until somebody goes looking for the report, which is +usually weeks later, which is usually too late. 
-```ini title="pytest.ini" -[pytest] -addopts = --sift-test-results-check-connection -``` +With the default `--sift-log-file` setting on, create/update calls are +written to a JSONL log file during the run and an +`import-test-result-log --incremental` worker replays them against Sift +in the background. If the worker crashes mid-session (connection failure, +API error) or is still draining its backlog at session end, the failure +is logged at session end with an `import-test-result-log` command for +manual recovery — test outcomes are unaffected and the local log file is +preserved. Pass `--sift-log-file=false` to make every create/update +synchronous against the API instead. -#### Handling `None` in tests +#### Overriding the connection check -Calls on `step` raise `AttributeError` when it's `None`, so tests that take -`step` as a parameter need a guard. The cleanest fix is to shadow the -plugin's `step` fixture in your conftest and turn the `None` case into an -automatic skip. +Override `client_has_connection` when ping isn't the right signal, for +example a token cache that's only warm when authenticated: ```python title="conftest.py" -import pytest +from pathlib import Path -pytest_plugins = ["sift_client.pytest_plugin"] +import pytest -@pytest.fixture(autouse=True) -def step(step): - if step is None: - pytest.skip("Sift unavailable") - yield step +@pytest.fixture(scope="session") +def client_has_connection(sift_client) -> bool: + return Path("~/.sift-token-cache").expanduser().is_file() ``` -The `step` parameter on the override resolves to the plugin's fixture, not -to the override itself. `autouse=True` is required so the skip applies to -tests that don't request `step` directly. The same shadowing trick works -for `module_substep` and `report_context`. +The override is ignored under `--sift-offline` and `--sift-disabled`. 
-For one-off tests that don't share a conftest, an inline guard works just -as well: +### Offline mode (`--sift-offline`) -```python -def test_battery_voltage(step): - if step is None: - pytest.skip("Sift unavailable") - step.measure(name="battery_voltage", value=4.97, bounds={"min": 4.8, "max": 5.2}) -``` +Same fixtures, same `step.measure(...)` semantics as online. The +difference is where the writes go: every create/update lands in a JSONL +log file instead of hitting the Sift API. The session-start ping is +skipped, missing `SIFT_*` env vars are tolerated (placeholders are +filled), and the replay worker (`import-test-result-log --incremental`) +does not get spawned at session end. -If you'd rather have tests pass through silently than skip them, wrap the -calls in a helper that no-ops on `None`: - -```python -def safe_measure(step, **kwargs): - if step is None: - return True - return step.measure(**kwargs) +```bash +pytest --sift-offline --sift-log-file=./run.jsonl ``` -#### Overriding the connection check +Once you have connectivity, replay it: -The default `client_has_connection` fixture calls `sift_client.ping.ping()`. -Override it in your conftest if pinging is the wrong signal for your -environment, for example a token cache that's only warm when authenticated: +```bash +import-test-result-log ./run.jsonl +``` -```python title="conftest.py" -from pathlib import Path +That replay creates the report, steps, and measurements against Sift. +See [Replaying a saved log file](#replaying-a-saved-log-file) for cleanup +and the incremental flag. -import pytest +`--sift-log-file=none` is rejected when offline is set. The +log file is the only sink in offline mode, so without it the results are +gone. +!!! warning "Pin the log path" + Without `--sift-log-file=<path>`, offline mode writes to + a `tempfile.NamedTemporaryFile` and only surfaces the path via a + `logger.info` line. Pin a known path when you intend to replay later. 
-@pytest.fixture(scope="session") -def client_has_connection(sift_client) -> bool: - return Path("~/.sift-token-cache").expanduser().is_file() -``` +### Disabled mode (`--sift-disabled`) -The plugin only consults this fixture when `--sift-test-results-check-connection` -is set, so an unused override has no effect on a normal run. +The plugin stays loaded with the same fixtures and markers as the other +modes. Nothing contacts Sift, no log file is written, and no `SIFT_*` +env vars are required. `step.measure(...)`, `step.measure_avg(...)`, +`step.measure_all(...)`, `step.substep(...)`, and +`report_context.report.update({...})` all behave normally — bounds +evaluate and you get a real pass/fail boolean back. -### Pattern 2: capture locally, upload later +Entities returned in disabled mode report `is_simulated == True` (on +`TestReport`, `TestStep`, `TestMeasurement`, and `ReportContext`) so +consumers and tests can branch on provenance. Offline-mode entities +also report `is_simulated == True`. -This pattern keeps the plugin running normally even when Sift is -unreachable. The plugin writes to the log file, the worker dies on connect, -and the file is left on disk for you to upload later. Pin the log file path -so you can find it afterward, and don't pass -`--sift-test-results-check-connection`, which would suppress the logging -this pattern relies on. +How to turn it on, in the order most projects pick: ```bash -pytest --sift-test-results-log-file=./run.jsonl -``` - -What happens during the run: +# In an .envrc, devcontainer, or CI job config +export SIFT_DISABLED=1 -- Every report, step, and measurement create/update is written to - `run.jsonl`. The plugin doesn't contact the Sift API for any of these - calls; they return simulated responses keyed by UUIDs that the replay - later maps to real IDs. -- The `import-test-result-log --incremental` worker subprocess starts and - exits early when it can't reach Sift. 
The session does not fail when the - worker exits before the run ends. -- Tests run against a real `step` fixture, so `step.measure(...)`, - substeps, parametrize, fixtures, and `module_substep` behave exactly as - they do online. No conftest changes are needed. +# Per-invocation kill-switch +pytest --sift-disabled -Once you have connectivity, replay the file: - -```bash -import-test-result-log ./run.jsonl +# Per-project default (uncommon; online is usually the right default) +# pyproject.toml: +# [tool.pytest.ini_options] +# sift_disabled = true ``` -The replay creates the report, steps, and measurements against Sift in one -batch. See [Replaying a saved log file](#replaying-a-saved-log-file) for -details on cleanup and the incremental flag. - -!!! warning "Pin the log path for Pattern 2" - Without `--sift-test-results-log-file=`, the plugin writes to a - `tempfile.NamedTemporaryFile` and only surfaces the path via a - `logger.info` line. Always pin a known path when you intend to replay - the file later. +Good fit for local dev without Sift credentials. Also for library +consumers who don't have a Sift tenant. Also useful in CI for runs that +shouldn't add noise to the report stream, like a PR job re-running the +same suite five times in a row. 
## Replaying a saved log file diff --git a/python/lib/sift_client/_internal/low_level_wrappers/test_results.py b/python/lib/sift_client/_internal/low_level_wrappers/test_results.py index d15f86c48..ff0c2b515 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/test_results.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/test_results.py @@ -3,7 +3,7 @@ import logging import uuid from pathlib import Path -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, TypeVar, cast from google.protobuf import json_format from sift.test_reports.v1.test_reports_pb2 import ( @@ -68,6 +68,9 @@ logger = logging.getLogger(__name__) +_EntityT = TypeVar("_EntityT", TestReport, TestStep, TestMeasurement) + + class TestResultsLowLevelClient(LowLevelClientBase, WithGrpcClient): """Low-level client for the TestResultsAPI. @@ -82,6 +85,16 @@ def __init__(self, grpc_client: GrpcClient): """ super().__init__(grpc_client) + @staticmethod + def _mark_simulated(instance: _EntityT) -> _EntityT: + """Stamp an entity as having been produced by the simulate path. + + Mirrors the ``__dict__`` write used by ``BaseType._apply_client_to_instance`` + to bypass pydantic's frozen-model guard. 
+ """ + instance.__dict__["_simulated"] = True + return instance + @staticmethod def simulate_create_test_report_response( request: CreateTestReportRequest, @@ -387,7 +400,7 @@ async def create_test_report( request, response_id=simulated_proto.test_report_id, ) - return TestReport._from_proto(simulated_proto) + return self._mark_simulated(TestReport._from_proto(simulated_proto)) response = await self._grpc_client.get_stub(TestReportServiceStub).CreateTestReport(request) grpc_test_report = cast("CreateTestReportResponse", response).test_report @@ -505,7 +518,9 @@ async def update_test_report( if log_file is not None or simulate: if log_file is not None: log_request_to_file(log_file, "UpdateTestReport", request) - return self.simulate_update_test_report_response(request, existing=existing) + return self._mark_simulated( + self.simulate_update_test_report_response(request, existing=existing) + ) response = await self._grpc_client.get_stub(TestReportServiceStub).UpdateTestReport(request) grpc_test_report = cast("UpdateTestReportResponse", response).test_report @@ -560,7 +575,7 @@ async def create_test_step( request, response_id=simulated_proto.test_step_id, ) - return TestStep._from_proto(simulated_proto) + return self._mark_simulated(TestStep._from_proto(simulated_proto)) response = await self._grpc_client.get_stub(TestReportServiceStub).CreateTestStep(request) grpc_test_step = cast("CreateTestStepResponse", response).test_step @@ -661,7 +676,9 @@ async def update_test_step( if log_file is not None or simulate: if log_file is not None: log_request_to_file(log_file, "UpdateTestStep", request) - return self.simulate_update_test_step_response(request, existing=existing) + return self._mark_simulated( + self.simulate_update_test_step_response(request, existing=existing) + ) response = await self._grpc_client.get_stub(TestReportServiceStub).UpdateTestStep(request) grpc_test_step = cast("UpdateTestStepResponse", response).test_step @@ -716,7 +733,7 @@ async def 
create_test_measurement( request, response_id=simulated_proto.measurement_id, ) - return TestMeasurement._from_proto(simulated_proto) + return self._mark_simulated(TestMeasurement._from_proto(simulated_proto)) response = await self._grpc_client.get_stub(TestReportServiceStub).CreateTestMeasurement( request @@ -861,7 +878,9 @@ async def update_test_measurement( if log_file is not None or simulate: if log_file is not None: log_request_to_file(log_file, "UpdateTestMeasurement", request) - return self.simulate_update_test_measurement_response(request, existing=existing) + return self._mark_simulated( + self.simulate_update_test_measurement_response(request, existing=existing) + ) response = await self._grpc_client.get_stub(TestReportServiceStub).UpdateTestMeasurement( request diff --git a/python/lib/sift_client/_tests/conftest.py b/python/lib/sift_client/_tests/conftest.py index 79b079d39..0b939ae39 100644 --- a/python/lib/sift_client/_tests/conftest.py +++ b/python/lib/sift_client/_tests/conftest.py @@ -79,5 +79,13 @@ def ci_pytest_tag(sift_client): def pytest_configure(config: pytest.Config) -> None: - """Enable the Sift connection-check mode for the fixtures used in this test suite since we run w/ mock client in non-integration tests.""" - config.option.sift_test_results_check_connection = True + """Pick a Sift plugin mode based on whether integration tests are running. + + Integration runs (``-m integration``) stay online with the default + log-file pipeline enabled so CI exercises the JSONL write + import + worker replay path that production users hit. Every other run defaults + to ``--sift-disabled`` so unit tests don't need credentials. 
+ """ + is_integration_run = "integration" in (config.option.markexpr or "") + if not is_integration_run: + config.option.sift_disabled = True diff --git a/python/lib/sift_client/_tests/pytest_plugin/conftest.py b/python/lib/sift_client/_tests/pytest_plugin/conftest.py index 1fbd61e46..783a12bf4 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/conftest.py +++ b/python/lib/sift_client/_tests/pytest_plugin/conftest.py @@ -25,6 +25,15 @@ import pytest +_SIFT_ENV_VARS = ("SIFT_API_KEY", "SIFT_GRPC_URI", "SIFT_REST_URI", "SIFT_DISABLED") + + +@pytest.fixture +def clear_sift_env(monkeypatch: pytest.MonkeyPatch) -> None: + """Unset all ``SIFT_*`` environment variables for the duration of the test.""" + for name in _SIFT_ENV_VARS: + monkeypatch.delenv(name, raising=False) + @pytest.fixture def write_plugin_conftest(pytester: pytest.Pytester) -> Callable[[], None]: diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py b/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py index 9b9be2d63..4efb9f554 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py @@ -34,7 +34,7 @@ def test_ini_log_file_none( pytester.makepyprojecttoml( """ [tool.pytest.ini_options] - sift_test_results_log_file = "none" + sift_log_file = "none" """ ) pytester.makepyfile("def test_noop(): pass") @@ -46,7 +46,7 @@ def test_python_false_disables_log_file( pytester: pytest.Pytester, write_probe_conftest: Callable[[str], None], ) -> None: - """`config.option.sift_test_results_log_file = False` disables logging. + """`config.option.sift_log_file = False` disables logging. Conftests use this pattern (see lib/sift_client/_tests/util/conftest.py) to opt their subtree out of log-file mode. 
Regression test for the @@ -55,7 +55,7 @@ def test_python_false_disables_log_file( """ write_probe_conftest( """ - config.option.sift_test_results_log_file = False + config.option.sift_log_file = False from sift_client.pytest_plugin import _resolve_log_file print("RESOLVED:", _resolve_log_file(config)) """, @@ -80,33 +80,54 @@ def test_ini_log_file_path( pytester.makepyprojecttoml( f""" [tool.pytest.ini_options] - sift_test_results_log_file = "{log_path}" + sift_log_file = "{log_path}" """ ) pytester.makepyfile("def test_noop(): pass") result = pytester.runpytest_subprocess("-s", "--co") result.stdout.fnmatch_lines([f"RESOLVED: {log_path}"]) - def test_ini_check_connection_true( + def test_ini_offline_true( self, pytester: pytest.Pytester, write_probe_conftest: Callable[[str], None], ) -> None: write_probe_conftest( """ - from sift_client.pytest_plugin import _check_connection_enabled - print("CHECK:", _check_connection_enabled(config)) + from sift_client.pytest_plugin import _is_offline + print("OFFLINE:", _is_offline(config)) """, ) pytester.makepyprojecttoml( """ [tool.pytest.ini_options] - sift_test_results_check_connection = true + sift_offline = true """ ) pytester.makepyfile("def test_noop(): pass") result = pytester.runpytest_subprocess("-s", "--co") - result.stdout.fnmatch_lines(["CHECK: True"]) + result.stdout.fnmatch_lines(["OFFLINE: True"]) + + def test_ini_disabled_true( + self, + pytester: pytest.Pytester, + write_probe_conftest: Callable[[str], None], + ) -> None: + write_probe_conftest( + """ + from sift_client.pytest_plugin import _is_disabled + print("DISABLED:", _is_disabled(config)) + """, + ) + pytester.makepyprojecttoml( + """ + [tool.pytest.ini_options] + sift_disabled = true + """ + ) + pytester.makepyfile("def test_noop(): pass") + result = pytester.runpytest_subprocess("-s", "--co") + result.stdout.fnmatch_lines(["DISABLED: True"]) def test_ini_git_metadata_false( self, @@ -115,13 +136,13 @@ def test_ini_git_metadata_false( ) -> None: 
write_probe_conftest( """ - print("INI_GIT:", config.getini("sift_test_results_git_metadata")) + print("INI_GIT:", config.getini("sift_git_metadata")) """, ) pytester.makepyprojecttoml( """ [tool.pytest.ini_options] - sift_test_results_git_metadata = false + sift_git_metadata = false """ ) pytester.makepyfile("def test_noop(): pass") @@ -145,37 +166,51 @@ def test_cli_overrides_ini( pytester.makepyprojecttoml( """ [tool.pytest.ini_options] - sift_test_results_log_file = "none" + sift_log_file = "none" """ ) pytester.makepyfile("def test_noop(): pass") - result = pytester.runpytest_subprocess( - "-s", "--co", f"--sift-test-results-log-file={cli_path}" - ) + result = pytester.runpytest_subprocess("-s", "--co", f"--sift-log-file={cli_path}") result.stdout.fnmatch_lines([f"RESOLVED: {cli_path}"]) - def test_cli_check_connection_flag( + def test_cli_offline_flag( + self, + pytester: pytest.Pytester, + write_probe_conftest: Callable[[str], None], + ) -> None: + """The ``--sift-offline`` CLI flag flips the resolver to True.""" + write_probe_conftest( + """ + from sift_client.pytest_plugin import _is_offline + print("OFFLINE:", _is_offline(config)) + """, + ) + pytester.makepyfile("def test_noop(): pass") + result = pytester.runpytest_subprocess("-s", "--co", "--sift-offline") + result.stdout.fnmatch_lines(["OFFLINE: True"]) + + def test_cli_disabled_flag( self, pytester: pytest.Pytester, write_probe_conftest: Callable[[str], None], ) -> None: - """The ``--sift-test-results-check-connection`` CLI flag flips the resolver to True.""" + """The ``--sift-disabled`` CLI flag flips the resolver to True.""" write_probe_conftest( """ - from sift_client.pytest_plugin import _check_connection_enabled - print("CHECK:", _check_connection_enabled(config)) + from sift_client.pytest_plugin import _is_disabled + print("DISABLED:", _is_disabled(config)) """, ) pytester.makepyfile("def test_noop(): pass") - result = pytester.runpytest_subprocess("-s", "--co", 
"--sift-test-results-check-connection") - result.stdout.fnmatch_lines(["CHECK: True"]) + result = pytester.runpytest_subprocess("-s", "--co", "--sift-disabled") + result.stdout.fnmatch_lines(["DISABLED: True"]) def test_cli_no_git_metadata_flag( self, pytester: pytest.Pytester, write_probe_conftest: Callable[[str], None], ) -> None: - """The ``--no-sift-test-results-git-metadata`` CLI flag flips git_metadata to False. + """The ``--no-sift-git-metadata`` CLI flag flips git_metadata to False. Guards the negation flag's ``dest`` binding: the flag name doesn't match the ini key, so a broken ``dest`` would silently fall back to the ini @@ -183,11 +218,11 @@ def test_cli_no_git_metadata_flag( """ write_probe_conftest( """ - print("CLI_GIT:", config.getoption("sift_test_results_git_metadata")) + print("CLI_GIT:", config.getoption("sift_git_metadata")) """, ) pytester.makepyfile("def test_noop(): pass") - result = pytester.runpytest_subprocess("-s", "--co", "--no-sift-test-results-git-metadata") + result = pytester.runpytest_subprocess("-s", "--co", "--no-sift-git-metadata") result.stdout.fnmatch_lines(["CLI_GIT: False"]) def test_defaults_when_neither_set( @@ -198,12 +233,14 @@ def test_defaults_when_neither_set( write_probe_conftest( """ from sift_client.pytest_plugin import ( - _check_connection_enabled, + _is_disabled, + _is_offline, _resolve_log_file, ) print("RESOLVED:", _resolve_log_file(config)) - print("CHECK:", _check_connection_enabled(config)) - print("INI_GIT:", config.getini("sift_test_results_git_metadata")) + print("OFFLINE:", _is_offline(config)) + print("DISABLED:", _is_disabled(config)) + print("INI_GIT:", config.getini("sift_git_metadata")) """, ) pytester.makepyfile("def test_noop(): pass") @@ -211,7 +248,8 @@ def test_defaults_when_neither_set( result.stdout.fnmatch_lines( [ "RESOLVED: True", - "CHECK: False", + "OFFLINE: False", + "DISABLED: False", "INI_GIT: True", ] ) @@ -238,7 +276,7 @@ def report_context(): class TestAutouseGate: - 
"""`sift_include` / `sift_exclude` markers and the `sift_test_results_autouse` ini gate.""" + """`sift_include` / `sift_exclude` markers and the `sift_autouse` ini gate.""" def test_default_ini_true_activates(self, pytester: pytest.Pytester) -> None: """Plugin default (ini absent) keeps the autouse fixtures active.""" @@ -253,12 +291,12 @@ def test_inner(step): result.assert_outcomes(passed=1) def test_default_ini_false_skips(self, pytester: pytest.Pytester) -> None: - """`sift_test_results_autouse = false` makes the autouse fixtures no-op by default.""" + """`sift_autouse = false` makes the autouse fixtures no-op by default.""" pytester.makeconftest(_GATE_INNER_CONFTEST) pytester.makepyprojecttoml( """ [tool.pytest.ini_options] - sift_test_results_autouse = false + sift_autouse = false """ ) pytester.makepyfile( @@ -276,7 +314,7 @@ def test_sift_include_marker_forces_on(self, pytester: pytest.Pytester) -> None: pytester.makepyprojecttoml( """ [tool.pytest.ini_options] - sift_test_results_autouse = false + sift_autouse = false """ ) pytester.makepyfile( @@ -328,7 +366,7 @@ def test_module_pytestmark_inherits(self, pytester: pytest.Pytester) -> None: pytester.makepyprojecttoml( """ [tool.pytest.ini_options] - sift_test_results_autouse = false + sift_autouse = false """ ) pytester.makepyfile( @@ -359,7 +397,7 @@ def test_bulk_apply_via_conftest_hook(self, pytester: pytest.Pytester) -> None: pytester.makepyprojecttoml( """ [tool.pytest.ini_options] - sift_test_results_autouse = false + sift_autouse = false """ ) included = pytester.mkdir("included_subtree") diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_credentials.py b/python/lib/sift_client/_tests/pytest_plugin/test_credentials.py index 9ee628e69..3f6d22a6e 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_credentials.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_credentials.py @@ -31,8 +31,8 @@ def test_uris_from_ini( [tool.pytest.ini_options] sift_grpc_uri = "ini-grpc:1234" 
sift_rest_uri = "https://ini-rest" - sift_test_results_check_connection = true - sift_test_results_log_file = "false" + sift_offline = true + """ ) pytester.makepyfile( @@ -62,8 +62,8 @@ def test_env_var_overrides_ini_uri( [tool.pytest.ini_options] sift_grpc_uri = "ini-grpc:1234" sift_rest_uri = "https://ini-rest" - sift_test_results_check_connection = true - sift_test_results_log_file = "false" + sift_offline = true + """ ) pytester.makepyfile( diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py new file mode 100644 index 000000000..cba4bc1ee --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py @@ -0,0 +1,183 @@ +"""Tests for ``--sift-disabled`` mode. + +Disabled mode skips Sift entirely. Autouse fixtures yield stub objects so +test code that calls ``step.measure(...)`` keeps working without any Sift +configuration; ``measure*`` evaluates bounds locally and returns the real +pass/fail boolean. Nothing reaches Sift and no log file is written. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable + +if TYPE_CHECKING: + from pathlib import Path + + import pytest + + +class TestDisabledMode: + def test_in_bounds_passes_out_of_bounds_fails( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Stub measure* evaluates bounds locally; pass/fail matches the real plugin.""" + write_plugin_conftest() + pytester.makepyfile( + """ + def test_passes_in_bounds(step): + assert step.measure(name="v", value=5.0, bounds={"min": 4.8, "max": 5.2}) + + def test_fails_out_of_bounds(step): + assert step.measure(name="v", value=99.0, bounds={"max": 5.2}) is False + + def test_substep_and_report_outcome(step): + with step.substep(name="inner") as inner: + assert inner.report_outcome(name="ok", result=True) is True + + def test_string_bounds(step): + assert step.measure(name="fw", value="1.0", bounds="1.0") is True + assert step.measure(name="fw", value="1.0", bounds="2.0") is False + + def test_measure_avg(step): + assert step.measure_avg( + name="bus", values=[4.97, 5.01, 5.03], bounds={"min": 4.9, "max": 5.1} + ) is True + + def test_measure_all_outlier(step): + assert step.measure_all( + name="p", values=[10.1, 10.2, 99.9], bounds={"max": 11.0} + ) is False + """ + ) + result = pytester.runpytest_subprocess("--sift-disabled") + result.assert_outcomes(passed=6) + + def test_disabled_does_not_require_credentials( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Disabled mode never reads SIFT_* env vars; runs cleanly without them.""" + write_plugin_conftest() + pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)") + result = pytester.runpytest_subprocess("--sift-disabled") + result.assert_outcomes(passed=1) + + def test_disabled_via_env_var( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: 
Callable[[], None], + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """``SIFT_DISABLED=1`` triggers disabled mode without the CLI flag.""" + write_plugin_conftest() + pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)") + monkeypatch.setenv("SIFT_DISABLED", "1") + result = pytester.runpytest_subprocess() + result.assert_outcomes(passed=1) + + def test_disabled_supersedes_offline( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """``--sift-disabled`` wins when combined with ``--sift-offline``. + + Disabled is the "skip Sift entirely" hammer; passing it alongside + offline shouldn't error. The session runs without credentials, without + a log file, and without the offline-mode replay machinery. + """ + write_plugin_conftest() + pytester.makepyfile( + """ + def test_runs(step): + assert step.measure(name="v", value=5.0, bounds={"max": 10.0}) is True + """ + ) + result = pytester.runpytest_subprocess("--sift-disabled", "--sift-offline") + result.assert_outcomes(passed=1) + + def test_disabled_yields_stub_fixtures( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """`report_context` / `step` / `module_substep` are real instances backed by a simulate client.""" + write_plugin_conftest() + pytester.makepyfile( + """ + from sift_client.util.test_results import ReportContext + from sift_client.util.test_results.context_manager import NewStep + + def test_types(step, report_context, module_substep): + assert isinstance(report_context, ReportContext) + assert report_context.is_simulated is True + assert report_context.report.is_simulated is True + assert step.current_step.is_simulated is True + assert isinstance(step, NewStep) + assert isinstance(module_substep, NewStep) + """ + ) + result = pytester.runpytest_subprocess("--sift-disabled") + result.assert_outcomes(passed=1) + + def 
test_disabled_writes_no_log_file_even_when_path_pinned( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Disabled mode skips the log-file pipeline even when a path is pinned.""" + log_path = tmp_path / "should-not-exist.jsonl" + write_plugin_conftest() + pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)") + result = pytester.runpytest_subprocess("--sift-disabled", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + assert not log_path.exists(), f"log file unexpectedly created at {log_path}" + + def test_disabled_skips_client_has_connection_and_sift_client( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + ) -> None: + """Disabled mode never resolves ``client_has_connection`` or ``sift_client``. + + The plugin's ``report_context`` short-circuits to the stub before + consulting either fixture. Overrides that raise on resolution stay + un-triggered, so the inner test passes cleanly. + """ + pytester.makeconftest( + """ + import pytest + + pytest_plugins = ["sift_client.pytest_plugin"] + + + @pytest.fixture(scope="session") + def sift_client(): + raise AssertionError("sift_client should not resolve in disabled mode") + + + @pytest.fixture(scope="session") + def client_has_connection(): + raise AssertionError( + "client_has_connection should not resolve in disabled mode" + ) + """ + ) + pytester.makepyfile( + """ + def test_runs(step): + assert step.measure(name="v", value=5.0, bounds={"max": 10.0}) is True + """ + ) + result = pytester.runpytest_subprocess("--sift-disabled") + result.assert_outcomes(passed=1) diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_offline.py b/python/lib/sift_client/_tests/pytest_plugin/test_offline.py new file mode 100644 index 000000000..f0470bad3 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_offline.py @@ -0,0 +1,135 @@ +"""Tests for ``--sift-offline`` mode. 
+ +Offline mode routes every create/update through the JSONL log file without +contacting Sift. The session-start ping is skipped, the import worker is not +spawned, and missing ``SIFT_*`` env vars are tolerated (placeholders are +filled). Offline + ``--sift-log-file=none`` is rejected as a +usage error since the log file is the sole sink in this mode. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable + +if TYPE_CHECKING: + from pathlib import Path + + import pytest + + +class TestOfflineMode: + def test_offline_runs_without_network( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Offline mode constructs the client locally and never pings.""" + write_plugin_conftest() + pytester.makepyfile( + """ + def test_in_bounds(step): + assert step.measure(name="v", value=5.0, bounds={"min": 4.8, "max": 5.2}) + + def test_out_of_bounds(step): + assert step.measure(name="v", value=10.0, bounds={"max": 5.2}) is False + """ + ) + result = pytester.runpytest_subprocess("--sift-offline") + result.assert_outcomes(passed=2) + + def test_log_file_none_incompatible_with_offline( + self, + pytester: pytest.Pytester, + write_plugin_conftest: Callable[[], None], + ) -> None: + """``--sift-log-file=none`` + ``--sift-offline`` is a usage error.""" + write_plugin_conftest() + pytester.makepyfile("def test_should_not_run(): pass") + result = pytester.runpytest_subprocess("--sift-offline", "--sift-log-file=none") + assert result.ret != 0 + combined = "\n".join(result.outlines + result.errlines) + assert "incompatible with --sift-offline" in combined, combined + + def test_offline_yields_real_fixtures( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Offline mode runs a real ReportContext; entities still report `is_simulated=True` because the log-file path synthesizes responses prior to replay.""" + 
write_plugin_conftest() + pytester.makepyfile( + """ + from sift_client.util.test_results import ReportContext + from sift_client.util.test_results.context_manager import NewStep + + def test_types(step, report_context): + assert isinstance(report_context, ReportContext) + assert isinstance(step, NewStep) + assert report_context.client._simulate is False + # log-file mode synthesizes responses, so entities are flagged simulated. + assert report_context.is_simulated is True + assert step.current_step.is_simulated is True + """ + ) + result = pytester.runpytest_subprocess("--sift-offline") + result.assert_outcomes(passed=1) + + def test_offline_writes_jsonl_to_pinned_log_file( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Offline mode populates the pinned JSONL file with create/update entries.""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyfile( + """ + def test_one(step): + assert step.measure( + name="v", value=5.0, bounds={"min": 4.8, "max": 5.2} + ) is True + """ + ) + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + assert log_path.exists(), f"offline mode did not create {log_path}" + content = log_path.read_text() + assert content.strip(), "log file is empty" + # Each non-empty line is ``[Operation:uuid] {json}``. A successful + # session produces at least the report create + step create lines. + lines = [line for line in content.splitlines() if line.strip()] + assert any(line.startswith("[CreateTestReport:") for line in lines), content + assert any(line.startswith("[CreateTestStep:") for line in lines), content + + def test_offline_skips_client_has_connection( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + ) -> None: + """Offline mode never resolves ``client_has_connection``. + + Override the fixture to raise on resolution. 
If the override is + invoked, the session aborts. If it isn't, the inner test passes + cleanly, which confirms the offline path skipped the ping check. + """ + pytester.makeconftest( + """ + import pytest + + pytest_plugins = ["sift_client.pytest_plugin"] + + + @pytest.fixture(scope="session") + def client_has_connection(): + raise AssertionError( + "client_has_connection should not resolve in offline mode" + ) + """ + ) + pytester.makepyfile("def test_runs(step): pass") + result = pytester.runpytest_subprocess("--sift-offline") + result.assert_outcomes(passed=1) diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_online.py b/python/lib/sift_client/_tests/pytest_plugin/test_online.py new file mode 100644 index 000000000..876fffb0e --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_online.py @@ -0,0 +1,133 @@ +"""Tests for online mode (the default). + +Online mode requires connectivity to Sift. The plugin pings via +``client_has_connection`` at session start and aborts with +``pytest.UsageError`` on failure. Missing ``SIFT_API_KEY`` / +``SIFT_GRPC_URI`` / ``SIFT_REST_URI`` env vars are reported as a usage error +so the failure is actionable. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable + +if TYPE_CHECKING: + from pathlib import Path + + import pytest + + +class TestOnlineMode: + def test_ping_failure_aborts( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + ) -> None: + """Online mode with an unreachable ping aborts the session via UsageError.""" + pytester.makeconftest( + """ + import pytest + from unittest.mock import MagicMock + + pytest_plugins = ["sift_client.pytest_plugin"] + + + @pytest.fixture(scope="session") + def sift_client(): + client = MagicMock() + client.ping.ping.side_effect = ConnectionError("unreachable") + return client + """ + ) + pytester.makepyfile( + """ + import pytest + + @pytest.mark.sift_include + def test_should_not_run(): + assert True + """ + ) + result = pytester.runpytest_subprocess() + assert result.ret != 0 + combined = "\n".join(result.outlines + result.errlines) + assert "Sift ping failed" in combined, combined + + def test_missing_env_vars_named_in_error( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """The default ``sift_client`` fixture names missing env vars in its error.""" + write_plugin_conftest() + pytester.makepyfile( + """ + import pytest + + @pytest.mark.sift_include + def test_should_not_run(): + pass + """ + ) + result = pytester.runpytest_subprocess() + assert result.ret != 0 + combined = "\n".join(result.outlines + result.errlines) + for var in ("SIFT_API_KEY", "SIFT_GRPC_URI", "SIFT_REST_URI"): + assert var in combined, combined + + def test_online_resolves_client_has_connection_once( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + ) -> None: + """Online mode resolves ``client_has_connection`` exactly once at session start. + + Overrides the fixture to bump a counter persisted to a file the outer + test reads after the inner session finishes. 
Outcomes aren't asserted + because the real ``ReportContext`` constructed against a ``MagicMock`` + client crashes downstream when Pydantic sees mock IDs; what we're + verifying is the ping path itself, which runs before construction. + """ + counter_file = tmp_path / "ping_calls.txt" + pytester.makeconftest( + f""" + from pathlib import Path + from unittest.mock import MagicMock + + import pytest + + pytest_plugins = ["sift_client.pytest_plugin"] + + _COUNTER = Path({str(counter_file)!r}) + + + @pytest.fixture(scope="session") + def sift_client(): + return MagicMock() + + + @pytest.fixture(scope="session") + def client_has_connection(): + prior = int(_COUNTER.read_text()) if _COUNTER.exists() else 0 + _COUNTER.write_text(str(prior + 1)) + return True + """ + ) + pytester.makepyfile( + """ + import pytest + + @pytest.mark.sift_include + def test_a(): pass + + @pytest.mark.sift_include + def test_b(): pass + """ + ) + pytester.runpytest_subprocess() + assert counter_file.exists(), "client_has_connection was not resolved" + assert counter_file.read_text() == "1", ( + f"expected session-scoped fixture to resolve once, got {counter_file.read_text()}" + ) diff --git a/python/lib/sift_client/_tests/util/conftest.py b/python/lib/sift_client/_tests/util/conftest.py index 2f371e69e..9e255da8a 100644 --- a/python/lib/sift_client/_tests/util/conftest.py +++ b/python/lib/sift_client/_tests/util/conftest.py @@ -7,13 +7,13 @@ def pytest_configure(config: pytest.Config) -> None: """Configure the pytest configuration to disable the Sift test results log file.""" - config.option.sift_test_results_log_file = False + config.option.sift_log_file = False def pytest_collection_modifyitems(config: pytest.Config, items: "list[pytest.Item]") -> None: """Bulk-apply ``@pytest.mark.sift_include`` to integration tests under util/. 
- The project-wide default in ``pyproject.toml`` is ``sift_test_results_autouse + The project-wide default in ``pyproject.toml`` is ``sift_autouse = false`` so unit tests pay nothing for the globally-loaded Sift plugin. Integration tests in this subtree still need the autouse fixtures, so this hook flips the gate back on for any test already marked diff --git a/python/lib/sift_client/_tests/util/test_report_context.py b/python/lib/sift_client/_tests/util/test_report_context.py new file mode 100644 index 000000000..f12247c7a --- /dev/null +++ b/python/lib/sift_client/_tests/util/test_report_context.py @@ -0,0 +1,95 @@ +"""Tier 1 tests for `ReportContext.__exit__`'s replay-worker handling. + +Each test substitutes the `import-test-result-log` argv with a tiny Python +`-c` invocation that produces a controlled end-state (clean exit / hang / +non-zero exit), then enters and exits a `ReportContext` against a +simulate-mode `SiftClient`. This validates that real subprocess outcomes +route to the right branch of `__exit__` without depending on the real +replay binary or a Sift backend. +""" + +from __future__ import annotations + +import logging +import sys +from typing import TYPE_CHECKING + +from sift_client import SiftClient, SiftConnectionConfig +from sift_client.util.test_results import ReportContext + +if TYPE_CHECKING: + import pytest + + +def _make_simulate_client() -> SiftClient: + """Build a SiftClient flagged for in-process simulation. + + Constructor URLs are placeholders; nothing dials them because every + test-results write short-circuits through the simulate path. + """ + client = SiftClient( + connection_config=SiftConnectionConfig( + api_key="test", + grpc_url="test.invalid:0", + rest_url="http://test.invalid", + ) + ) + client._simulate = True + return client + + +def _make_context(command: list[str]) -> ReportContext: + """Build a ReportContext whose replay subprocess is the provided command. 
+ + `log_file=True` triggers the temp-file path so `_open_import_proc` fires + on `__enter__`. The substitute argv is swapped in via the public-ish + `_build_replay_command` hook so the production Popen kwargs stay + exercised. + """ + rc = ReportContext(_make_simulate_client(), name="test", log_file=True) + rc._build_replay_command = lambda: command # type: ignore[method-assign] + return rc + + +def test_worker_clean_exit_is_silent(caplog: pytest.LogCaptureFixture) -> None: + """Worker exits with code 0 → __exit__ is silent (case 1).""" + rc = _make_context([sys.executable, "-c", "pass"]) + with caplog.at_level(logging.ERROR): + with rc: + pass + assert "Import process" not in caplog.text + assert "replay-test-result-log" not in caplog.text + assert rc._import_proc is not None + assert rc._import_proc.returncode == 0 + + +def test_worker_timeout_kills_and_logs(caplog: pytest.LogCaptureFixture) -> None: + """Worker still running at session end → kill + log, no raise (case 2).""" + rc = _make_context([sys.executable, "-c", "import time; time.sleep(30)"]) + with caplog.at_level(logging.ERROR): + with rc: + pass + assert rc._import_proc is not None + # `kill()` + `wait()` were called; process is dead. 
+ assert rc._import_proc.poll() is not None + assert "did not exit in 1s" in caplog.text + assert "replay-test-result-log" in caplog.text + + +def test_worker_nonzero_exit_logs_stderr_no_raise(caplog: pytest.LogCaptureFixture) -> None: + """Worker exits non-zero with stderr → log stderr + replay hint, no raise (case 3).""" + rc = _make_context( + [ + sys.executable, + "-c", + "import sys; sys.stderr.write('rpc deadline exceeded'); sys.exit(2)", + ] + ) + with caplog.at_level(logging.ERROR): + with rc: + pass + assert rc._import_proc is not None + assert rc._import_proc.returncode == 2 + assert "exited with code 2" in caplog.text + assert "rpc deadline exceeded" in caplog.text + assert "replay-test-result-log" in caplog.text diff --git a/python/lib/sift_client/client.py b/python/lib/sift_client/client.py index 95fd25b71..ff574adba 100644 --- a/python/lib/sift_client/client.py +++ b/python/lib/sift_client/client.py @@ -152,6 +152,11 @@ def __init__( WithGrpcClient.__init__(self, grpc_client=grpc_client) WithRestClient.__init__(self, rest_client=rest_client) + # When set, test-results writes return synthesized responses without + # contacting Sift. Read by `TestResultsAPIAsync._simulate`. Used by the + # pytest plugin's ``--sift-disabled`` mode. 
+ self._simulate: bool = False + self.ping = PingAPI(self) self.assets = AssetsAPI(self) self.calculated_channels = CalculatedChannelsAPI(self) diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index f2699a954..494ded3b6 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -15,7 +15,7 @@ if TYPE_CHECKING: from sift_client.util.test_results.context_manager import NewStep -REPORT_CONTEXT: ReportContext | None = None +REPORT_CONTEXT: Any = None @dataclass(frozen=True) @@ -37,39 +37,53 @@ class _Option: _LOG_FILE = _Option( - cli_flag="--sift-test-results-log-file", - ini_name="sift_test_results_log_file", + cli_flag="--sift-log-file", + ini_name="sift_log_file", cli_help="Path to write the Sift test result log file. " "Use 'true' (default) to auto-create a temp file, " "False, 'false', or 'none' to disable logging, " "or a file path to write to a specific location.", - ini_help="Default value for --sift-test-results-log-file. Same values " - "accepted as the CLI flag (path, 'true', 'false', 'none').", + ini_help="Default value for --sift-log-file. Same values accepted as " + "the CLI flag (path, 'true', 'false', 'none').", ) _GIT_METADATA = _Option( - cli_flag="--no-sift-test-results-git-metadata", - ini_name="sift_test_results_git_metadata", + cli_flag="--no-sift-git-metadata", + ini_name="sift_git_metadata", action="store_false", cli_help="Exclude git metadata from the Sift test results. " "Git metadata (repo, branch, commit) is included by default.", ini_help="Include git repo/branch/commit in the report (true/false). " - "Defaults to true. The --no-sift-test-results-git-metadata CLI flag " - "overrides this when passed.", + "Defaults to true. 
The --no-sift-git-metadata CLI flag overrides " + "this when passed.", ini_type="bool", ini_default=True, ) -_CHECK_CONNECTION = _Option( - cli_flag="--sift-test-results-check-connection", - ini_name="sift_test_results_check_connection", +_OFFLINE = _Option( + cli_flag="--sift-offline", + ini_name="sift_offline", action="store_true", - cli_help="Skip the sift test-result fixtures (report_context, step, module_substep) " - "when the Sift client has no connection to the server. Requires a " - "`client_has_connection` fixture to be available in the test session.", - ini_help="When true, skip the sift test-result fixtures if the client has " - "no connection (same effect as --sift-test-results-check-connection). " - "Defaults to false.", + cli_help="Run without contacting Sift. All create/update calls are written " + "to a JSONL log file for later replay via `import-test-result-log`. " + "No session-start ping is attempted.", + ini_help="When true, run in offline mode (same effect as --sift-offline). Defaults to false.", + ini_type="bool", + ini_default=False, +) + +_DISABLED = _Option( + cli_flag="--sift-disabled", + ini_name="sift_disabled", + action="store_true", + cli_help="Disable Sift integration entirely. Nothing contacts the API " + "and no log file is written. `step.measure(...)` still returns real " + "pass/fail booleans. Returned entities expose `is_simulated == True`. " + "Also honored via the `SIFT_DISABLED` env var. Supersedes every other " + "flag.", + ini_help="When true, run in disabled mode (same effect as --sift-disabled). " + "Also honored via the SIFT_DISABLED env var. Supersedes every other " + "setting. Defaults to false.", ini_type="bool", ini_default=False, ) @@ -89,7 +103,7 @@ class _Option: ) _AUTOUSE = _Option( - ini_name="sift_test_results_autouse", + ini_name="sift_autouse", ini_help="Default for the Sift autouse fixtures (report_context, step, " "module_substep). 
When true (default), tests are included unless marked " "with @pytest.mark.sift_exclude. When false, tests are skipped unless " @@ -102,7 +116,8 @@ class _Option: _OPTIONS: tuple[_Option, ...] = ( _LOG_FILE, _GIT_METADATA, - _CHECK_CONNECTION, + _OFFLINE, + _DISABLED, _GRPC_URI, _REST_URI, _AUTOUSE, @@ -139,15 +154,25 @@ def pytest_configure(config: pytest.Config) -> None: config.addinivalue_line( "markers", "sift_include: force the Sift autouse fixtures to activate for this test " - "regardless of the `sift_test_results_autouse` ini default.", + "regardless of the `sift_autouse` ini default.", ) config.addinivalue_line( "markers", "sift_exclude: force the Sift autouse fixtures to skip this test " - "regardless of the `sift_test_results_autouse` ini default.", + "regardless of the `sift_autouse` ini default.", ) +def _is_offline(pytestconfig: pytest.Config | None) -> bool: + return bool(_option_or_ini(pytestconfig, _OFFLINE)) + + +def _is_disabled(pytestconfig: pytest.Config | None) -> bool: + if bool(_option_or_ini(pytestconfig, _DISABLED)): + return True + return os.getenv("SIFT_DISABLED", "").lower() in ("1", "true", "yes") + + def _sift_enabled_for(node: pytest.Item | pytest.Collector, default: bool) -> bool: """Resolve the Sift gate for a node: sift_exclude > sift_include > default. @@ -203,13 +228,23 @@ def _resolve_log_file(pytestconfig: pytest.Config | None) -> str | Path | bool | * ``None`` — unset; nothing was passed on the CLI and the ini key is absent. Treat as the default "use a temp file." * Python ``False`` — an explicit disable, typically set in a conftest via - ``config.option.sift_test_results_log_file = False``. Return ``None`` so + ``config.option.sift_log_file = False``. Return ``None`` so the rest of the pipeline knows to skip logging entirely. * A string (from CLI or ini) — interpret ``"true"`` / ``"1"`` as the temp file default, ``"false"`` / ``"none"`` as disable, anything else as a file path. 
+ + Rejects ``--sift-log-file=none`` combined with ``--sift-offline`` since + offline mode needs the log file as its sole sink. """ raw = _option_or_ini(pytestconfig, _LOG_FILE) + disabled = raw is False or (isinstance(raw, str) and raw.lower() in ("false", "none")) + if disabled and _is_offline(pytestconfig): + raise pytest.UsageError( + "--sift-log-file=none is incompatible with --sift-offline; offline " + "mode requires a log file. Pin one with --sift-log-file=, or " + "drop --sift-log-file=none to use a temp file." + ) if raw is False: return None if not raw: @@ -239,7 +274,7 @@ def _report_context_impl( sift_client: SiftClient, request: pytest.FixtureRequest, pytestconfig: pytest.Config | None = None, -) -> Generator[ReportContext | None, None, None]: +) -> Generator[ReportContext, None, None]: args = request.config.invocation_params.args test_path = Path(args[0]) if args else None if test_path is not None and test_path.exists(): @@ -248,7 +283,13 @@ def _report_context_impl( else: base_name = "pytest " + " ".join(args) if args else "pytest" test_case = base_name - log_file = _resolve_log_file(pytestconfig) + # Mode → ReportContext flags: + # online (default): log_file=, replay_log_file=True + # --sift-offline: log_file=, replay_log_file=False + # --sift-disabled: log_file=False, replay_log_file=False + disabled = sift_client._simulate + offline = False if disabled else _is_offline(pytestconfig) + log_file: str | Path | bool | None = False if disabled else _resolve_log_file(pytestconfig) git_metadata = _option_or_ini(pytestconfig, _GIT_METADATA) include_git_metadata = True if git_metadata is None else bool(git_metadata) with ReportContext( @@ -257,28 +298,46 @@ def _report_context_impl( test_case=str(test_case), log_file=log_file, include_git_metadata=include_git_metadata, + replay_log_file=not (disabled or offline), ) as context: global REPORT_CONTEXT REPORT_CONTEXT = context yield context -def _check_connection_enabled(pytestconfig: pytest.Config | None) -> 
bool: - """Return True when the caller opted into the check-connection mode via CLI or ini.""" - return bool(_option_or_ini(pytestconfig, _CHECK_CONNECTION)) - - -def _has_sift_connection(request: pytest.FixtureRequest) -> bool: - """Resolve the `client_has_connection` fixture lazily; only called when the check is enabled.""" - return bool(request.getfixturevalue("client_has_connection")) - - _CREDENTIAL_KEYS: tuple[tuple[str, _Option | None], ...] = ( ("SIFT_API_KEY", None), # env-only; never read from ini to keep secrets out of source control. ("SIFT_GRPC_URI", _GRPC_URI), ("SIFT_REST_URI", _REST_URI), ) +# Placeholder credentials used in --sift-offline mode when env/ini values +# are missing. Offline mode never makes network calls, so the values are +# only syntactically required by SiftConnectionConfig. +_OFFLINE_DEFAULTS = { + "SIFT_API_KEY": "offline", + "SIFT_GRPC_URI": "offline.invalid:0", + "SIFT_REST_URI": "http://offline.invalid", +} + + +def _build_disabled_client() -> SiftClient: + """Construct a SiftClient for ``--sift-disabled`` mode. + + Tagged with ``_simulate=True`` so test-results writes short-circuit through + the existing low-level simulate path without contacting Sift. The URLs are + syntactically valid but unreachable; nothing dials them. + """ + client = SiftClient( + connection_config=SiftConnectionConfig( + api_key="disabled", + grpc_url="disabled.invalid:0", + rest_url="http://disabled.invalid", + ) + ) + client._simulate = True + return client + def _resolve_credential( pytestconfig: pytest.Config | None, env_name: str, opt: _Option | None @@ -308,10 +367,19 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient: etc.) can override this fixture by defining their own ``sift_client`` in their ``conftest.py``; pytest fixture resolution prefers the local definition. 
+ + In ``--sift-offline`` mode the missing-credential check is relaxed: + real env vars and ini values still win when set (so the client is + constructible against a real backend even though no calls are made), but + anything still missing is filled with a placeholder. In ``--sift-disabled`` + mode the credential resolution is skipped entirely and placeholders are + always used. """ + if _is_disabled(pytestconfig): + return _build_disabled_client() resolved = {env: _resolve_credential(pytestconfig, env, opt) for env, opt in _CREDENTIAL_KEYS} missing = [env for env, value in resolved.items() if not value] - if missing: + if missing and not _is_offline(pytestconfig): raise pytest.UsageError( "Sift credentials missing: " + ", ".join(missing) @@ -319,8 +387,11 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient: "from a `.env` file automatically — or set the URIs via " "`sift_grpc_uri` / `sift_rest_uri` under `[tool.pytest.ini_options]` " "in pyproject.toml, or override the sift_client fixture in your " - "conftest.py." + "conftest.py, or pass --sift-offline / --sift-disabled to run " + "without contacting Sift." ) + for env in missing: + resolved[env] = _OFFLINE_DEFAULTS[env] # `or ""` is unreachable in practice since the `missing` check above guarantees # non-None values return SiftClient( @@ -334,32 +405,61 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient: @pytest.fixture(scope="session") def report_context( - sift_client: SiftClient, request: pytest.FixtureRequest, pytestconfig: pytest.Config -) -> Generator[ReportContext | None, None, None]: + request: pytest.FixtureRequest, pytestconfig: pytest.Config +) -> Generator[ReportContext, None, None]: """Lazy session-scoped Sift ReportContext. - The fixture is no longer autouse; it's instantiated on the first call to - ``request.getfixturevalue("report_context")``, which today happens inside - the gated ``step`` and ``module_substep`` fixtures. 
If every test in the - session is excluded via the marker gate, this fixture is never resolved - and no ReportContext (and no teardown subprocess) is created. - - The log file destination is controlled by ``--sift-test-results-log-file``. - Defaults to a temp file when not set. - - When ``--sift-test-results-check-connection`` is passed, this fixture will - yield ``None`` if the Sift client has no connection to the server. That mode - requires a ``client_has_connection`` fixture to be available in the session. + The fixture is no longer autouse; it's instantiated on the first call + to ``request.getfixturevalue("report_context")``, which today happens + inside the gated ``step`` and ``module_substep`` fixtures. If every + test in the session is excluded via the marker gate, this fixture is + never resolved and no ReportContext (or teardown subprocess) is created. + + What gets yielded depends on the mode: + + * ``--sift-disabled``: a real ``ReportContext`` against a placeholder + ``SiftClient`` with ``_simulate=True``. Every test-results write + returns a synthesized response without contacting Sift; no log file + is written; the replay subprocess never spawns. Test code that calls + ``step.measure(...)`` keeps working because bounds are evaluated as + usual and routed through the simulate path. + * ``--sift-offline``: a real ReportContext, but the session-start ping + is skipped, all create/update calls go to the JSONL log file, and + the import-test-result-log replay subprocess is not spawned at + session end. + * default (online): verify connectivity via ``client_has_connection`` + before constructing the context. A failed ping aborts the session + with ``pytest.UsageError`` and points at ``--sift-offline`` and + ``--sift-disabled`` as escape hatches. + + The log-file destination is controlled by + ``--sift-log-file``; defaults to a temp file when unset. 
""" - if _check_connection_enabled(pytestconfig) and not _has_sift_connection(request): - yield None + if _is_disabled(pytestconfig): + yield from _report_context_impl( + _build_disabled_client(), request, pytestconfig=pytestconfig + ) return + sift_client = request.getfixturevalue("sift_client") + if not _is_offline(pytestconfig): + try: + request.getfixturevalue("client_has_connection") + except pytest.UsageError: + raise + except Exception as exc: + grpc_config = getattr(getattr(sift_client, "grpc_client", None), "_config", None) + grpc_url = getattr(grpc_config, "uri", "") + raise pytest.UsageError( + f"Sift ping failed against {grpc_url}: {exc}. " + "Pass --sift-offline to run without contacting Sift, or " + "--sift-disabled to skip Sift entirely." + ) from exc yield from _report_context_impl(sift_client, request, pytestconfig=pytestconfig) def _step_impl( report_context: ReportContext, request: pytest.FixtureRequest -) -> Generator[NewStep | None, None, None]: +) -> Generator[NewStep, None, None]: name = str(request.node.name) existing_docstring = request.node.obj.__doc__ or None with report_context.new_step( @@ -383,18 +483,18 @@ def step( Resolves the gate via `_sift_enabled_for(request.node, ini_default)`: `sift_exclude` marker forces off, `sift_include` forces on, otherwise the - `sift_test_results_autouse` ini default applies. When on, requests the + `sift_autouse` ini default applies. When on, requests the session `report_context` lazily — the first gated test in the session - triggers its creation, subsequent gated tests reuse it. + triggers its creation, subsequent gated tests reuse it. In + ``--sift-disabled`` mode the report context is backed by a + ``SiftClient(_simulate=True)`` placeholder, so every write returns a + synthesized response without contacting Sift. 
""" default = bool(_option_or_ini(pytestconfig, _AUTOUSE)) if not _sift_enabled_for(request.node, default): yield None return rc = request.getfixturevalue("report_context") - if rc is None: - yield None - return yield from _step_impl(rc, request) @@ -416,21 +516,22 @@ def module_substep( yield None return rc = request.getfixturevalue("report_context") - if rc is None: - yield None - return yield from _step_impl(rc, request) @pytest.fixture(scope="session") -def client_has_connection(sift_client): - """Check if the SiftClient has a connection to the Sift server. - - Can be used to skip tests that require a connection to the Sift server, and is - consulted by the Sift fixtures when ``--sift-test-results-check-connection`` is set. +def client_has_connection(pytestconfig: pytest.Config, request: pytest.FixtureRequest) -> bool: + """Verify the ``SiftClient`` can reach Sift via ``/ping``. + + Consulted at session start by ``report_context`` in online mode. A failed + ping raises through ``report_context`` and aborts the session with + ``pytest.UsageError``. Override this fixture in your conftest to use a + different reachability signal (e.g. a cached auth token) for environments + where pinging is the wrong check. Returns ``False`` in ``--sift-disabled`` + mode without constructing a client. 
""" - try: - sift_client.ping.ping() - return True - except Exception: + if _is_disabled(pytestconfig): return False + sift_client = request.getfixturevalue("sift_client") + sift_client.ping.ping() + return True diff --git a/python/lib/sift_client/resources/test_results.py b/python/lib/sift_client/resources/test_results.py index 22e984b5e..9e88b6081 100644 --- a/python/lib/sift_client/resources/test_results.py +++ b/python/lib/sift_client/resources/test_results.py @@ -96,6 +96,7 @@ async def create( created_report = await self._low_level_client.create_test_report( test_report=test_report, log_file=log_file, + simulate=self.client._simulate, ) return self._finalize(created_report, log_file) @@ -271,7 +272,7 @@ async def update( update.resource_id = test_report_id existing = test_report if isinstance(test_report, TestReport) else None updated_test_report = await self._low_level_client.update_test_report( - update, log_file=log_file, existing=existing + update, log_file=log_file, existing=existing, simulate=self.client._simulate ) return self._finalize(updated_test_report, log_file) @@ -319,7 +320,7 @@ async def create_step( if isinstance(test_step, dict): test_step = TestStepCreate.model_validate(test_step) test_step_result = await self._low_level_client.create_test_step( - test_step, log_file=log_file + test_step, log_file=log_file, simulate=self.client._simulate ) return self._finalize(test_step_result, log_file) @@ -450,7 +451,7 @@ async def update_step( update.resource_id = test_step_id existing = test_step if isinstance(test_step, TestStep) else None updated_test_step = await self._low_level_client.update_test_step( - update, log_file=log_file, existing=existing + update, log_file=log_file, existing=existing, simulate=self.client._simulate ) return self._finalize(updated_test_step, log_file) @@ -484,10 +485,10 @@ async def create_measurement( if isinstance(test_measurement, dict): test_measurement = TestMeasurementCreate.model_validate(test_measurement) 
test_measurement_result = await self._low_level_client.create_test_measurement( - test_measurement, log_file=log_file + test_measurement, log_file=log_file, simulate=self.client._simulate ) measurement = self._finalize(test_measurement_result, log_file) - if update_step and log_file is None: + if update_step and log_file is None and not self.client._simulate: step = await self.get_step(test_step=test_measurement_result.test_step_id) if step.status == TestStatus.PASSED and not measurement.passed: await self.update_step(test_step=step, update={"status": TestStatus.FAILED}) @@ -508,7 +509,7 @@ async def create_measurements( A tuple of (measurements_created_count, measurement_ids). """ return await self._low_level_client.create_test_measurements( - test_measurements, log_file=log_file + test_measurements, log_file=log_file, simulate=self.client._simulate ) async def list_measurements( @@ -621,10 +622,16 @@ async def update_measurement( update.resource_id = test_measurement.id_ updated_test_measurement = await self._low_level_client.update_test_measurement( - update, log_file=log_file, existing=test_measurement + update, log_file=log_file, existing=test_measurement, simulate=self.client._simulate ) updated_test_measurement = self._finalize(updated_test_measurement, log_file) - if update_step and log_file is None and update.passed is not None and not update.passed: + if ( + update_step + and log_file is None + and not self.client._simulate + and update.passed is not None + and not update.passed + ): step = await self.get_step(test_step=updated_test_measurement.test_step_id) if step.status == TestStatus.PASSED: await self.update_step(test_step=step, update={"status": TestStatus.FAILED}) diff --git a/python/lib/sift_client/sift_types/_mixins/simulated.py b/python/lib/sift_client/sift_types/_mixins/simulated.py new file mode 100644 index 000000000..bdc2c572a --- /dev/null +++ b/python/lib/sift_client/sift_types/_mixins/simulated.py @@ -0,0 +1,32 @@ +"""Mixin that exposes 
``is_simulated`` on test-results entity types.""" + +from __future__ import annotations + + +class SimulatedMixin: + """Mixin for sift_types whose response can be produced by the simulate path. + + The low-level wrapper stamps ``_simulated=True`` on entities it returns from + a simulated branch (see ``TestResultsLowLevelClient._mark_simulated``). This + mixin exposes that flag as a read-only ``is_simulated`` property so + consumers and tests can detect when an instance was synthesized rather than + round-tripped through Sift. + + Inheriting classes are expected to declare a private field + ``_simulated: bool = False`` so pydantic tracks the default correctly. + """ + + _simulated: bool + + @property + def is_simulated(self) -> bool: + """True when this instance was returned from the simulate path. + + Set by the low-level wrapper when the call short-circuited to a + synthesized response (either ``SiftClient._simulate`` mode or per-call + ``log_file`` / ``simulate=True``). False for entities returned from a + normal online call or constructed manually outside the SDK. Offline + mode also reports True since responses are synthesized prior to + replay. 
+ """ + return self._simulated diff --git a/python/lib/sift_client/sift_types/test_report.py b/python/lib/sift_client/sift_types/test_report.py index ecc24f52f..c4abfc548 100644 --- a/python/lib/sift_client/sift_types/test_report.py +++ b/python/lib/sift_client/sift_types/test_report.py @@ -36,6 +36,7 @@ ModelUpdate, ) from sift_client.sift_types._mixins.file_attachments import FileAttachmentsMixin +from sift_client.sift_types._mixins.simulated import SimulatedMixin from sift_client.sift_types.channel import Channel from sift_client.util.metadata import metadata_dict_to_proto, metadata_proto_to_dict @@ -153,7 +154,7 @@ def to_proto(self) -> TestStepProto: return proto -class TestStep(BaseType[TestStepProto, "TestStep"], FileAttachmentsMixin): +class TestStep(BaseType[TestStepProto, "TestStep"], FileAttachmentsMixin, SimulatedMixin): """TestStep model representing a step in a test.""" test_report_id: str @@ -169,6 +170,8 @@ class TestStep(BaseType[TestStepProto, "TestStep"], FileAttachmentsMixin): metadata: dict[str, str | float | bool] | None = None # Set by the resource layer when this instance was produced from a logging-mode call _log_file: str | Path | None = None + # Set by the low-level wrapper when this instance came from the simulate path + _simulated: bool = False @classmethod def _from_proto(cls, proto: TestStepProto, sift_client: SiftClient | None = None) -> TestStep: @@ -383,7 +386,7 @@ def to_proto(self) -> TestMeasurementProto: return proto -class TestMeasurement(BaseType[TestMeasurementProto, "TestMeasurement"]): +class TestMeasurement(BaseType[TestMeasurementProto, "TestMeasurement"], SimulatedMixin): """TestMeasurement model representing a measurement in a test.""" measurement_type: TestMeasurementType @@ -404,6 +407,8 @@ class TestMeasurement(BaseType[TestMeasurementProto, "TestMeasurement"]): # Set by the resource layer when this instance was produced from a logging-mode call _log_file: str | Path | None = None + # Set by the low-level wrapper 
when this instance came from the simulate path + _simulated: bool = False @classmethod def _from_proto( @@ -599,7 +604,7 @@ def _to_proto(self) -> ErrorInfoProto: ) -class TestReport(BaseType[TestReportProto, "TestReport"], FileAttachmentsMixin): +class TestReport(BaseType[TestReportProto, "TestReport"], FileAttachmentsMixin, SimulatedMixin): """TestReport model representing a test report.""" status: TestStatus @@ -617,6 +622,8 @@ class TestReport(BaseType[TestReportProto, "TestReport"], FileAttachmentsMixin): is_archived: bool # Set by the resource layer when this instance was produced from a logging-mode call _log_file: str | Path | None = None + # Set by the low-level wrapper when this instance came from the simulate path + _simulated: bool = False @classmethod def _from_proto( diff --git a/python/lib/sift_client/util/test_results/__init__.py b/python/lib/sift_client/util/test_results/__init__.py index ea213056e..ddce0326c 100644 --- a/python/lib/sift_client/util/test_results/__init__.py +++ b/python/lib/sift_client/util/test_results/__init__.py @@ -68,13 +68,13 @@ def main(self): Note: FedRAMP users: results are buffered to a temp file and uploaded by a subprocess at session end (no API calls during the run). Disable the buffer -entirely with `--sift-test-results-log-file=false` for inline uploads. +entirely with `--sift-log-file=false` for inline uploads. ### Controlling which tests produce reports The autouse fixtures fire for every test by default. To narrow that: -- Set `sift_test_results_autouse = false` in `pyproject.toml` to flip the +- Set `sift_autouse = false` in `pyproject.toml` to flip the project default off, then opt tests back in below. - `@pytest.mark.sift_include` forces reporting on for a test, class, or module. `@pytest.mark.sift_exclude` forces it off. Closest marker wins. 
@@ -105,19 +105,24 @@ def pytest_collection_modifyitems(config, items): CLI options registered by the plugin: -- `--sift-test-results-log-file`: Path to write the JSONL log file. `true` +- `--sift-offline`: Run without contacting Sift. All create/update calls are + written to the JSONL log file for later replay via `import-test-result-log`. + No session-start ping is attempted. +- `--sift-disabled`: Skip Sift entirely. Nothing contacts the API and no + log file is written. `step.measure(...)` still evaluates bounds and + returns a real pass/fail boolean. Returned entities expose + ``is_simulated == True``. Also honored via the `SIFT_DISABLED` env + var. Supersedes every other flag. +- `--sift-log-file`: Path to write the JSONL log file. `true` (default) auto-creates a temp file. `false` or `none` disables logging. Any other value is treated as a file path. -- `--no-sift-test-results-git-metadata`: Exclude git metadata (repo, branch, +- `--no-sift-git-metadata`: Exclude git metadata (repo, branch, commit) from the test report. Included by default. -- `--sift-test-results-check-connection`: Make `report_context`, `step`, and - `module_substep` no-op when the client has no connection. Requires a - `client_has_connection` fixture (the plugin ships a default). Each option has a matching ini key for per-project configuration under ``[tool.pytest.ini_options]`` in ``pyproject.toml`` (or ``[pytest]`` in ``pytest.ini``). CLI flags override ini values. The -``sift_test_results_autouse`` ini key (bool, default ``true``) sets the +``sift_autouse`` ini key (bool, default ``true``) sets the project-wide default for the gate described above. 
The default ``sift_client`` fixture reads ``sift_grpc_uri`` and ``sift_rest_uri`` as fallbacks when the corresponding env vars are unset (env vars win when @@ -126,10 +131,9 @@ def pytest_collection_modifyitems(config, items): ```toml [tool.pytest.ini_options] -sift_test_results_autouse = false -sift_test_results_log_file = "false" -sift_test_results_check_connection = true -sift_test_results_git_metadata = false +sift_autouse = false +sift_offline = true +sift_git_metadata = false sift_grpc_uri = "your-org.sift.example:443" sift_rest_uri = "https://your-org.sift.example" ``` diff --git a/python/lib/sift_client/util/test_results/bounds.py b/python/lib/sift_client/util/test_results/bounds.py index ef5c67ce5..b734cc126 100644 --- a/python/lib/sift_client/util/test_results/bounds.py +++ b/python/lib/sift_client/util/test_results/bounds.py @@ -1,5 +1,10 @@ from __future__ import annotations +from typing import TYPE_CHECKING + +import numpy as np +import pandas as pd + from sift_client.sift_types.test_report import ( NumericBounds, TestMeasurement, @@ -8,6 +13,55 @@ TestMeasurementUpdate, ) +if TYPE_CHECKING: + from numpy.typing import NDArray + + +def to_numpy_array( + values: list[float | int] | NDArray[np.float64] | pd.Series, +) -> NDArray[np.float64]: + """Normalize a list / ndarray / pandas Series into a numpy array. + + Shared by ``measure_avg`` and ``measure_all`` on ``NewStep`` so the + accepted input types stay in sync across measurement variants. + """ + if isinstance(values, list): + return np.array(values) + if isinstance(values, np.ndarray): + return values + if isinstance(values, pd.Series): + return values.to_numpy() + raise ValueError(f"Invalid value type: {type(values)}") + + +def out_of_bounds_mask( + arr: NDArray[np.float64], + bounds: dict[str, float] | NumericBounds, +) -> NDArray[np.bool_]: + """Return a boolean mask selecting elements of ``arr`` that violate ``bounds``. 
+ + Raises ``ValueError`` when ``bounds`` has neither ``min`` nor ``max`` set. + """ + if isinstance(bounds, dict): + bounds = NumericBounds(min=bounds.get("min"), max=bounds.get("max")) + mask: NDArray[np.bool_] | None = None + if bounds.min is not None: + mask = arr < bounds.min + if bounds.max is not None: + above = arr > bounds.max + mask = mask | above if mask is not None else above + if mask is None: + raise ValueError("No bounds provided") + return mask + + +def all_within_bounds( + arr: NDArray[np.float64], + bounds: dict[str, float] | NumericBounds, +) -> bool: + """Return True when every element of ``arr`` is within ``bounds``.""" + return bool(arr[out_of_bounds_mask(arr, bounds)].size == 0) + def assign_value_to_measurement( measurement: TestMeasurement | TestMeasurementCreate | TestMeasurementUpdate, @@ -32,6 +86,38 @@ def assign_value_to_measurement( raise ValueError(f"Invalid value type: {type(value)}") +def value_passes_bounds( + value: float | str | bool, + bounds: dict[str, float] | NumericBounds | str | bool | None, +) -> bool: + """Evaluate a value against bounds without recording a measurement.""" + if bounds is None: + return True + if isinstance(bounds, dict): + bounds = NumericBounds(min=bounds.get("min"), max=bounds.get("max")) + if isinstance(bounds, bool): + if isinstance(value, str): + return str(value).lower() == str(bounds).lower() + return bool(value) == bounds + if isinstance(bounds, str): + if not (isinstance(value, str) or isinstance(value, bool)): + raise ValueError("Value must be a string if bounds provided is a string") + if isinstance(value, bool): + return str(value).lower() == str(bounds).lower() + return value == bounds + # NumericBounds + try: + if bounds.min is not None and bounds.min > value: # type: ignore[operator] + return False + if bounds.max is not None and bounds.max < value: # type: ignore[operator] + return False + except TypeError: + raise TypeError( + f"Value must be a float or int to evaluate numeric bounds but 
gave {type(value)}" + ) from None + return True + + def evaluate_measurement_bounds( measurement: TestMeasurement | TestMeasurementCreate | TestMeasurementUpdate, value: float | str | bool, @@ -53,31 +139,10 @@ def evaluate_measurement_bounds( if isinstance(bounds, dict): bounds = NumericBounds(min=bounds.get("min"), max=bounds.get("max")) - if isinstance(bounds, bool): - if isinstance(value, str): - measurement.passed = str(value).lower() == str(bounds).lower() - else: - measurement.passed = bool(value) == bounds - return bool(measurement.passed) - elif isinstance(bounds, str): - if not (isinstance(value, str) or isinstance(value, bool)): - raise ValueError("Value must be a string if bounds provided is a string") + if isinstance(bounds, str) and not isinstance(bounds, bool): measurement.string_expected_value = bounds - if isinstance(value, bool): - measurement.passed = str(value).lower() == str(bounds).lower() - else: - measurement.passed = value == bounds elif isinstance(bounds, NumericBounds): measurement.numeric_bounds = bounds - measurement.passed = True - try: - if measurement.numeric_bounds.min is not None: - measurement.passed = measurement.passed and measurement.numeric_bounds.min <= value # type: ignore - if measurement.numeric_bounds.max is not None: - measurement.passed = measurement.passed and measurement.numeric_bounds.max >= value # type: ignore - except TypeError: - raise TypeError( - f"Value must be a float or int to evaluate numeric bounds but gave {type(value)}" - ) from None + measurement.passed = value_passes_bounds(value, bounds) return bool(measurement.passed) diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py index 354f8564d..3d375814a 100644 --- a/python/lib/sift_client/util/test_results/context_manager.py +++ b/python/lib/sift_client/util/test_results/context_manager.py @@ -13,7 +13,6 @@ from typing import TYPE_CHECKING import numpy as np -import pandas as 
pd from sift_client.sift_types.test_report import ( ErrorInfo, @@ -28,9 +27,12 @@ ) from sift_client.util.test_results.bounds import ( evaluate_measurement_bounds, + out_of_bounds_mask, + to_numpy_array, ) if TYPE_CHECKING: + import pandas as pd from numpy.typing import NDArray from sift_client.client import SiftClient @@ -118,6 +120,7 @@ def __init__( test_case: str | None = None, log_file: str | Path | bool | None = None, include_git_metadata: bool = False, + replay_log_file: bool = True, ): """Initialize a new report context. @@ -128,10 +131,18 @@ def __init__( system_operator: The operator of the test system. Will default to the current user if not provided. test_case: The name of the test case. Will default to the basename of the file containing the test if not provided. log_file: If True, create a temp log file. If a path, use that path. - All create/update operations will be logged to this file. + If False/None, no log file is written and create/update calls + the API. include_git_metadata: If True, include git metadata in the report. + replay_log_file: When True (the default) and ``log_file`` is set, + spawn ``import-test-result-log --incremental`` to push log + entries to Sift in the background during the session. When + False, the log file is just a record and no worker is spawned. + Replay happens later via ``replay-test-result-log ``. + Has no effect when ``log_file`` is None. """ self.client = client + self.replay_log_file = replay_log_file self.step_is_open = False self.step_stack = [] self.step_number_at_depth = {} @@ -163,28 +174,41 @@ def __init__( ) self.report = client.test_results.create(create, log_file=self.log_file) + def _build_replay_command(self) -> list[str]: + """Build the argv for the import-test-result-log replay subprocess. + + Factored out for testability — tests substitute commands that exit + with controlled returncodes / stderr to exercise the ``__exit__`` + branches without depending on the real replay binary. 
+ """ + return [ + "import-test-result-log", + "--incremental", + str(self.log_file), + "--grpc-url", + self.client.grpc_client._config.uri, + "--rest-url", + self.client.rest_client._config.base_url, + "--api-key", + self.client.grpc_client._config.api_key, + ] + def _open_import_proc(self): - """Open a subprocess to import the log file.""" + """Open a subprocess to import the log file. + + ``stderr`` is captured so a worker crash mid-session can surface its + error at session end via ``__exit__`` rather than failing silently. + """ with _quiet_fork_stderr(): self._import_proc = subprocess.Popen( - [ - "import-test-result-log", - "--incremental", - str(self.log_file), - "--grpc-url", - self.client.grpc_client._config.uri, - "--rest-url", - self.client.rest_client._config.base_url, - "--api-key", - self.client.grpc_client._config.api_key, - ], + self._build_replay_command(), stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, + stderr=subprocess.PIPE, ) def __enter__(self): - if self.log_file: + if self.log_file and self.replay_log_file: self._open_import_proc() return self @@ -199,17 +223,49 @@ def __exit__(self, exc_type, exc_value, traceback): self.report.update(update) if self._import_proc is not None: + # Three outcomes for the replay worker at session end. None of + # them fail the session — tests already ran and their outcome + # is independent of delivery. The local log file is the source + # of recovery for both failure modes via + # `replay-test-result-log `: + # 1. Exits cleanly (returncode 0). Silent. + # 2. Still running after the 1s grace window (TimeoutExpired). + # Healthy worker with a large backlog; kill and surface + # replay instructions. + # 3. Exited with non-zero. Connection failures and API call + # errors land here — the worker's replay loop has no retry, + # so the first failed RPC crashes the subprocess. Log the + # captured stderr at ERROR with replay instructions. 
try: - self._import_proc.communicate(timeout=1) + _, stderr_bytes = self._import_proc.communicate(timeout=1) except subprocess.TimeoutExpired: - logger.error("Import process did not exit in 10s, killing it") + logger.error("Import process did not exit in 1s, killing it") self._import_proc.kill() self._import_proc.wait() log_replay_instructions(self.log_file) - raise + return True # Ensures the session is marked as passed in pytest + if self._import_proc.returncode != 0: + stderr_text = ( + stderr_bytes.decode("utf-8", errors="replace").strip() if stderr_bytes else "" + ) + logger.error( + "Import process exited with code %d. stderr: %s", + self._import_proc.returncode, + stderr_text or "", + ) + log_replay_instructions(self.log_file) return True + @property + def is_simulated(self) -> bool: + """True when this context's report came from the simulate path. + + Delegates to ``self.report.is_simulated``; see ``TestReport.is_simulated`` + for the full semantics. + """ + return self.report.is_simulated + def new_step( self, name: str, @@ -505,15 +561,7 @@ def measure_avg( returns: The true if the average of the values is within the bounds, false otherwise. """ timestamp = timestamp if timestamp else datetime.now(timezone.utc) - np_array = None - if isinstance(values, list): - np_array = np.array(values) - elif isinstance(values, np.ndarray): - np_array = values - elif isinstance(values, pd.Series): - np_array = values.to_numpy() - else: - raise ValueError(f"Invalid value type: {type(values)}") + np_array = to_numpy_array(values) avg = float(np.mean(np_array)) result = self.measure( name=name, @@ -561,31 +609,8 @@ def measure_all( returns: The true if all values are within the bounds, false otherwise. 
""" timestamp = timestamp if timestamp else datetime.now(timezone.utc) - np_array = None - if isinstance(values, list): - np_array = np.array(values) - elif isinstance(values, np.ndarray): - np_array = values - elif isinstance(values, pd.Series): - np_array = values.to_numpy() - else: - raise ValueError(f"Invalid value type: {type(values)}") - - numeric_bounds = bounds - if isinstance(numeric_bounds, dict): - numeric_bounds = NumericBounds(min=bounds.get("min"), max=bounds.get("max")) # type: ignore - - # Construct a mask of the values that are outside the bounds. - mask = None - if numeric_bounds.min is not None: - mask = np_array < numeric_bounds.min - if numeric_bounds.max is not None: - val_above_max = np_array > numeric_bounds.max - mask = mask | val_above_max if mask is not None else val_above_max - if mask is None: - raise ValueError("No bounds provided") - - rows_outside_bounds = np_array[mask] + np_array = to_numpy_array(values) + rows_outside_bounds = np_array[out_of_bounds_mask(np_array, bounds)] for row in rows_outside_bounds: self.measure( name=name, diff --git a/python/pyproject.toml b/python/pyproject.toml index 79afdf464..a2cd6a410 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -412,12 +412,18 @@ env_files = [ # `pytester` is registered globally because pytest 8+ disallows `pytest_plugins` # in non-top-level conftests. Only the plugin test suite uses it; activating it # globally is harmless since the fixture is opt-in. -addopts = "-p pytester" -# The Sift plugin is loaded for the whole project via `python/conftest.py`. -# The autouse gate defaults to off here so unit tests don't use it. The -# integration subtree (lib/sift_client/_tests/util/) opts back in via -# `pytest.mark.sift_include` applied in its conftest. -sift_test_results_autouse = false +# The Sift pytest plugin is loaded so the project's own integration tests can +# use its fixtures. 
Unit-test runs are flipped to `--sift-disabled` mode by +# `lib/sift_client/_tests/conftest.py`. +# `--import-mode=importlib` loads test files by path with unique synthetic +# module names. The default `prepend` mode would try to import +# `lib/sift_client/_tests/conftest.py` as `sift_client._tests.conftest`, which +# fails because `_tests` is excluded from the wheel (see packages.find above). +addopts = "-p pytester -p sift_client.pytest_plugin --import-mode=importlib" +# The autouse gate defaults to off so unit tests don't use the Sift +# fixtures. The integration subtree (lib/sift_client/_tests/util/) opts +# back in via `pytest.mark.sift_include` applied in its conftest. +sift_autouse = false testpaths = [ "lib/sift_py", "lib/sift_client/_tests", diff --git a/python/scripts/dev b/python/scripts/dev index 510d66d95..ce572dba4 100755 --- a/python/scripts/dev +++ b/python/scripts/dev @@ -220,4 +220,5 @@ case "$1" in ;; esac -exit 0 +# Leave the script's exit code as the subcommand's. A trailing `exit 0` here +# silently masked ruff / mypy / pytest failures from the pre-push hook. 
From 654a59c237b73786a420f9e8d081bdbe9dfb2a49 Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Fri, 22 May 2026 17:42:06 -0700 Subject: [PATCH 04/19] Python(feat): hierarchical pytest report tree (packages, modules, classes, parametrize) (#570) --- python/docs/examples/index.md | 1 + python/docs/examples/pytest_plugin.md | 120 ++- .../docs/examples/pytest_plugin_quickstart.md | 177 ++++ python/examples/pytest_plugin/.env.example | 3 + python/examples/pytest_plugin/README.md | 119 +++ python/examples/pytest_plugin/conftest.py | 15 + python/examples/pytest_plugin/pytest.ini | 11 + .../tests/pytest_only/__init__.py | 7 + .../pytest_only/test_pytest_only_demo.py | 49 + .../pytest_plugin/tests/with_sift/__init__.py | 8 + .../tests/with_sift/test_with_sift_demo.py | 159 ++++ .../_tests/pytest_plugin/_fakes.py | 132 +++ .../_tests/pytest_plugin/test_disabled.py | 5 +- .../_tests/pytest_plugin/test_hierarchy.py | 889 ++++++++++++++++++ .../_tests/util/test_report_context.py | 58 +- .../_tests/util/test_test_results_utils.py | 40 + python/lib/sift_client/pytest_plugin.py | 506 +++++++++- .../sift_client/util/test_results/__init__.py | 11 +- .../util/test_results/context_manager.py | 73 +- python/mkdocs.yml | 2 + python/pyproject.toml | 6 +- 21 files changed, 2239 insertions(+), 152 deletions(-) create mode 100644 python/docs/examples/pytest_plugin_quickstart.md create mode 100644 python/examples/pytest_plugin/.env.example create mode 100644 python/examples/pytest_plugin/README.md create mode 100644 python/examples/pytest_plugin/conftest.py create mode 100644 python/examples/pytest_plugin/pytest.ini create mode 100644 python/examples/pytest_plugin/tests/pytest_only/__init__.py create mode 100644 python/examples/pytest_plugin/tests/pytest_only/test_pytest_only_demo.py create mode 100644 python/examples/pytest_plugin/tests/with_sift/__init__.py create mode 100644 python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py create mode 100644 
python/lib/sift_client/_tests/pytest_plugin/_fakes.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py diff --git a/python/docs/examples/index.md b/python/docs/examples/index.md index b6a964b35..936a35cfd 100644 --- a/python/docs/examples/index.md +++ b/python/docs/examples/index.md @@ -7,6 +7,7 @@ This section contains interactive Jupyter notebook examples demonstrating how to - **[Basic Usage](basic.ipynb)** - Introduction to the Sift Python client, covering basic operations and API usage - **[Data Ingestion](ingestion.ipynb)** - Learn how to ingest telemetry data into Sift using various methods - **[Pytest Plugin](pytest_plugin.md)** - Turn a pytest run into a Sift TestReport with measurements, nested steps, and pass/fail outcomes +- **[Pytest Plugin Quickstart](pytest_plugin_quickstart.md)** - Guided tour of the runnable demo project under `python/examples/pytest_plugin/` ## Running Examples Locally diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md index 2ac298256..c464e564e 100644 --- a/python/docs/examples/pytest_plugin.md +++ b/python/docs/examples/pytest_plugin.md @@ -87,7 +87,8 @@ def sift_client() -> SiftClient: |---|---|---|---| | `report_context` | fixture (autouse) | session | The `ReportContext` backing the run's `TestReport`. Use it to attach metadata or open ad-hoc steps. | | `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, and `current_step`. | -| `module_substep` | fixture (autouse) | module | One step per test file with each function nested as a substep. | +| `_hierarchy_parents` | internal fixture (autouse) | function | Opens a parent step for each `pytest.Package`, `pytest.Module`, and `pytest.Class` ancestor of the current test. Each layer is gated independently — see [ini options](#ini-options). 
| +| `_parametrize_parents` | internal fixture (autouse) | function | Opens a parent step for each `@pytest.mark.parametrize` axis (and fixture parametrization), nested inside the hierarchy parents. | | `client_has_connection` | fixture | session | Calls `sift_client.ping.ping()`; consulted by `report_context` at session start in online mode (the default). Override to skip the ping or use a different reachability signal. | ### CLI options @@ -118,6 +119,10 @@ CLI flags, when passed, override the ini values. | `sift_offline` | bool (default `false`) | `--sift-offline` | | `sift_disabled` | bool (default `false`) | `--sift-disabled` (also honors `SIFT_DISABLED` env var) | | `sift_autouse` | bool (default `true`) | _(no CLI flag; controls the marker gate below)_ | +| `sift_package_step` | bool (default `true`) | _(ini-only)_ — open a parent step for each Python package (directory with `__init__.py`) in the test path. | +| `sift_module_step` | bool (default `true`) | _(ini-only)_ — open a parent step for each test module (file). | +| `sift_class_step` | bool (default `true`) | _(ini-only)_ — open a parent step for each test class, including nested classes. | +| `sift_parametrize_nesting` | bool (default `true`) | _(ini-only)_ — cluster parametrized tests under shared parents (`test_x → axis=value`) instead of flat leaves (`test_x[value]`). | The default `sift_client` fixture reads its two URIs from environment first and falls back to ini keys when the env vars are unset. `SIFT_API_KEY` is @@ -302,8 +307,8 @@ outcomes into `TestStatus`: | Manual `step.current_step.update({"status": ...})` | Whatever you set; the step exit handler honors a manually-resolved status | A failure or error at any depth propagates upward: the parent substep, the -function step, the module step (if `module_substep` is active), and the -session report all get marked failed. +function step, the class/module/package steps above it, and the session +report all get marked failed. 
## Nested steps @@ -339,12 +344,14 @@ Each step gets a hierarchical `step_path` (`1`, `1.1`, `1.1.2`, `2`, …) assigned by `ReportContext`. Sibling substeps within the same parent auto-increment; opening a new top-level step starts a new branch. -### One step per file +### Mirroring the test layout -`module_substep` is autouse and module-scoped. When it's active (it's pulled -in by the star-import in `conftest.py`), each file becomes a parent step and -every function in it nests one level down. Its name is the test file's -basename and its description is the module's docstring (if any). +The plugin opens a parent step for each Python package (`__init__.py` +directory), test file, and test class above every test, plus a parent step +for each `@pytest.mark.parametrize` axis. Every layer is on by default and +individually opt-out via ini flags (`sift_package_step`, `sift_module_step`, +`sift_class_step`, `sift_parametrize_nesting`). Class/module/package +docstrings become the matching step's description. ### Linking a Run to the report @@ -384,50 +391,43 @@ TestReport └── test_temperature ``` -### One step per file with `module_substep` +### Modules nested under a package -`module_substep` is autouse and module-scoped. Every file becomes a parent -step and every function in it nests one level down. +Two test files under the same Python package (directory with `__init__.py`) +share that package step as their parent. -```python title="test_battery.py" +```python title="suites/__init__.py" +``` + +```python title="suites/test_battery.py" def test_voltage(step): ... def test_current(step): ... ``` -```python title="test_thermal.py" +```python title="suites/test_thermal.py" def test_idle_temp(step): ... def test_load_temp(step): ... 
``` ```text title="Sift report" TestReport -├── test_battery.py -│ ├── test_voltage -│ └── test_current -└── test_thermal.py - ├── test_idle_temp - └── test_load_temp +└── suites + ├── test_battery.py + │ ├── test_voltage + │ └── test_current + └── test_thermal.py + ├── test_idle_temp + └── test_load_temp ``` -### Test classes +### Test classes (and nested classes) -Pytest classes (`class TestFoo: ...`) do not create a parent step on their -own. The plugin keys off the test node's `name`, which is just the method -name. To group a class's methods under a class-level step, add a class-scoped -fixture that opens a step with `report_context.new_step(...)`: +`class TestFoo:` and `class TestOuter: class TestInner:` produce class and +nested class steps automatically — no manual fixture needed. ```python title="test_charging.py" -import pytest - - class TestCharging: - @pytest.fixture(scope="class", autouse=True) - def class_step(self, report_context): - with report_context.new_step( - name="TestCharging", - description="Charging subsystem", - ) as parent: - yield parent + """Charging subsystem.""" def test_starts_at_zero(self, step): ... def test_reaches_full(self, step): ... @@ -436,23 +436,20 @@ class TestCharging: ```text title="Sift report" TestReport -└── TestCharging - ├── test_starts_at_zero - ├── test_reaches_full - └── test_thermal_throttle +└── test_charging.py + └── TestCharging + ├── test_starts_at_zero + ├── test_reaches_full + └── test_thermal_throttle ``` -!!! note "Combining with `module_substep`" - `module_substep` and a class-scoped step both open at module/class scope, - so they each grab the next sibling slot under the report and the inner - one nests under the outer. If you want both layers (file → class → - method), make the class step itself open via the active outer step - rather than the report root. +The class's docstring becomes the step description. 
### Parametrized tests -Each parametrize case is a distinct pytest node, so each gets its own step. -The step name includes the parameter id pytest generates. +Parametrized tests cluster under a parent step named after the test function, +with one inner parent per parametrize axis (outer-to-inner in +decorator-on-page order). Stacked parametrize produces nested step levels. ```python @pytest.mark.parametrize("voltage", [3.3, 5.0, 12.0]) @@ -462,11 +459,36 @@ def test_rail(step, voltage): ```text title="Sift report" TestReport -├── test_rail[3.3] -├── test_rail[5.0] -└── test_rail[12.0] +└── test_module.py + └── test_rail + ├── voltage=3.3 + ├── voltage=5.0 + └── voltage=12.0 ``` +Stacked parametrize: + +```python +@pytest.mark.parametrize("voltage", ["high", "low"]) +@pytest.mark.parametrize("component", ["motor", "valve"]) +def test_iso(step, voltage, component): ... +``` + +```text title="Sift report" +TestReport +└── test_module.py + └── test_iso + ├── voltage='high' + │ ├── component='motor' + │ └── component='valve' + └── voltage='low' + ├── component='motor' + └── component='valve' +``` + +Set `sift_parametrize_nesting = false` in `pytest.ini` to fall back to flat +leaf names (`test_rail[3.3]`). + ### Helper functions Helpers called from a test do not auto-create a step. The plugin only sees diff --git a/python/docs/examples/pytest_plugin_quickstart.md b/python/docs/examples/pytest_plugin_quickstart.md new file mode 100644 index 000000000..54328c707 --- /dev/null +++ b/python/docs/examples/pytest_plugin_quickstart.md @@ -0,0 +1,177 @@ +# Pytest Plugin Quickstart + +A walkthrough of the runnable demo at +[`python/examples/pytest_plugin/`](https://github.com/sift-stack/sift/tree/main/python/examples/pytest_plugin). +The demo is a self-contained pytest project that exercises every layer of the +plugin's step tree: packages, modules, classes (including nested), parametrize +axes, manual substeps, and gate markers. 
It also includes a tests directory +that uses no Sift APIs at all, to show how the autouse fixtures capture plain +pytest tests for free. + +For a conceptual reference (fixtures, ini flags, status semantics), see +[Pytest Plugin](pytest_plugin.md). + +## Project layout + +``` +examples/pytest_plugin/ +├── conftest.py # registers the plugin +├── pytest.ini # available ini knobs (all commented at defaults) +├── .env.example # credential template +└── tests/ + ├── pytest_only/ # subpackage step + │ ├── __init__.py + │ └── test_pytest_only_demo.py # plain pytest, no Sift APIs + └── with_sift/ # subpackage step + ├── __init__.py + └── test_with_sift_demo.py # measurements, substeps, classes, parametrize, gates +``` + +Every Python package (directory with `__init__.py`), test file, and test class +above each test becomes its own parent step in the report tree. + +## `conftest.py` + +A single `pytest_plugins` declaration loads the plugin; `load_dotenv()` is +optional and just lets the default `sift_client` fixture pick up +`SIFT_API_KEY` / `SIFT_GRPC_URI` / `SIFT_REST_URI` from a local `.env`. + +```python title="conftest.py" +--8<-- "examples/pytest_plugin/conftest.py" +``` + +## `pytest.ini` + +Every knob is commented at its default value. Uncomment any line to opt out of +a layer of the step tree. + +```ini title="pytest.ini" +--8<-- "examples/pytest_plugin/pytest.ini" +``` + +## `.env.example` + +```bash title=".env.example" +--8<-- "examples/pytest_plugin/.env.example" +``` + +## The pytest_only module + +Plain pytest tests with no `sift_client` imports, no `step` fixture, no +markers. Each one still becomes a leaf step in the report tree. The plugin's +autouse fixtures capture pass/fail automatically. 
+ +```python title="tests/pytest_only/test_pytest_only_demo.py" +--8<-- "examples/pytest_plugin/tests/pytest_only/test_pytest_only_demo.py" +``` + +## The with_sift module + +Exercises the plugin's full surface: numeric / string / bool bounds, nested +`step.substep`, `@pytest.mark.sift_exclude`, class steps with docstring +descriptions, nested classes, stacked `@pytest.mark.parametrize`, and +`step.report_outcome`. + +```python title="tests/with_sift/test_with_sift_demo.py" +--8<-- "examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py" +``` + +## Run it + +### Without Sift credentials + +```bash +cd python/examples/pytest_plugin +pytest --sift-disabled -v +``` + +`--sift-disabled` makes the plugin a no-op transport: `step.measure(...)` +still evaluates bounds and returns a real pass/fail boolean, but nothing +contacts Sift and no log file is written. Useful for previewing the report +tree or unit-testing measurement logic. + +### Against a real Sift org + +```bash +cp .env.example .env +# Fill in SIFT_API_KEY / SIFT_GRPC_URI / SIFT_REST_URI +pytest -v +``` + +A `TestReport` shows up in Sift once the session finishes. 
+ +### Offline (record now, replay later) + +```bash +pytest --sift-offline --sift-log-file=/tmp/sift-demo.jsonl -v +# Later, from anywhere with credentials: +import-test-result-log /tmp/sift-demo.jsonl +``` + +## Expected report tree + +With the plugin's defaults (every layer enabled), the demo produces: + +``` +TestReport (FAILED, since failures propagate up from leaves) +├── pytest_only ← package step (FAILED) +│ └── test_pytest_only_demo.py ← module step (FAILED) +│ ├── test_passes PASSED +│ ├── test_uses_a_pytest_fixture PASSED +│ ├── test_assertion_failure_marks_step_failed FAILED +│ ├── test_skipped SKIPPED +│ ├── test_unexpected_exception_marks_step_errored ERROR +│ ├── test_parametrize_without_step +│ │ ├── value='v1' PASSED +│ │ └── value='v2' PASSED +│ └── TestPytestClass +│ └── test_method PASSED +└── with_sift ← package step (FAILED) + └── test_with_sift_demo.py ← module step (FAILED) + ├── test_measurements PASSED + ├── test_substeps PASSED + │ ├── phase_1 + │ └── phase_2 + │ └── phase_2a + │ (test_excluded: @sift_exclude, runs in pytest, NOT in tree) + ├── test_measure_series PASSED + ├── test_failed_measurement_marks_sift_step_failed FAILED (pytest PASSED) + ├── test_assert_measurements_passed_at_end FAILED (pytest FAILED) + ├── test_report_level_metadata PASSED + └── TestClassStep + ├── test_parametrize + │ ├── axis_a='a1' + │ │ ├── axis_b='b1' PASSED + │ │ └── axis_b='b2' PASSED + │ └── axis_a='a2' + │ ├── axis_b='b1' PASSED + │ └── axis_b='b2' PASSED + └── TestNested + └── test_report_outcome + └── check PASSED +``` + +The `pytest_only` module deliberately includes one failing, one skipped, and +one erroring test so the demo shows every `TestStatus` mapping (`FAILED` for +assertions, `SKIPPED` for `pytest.skip`, `ERROR` for any other exception). 
+The `with_sift` module shows two patterns for handling measurement results: +`test_failed_measurement_marks_sift_step_failed` lets the test keep passing +in pytest while the Sift step is `FAILED` (useful when measurements are +diagnostic data you want to collect regardless of outcome); and +`test_assert_measurements_passed_at_end` takes every measurement first and +then asserts `step.measurements_passed` once at the end, so every +measurement still lands in the report even when one fails. The end-of-test +assertion is the recommended pattern: asserting on an individual +`step.measure(...)` call short-circuits on the first failure and skips +every measurement that follows. Expected +pytest output is `16 passed, 3 failed, 1 skipped`. + +Flip any of the `sift_*_step` / `sift_parametrize_nesting` flags in +`pytest.ini` to `false` to collapse a layer. + +## Next steps + +- [Pytest Plugin](pytest_plugin.md): conceptual reference covering fixtures, + ini flags, status semantics, and layout-mapping examples. +- The demo's [README](https://github.com/sift-stack/sift/blob/main/python/examples/pytest_plugin/README.md) + on GitHub mirrors this page and is the canonical source. 
diff --git a/python/examples/pytest_plugin/.env.example b/python/examples/pytest_plugin/.env.example new file mode 100644 index 000000000..a8c028598 --- /dev/null +++ b/python/examples/pytest_plugin/.env.example @@ -0,0 +1,3 @@ +SIFT_API_KEY=your-api-key +SIFT_GRPC_URI=your-org.grpc.example.com +SIFT_REST_URI=https://your-org.rest.example.com diff --git a/python/examples/pytest_plugin/README.md b/python/examples/pytest_plugin/README.md new file mode 100644 index 000000000..c74a9c939 --- /dev/null +++ b/python/examples/pytest_plugin/README.md @@ -0,0 +1,119 @@ +# Pytest plugin demo + +A self-contained pytest project that exercises every feature of +`sift_client.pytest_plugin`: package / module / class / parametrize step +nesting, nested classes, manual substeps, `step.measure(...)` against +numeric / string / bool bounds, gate markers, and the ini opt-outs. + +``` +examples/pytest_plugin/ +├── conftest.py # registers the plugin +├── pytest.ini # available ini knobs (all commented at defaults) +├── .env.example # credential template (copy to .env for local runs) +└── tests/ + ├── pytest_only/ # subpackage step: `pytest_only` opens a parent step + │ ├── __init__.py + │ └── test_pytest_only_demo.py # plain pytest tests with no Sift APIs + └── with_sift/ # subpackage step: `with_sift` opens a parent step + ├── __init__.py + └── test_with_sift_demo.py # measurements, substeps, classes, nested classes, + # stacked parametrize, sift_exclude marker +``` + +Every layer of organization shows up in the report tree: Python packages +(directories with `__init__.py`), modules (test files), classes (including +nested classes), and parametrize axes each open a parent step. Flip +`sift_package_step`, `sift_module_step`, `sift_class_step`, or +`sift_parametrize_nesting` to `false` in `pytest.ini` to disable this behavior. 
+ +## Run it + +**Against a real Sift org**: + +```bash +cp .env.example .env +# Fill in SIFT_API_KEY / SIFT_GRPC_URI / SIFT_REST_URI +pytest -v +``` + +A `TestReport` shows up in Sift once the session finishes. + +**Offline (record now, replay later - intended for offline environments)**: + +```bash +pytest --sift-offline --sift-log-file=/tmp/sift-demo.jsonl -v +# Later, from anywhere with credentials: +import-test-result-log /tmp/sift-demo.jsonl +``` + +## What the report tree looks like + +With the plugin's defaults (everything in `pytest.ini` left commented), running +this demo produces a tree like: + +``` +TestReport (FAILED, since failures propagate up from leaves) +├── pytest_only ← package step (FAILED) +│ └── test_pytest_only_demo.py ← module step (FAILED) +│ ├── test_passes PASSED +│ ├── test_uses_a_pytest_fixture PASSED +│ ├── test_assertion_failure_marks_step_failed FAILED +│ ├── test_skipped SKIPPED +│ ├── test_unexpected_exception_marks_step_errored ERROR +│ ├── test_parametrize_without_step +│ │ ├── value='v1' PASSED +│ │ └── value='v2' PASSED +│ └── TestPytestClass +│ └── test_method PASSED +└── with_sift ← package step (FAILED) + └── test_with_sift_demo.py ← module step (FAILED) + ├── test_measurements PASSED + ├── test_substeps PASSED + │ ├── phase_1 + │ └── phase_2 + │ └── phase_2a + │ (test_excluded: @sift_exclude, runs in pytest, NOT in tree) + ├── test_measure_series PASSED + ├── test_failed_measurement_marks_sift_step_failed FAILED (pytest PASSED) + ├── test_assert_measurements_passed_at_end FAILED (pytest FAILED) + ├── test_report_level_metadata PASSED + └── TestClassStep + ├── test_parametrize + │ ├── axis_a='a1' + │ │ ├── axis_b='b1' PASSED + │ │ └── axis_b='b2' PASSED + │ └── axis_a='a2' + │ ├── axis_b='b1' PASSED + │ └── axis_b='b2' PASSED + └── TestNested + └── test_report_outcome + └── check PASSED +``` + +The `pytest_only` module deliberately includes one failing, one skipped, and +one erroring test so the demo shows every 
`TestStatus` mapping (`FAILED` for +assertions, `SKIPPED` for `pytest.skip`, `ERROR` for any other exception). +The `with_sift` module shows two patterns for handling measurement results: +`test_failed_measurement_marks_sift_step_failed` lets the test keep passing +in pytest while the Sift step is `FAILED` (useful when measurements are +diagnostic data you want to collect regardless of outcome); and +`test_assert_measurements_passed_at_end` takes every measurement first and +then asserts `step.measurements_passed` once at the end, so every +measurement still lands in the report even when one fails. The end-of-test +assertion is the recommended pattern: asserting on an individual +`step.measure(...)` call short-circuits on the first failure and skips +every measurement that follows. Expected +pytest output is `16 passed, 3 failed, 1 skipped`. + +Toggle any of the `sift_*_step` / `sift_parametrize_nesting` flags in +`pytest.ini` to `false` to collapse a layer. + +## What each file demonstrates + +| File | Feature | +|---|---| +| `conftest.py` | Plugin registration via `pytest_plugins`; optional `load_dotenv()` | +| `pytest.ini` | The four nesting flags + git metadata flag at their defaults | +| `tests/pytest_only/test_pytest_only_demo.py` | Plain pytest tests with no Sift APIs. 
The plugin captures pass/fail automatically; covers functions, fixtures, parametrize, classes, plus one each of `AssertionError` (FAILED), `pytest.skip` (SKIPPED), and a raised `ValueError` (ERROR) | +| `tests/with_sift/test_with_sift_demo.py` | `step.measure` (numeric/string/bool bounds, units, description, metadata, `channel_names`), `step.measure_avg` and `step.measure_all` for series, an out-of-bounds measurement (pytest PASSED, Sift step FAILED), the recommended `assert step.measurements_passed` end-of-test pattern that fails pytest while still recording every measurement, nested `step.substep` (with step-level `metadata=...`), `@pytest.mark.sift_exclude`, class step + class docstring → description, nested classes, stacked `@pytest.mark.parametrize`, `step.report_outcome`, and session-level metadata via `report_context.report.update({...})` | +| `tests/{pytest_only,with_sift}/__init__.py` | Each Python package (directory with `__init__.py`) becomes a parent step in the report tree | diff --git a/python/examples/pytest_plugin/conftest.py b/python/examples/pytest_plugin/conftest.py new file mode 100644 index 000000000..88253bd73 --- /dev/null +++ b/python/examples/pytest_plugin/conftest.py @@ -0,0 +1,15 @@ +"""Project-level conftest for the pytest plugin demo. + +A single ``pytest_plugins`` declaration is enough to load the plugin — its +fixtures, hooks, and CLI options register through standard pytest machinery +from there. ``load_dotenv()`` is optional; it just lets the default +``sift_client`` fixture pick up ``SIFT_API_KEY`` / ``SIFT_GRPC_URI`` / +``SIFT_REST_URI`` from a local ``.env`` when running against a real Sift org. +These can also be set as environment variables using your preferred method. 
+""" + +from dotenv import load_dotenv + +load_dotenv() + +pytest_plugins = ["sift_client.pytest_plugin"] diff --git a/python/examples/pytest_plugin/pytest.ini b/python/examples/pytest_plugin/pytest.ini new file mode 100644 index 000000000..90a1a824b --- /dev/null +++ b/python/examples/pytest_plugin/pytest.ini @@ -0,0 +1,11 @@ +[pytest] +# Defaults give you the full step tree: every package, module, class, and +# parametrize axis becomes a parent step. These are the available ini options +# and their defaults. +# +# sift_autouse = true # autouse fixtures (default: true) +# sift_package_step = true # Python package (dir with __init__.py) parent step (default: true) +# sift_module_step = true # module (test file) parent step (default: true) +# sift_class_step = true # class parent step incl. nested (default: true) +# sift_parametrize_nesting = true # parametrize parent steps (default: true) +# sift_git_metadata = true # git repo/branch/commit included on the report (default: true) diff --git a/python/examples/pytest_plugin/tests/pytest_only/__init__.py b/python/examples/pytest_plugin/tests/pytest_only/__init__.py new file mode 100644 index 000000000..939562d5f --- /dev/null +++ b/python/examples/pytest_plugin/tests/pytest_only/__init__.py @@ -0,0 +1,7 @@ +"""Subpackage of plain pytest tests with no Sift awareness. + +Demonstrates that the plugin captures any test's pass/fail with no opt-in +needed — no ``step`` fixture, no markers, no imports from ``sift_client``. +The package directory itself becomes a parent step in the report tree (via +``sift_package_step``, on by default). 
+""" diff --git a/python/examples/pytest_plugin/tests/pytest_only/test_pytest_only_demo.py b/python/examples/pytest_plugin/tests/pytest_only/test_pytest_only_demo.py new file mode 100644 index 000000000..77790d301 --- /dev/null +++ b/python/examples/pytest_plugin/tests/pytest_only/test_pytest_only_demo.py @@ -0,0 +1,49 @@ +"""Plain pytest tests are automatically captured by the plugin as steps. + +No imports from ``sift_client`` or fixture usage required. Each test +becomes a step in the report tree: passing tests resolve to ``PASSED``, +failing tests to ``FAILED``. This allows integrating existing tests +with Sift Test Results without modification. +""" + +import pytest + + +def test_passes(): + """Functions become steps in the report tree. The function docstring is used as the step description.""" + assert 1 + 1 == 2 + + +@pytest.mark.parametrize("value", ["v1", "v2"]) +def test_parametrize_without_step(value): + """Parametrized tests are nested under a common step with sub steps for each permutation.""" + assert value.startswith("v") + + +class TestPytestClass: + """Test classes are turned into parent steps for their methods. 
 Class docstrings are used as the step description.""" + + def test_method(self): + assert True + + +def test_uses_a_pytest_fixture(tmp_path): + """Normal pytest fixtures keep working; the plugin doesn't intercept them.""" + (tmp_path / "marker").write_text("ok") + assert (tmp_path / "marker").read_text() == "ok" + + +def test_assertion_failure_marks_step_failed(): + """An ``AssertionError`` resolves the Sift step as ``FAILED`` (no traceback attached).""" + assert 1 + 1 == 3 + + +@pytest.mark.skip(reason="Demonstrating the skip outcome") +def test_skipped(): + """Skipped tests resolve as ``SKIPPED`` in the Sift report.""" + pass + + +def test_unexpected_exception_marks_step_errored(): + """Non-``AssertionError`` exceptions resolve the Sift step as ``ERROR`` with the traceback attached.""" + raise ValueError("simulated environmental failure") diff --git a/python/examples/pytest_plugin/tests/with_sift/__init__.py b/python/examples/pytest_plugin/tests/with_sift/__init__.py new file mode 100644 index 000000000..6fd60c38d --- /dev/null +++ b/python/examples/pytest_plugin/tests/with_sift/__init__.py @@ -0,0 +1,8 @@ +"""Subpackage of tests that use the Sift plugin APIs explicitly. + +Demonstrates ``step.measure`` (numeric / string / bool bounds), nested +``step.substep``, gate markers, class and nested-class step nesting, stacked +parametrize, and ``step.report_outcome``. The package directory itself +becomes a parent step in the report tree (via ``sift_package_step``, on by +default).
+""" diff --git a/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py b/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py new file mode 100644 index 000000000..34bf602b7 --- /dev/null +++ b/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py @@ -0,0 +1,159 @@ +"""End-to-end demo of the test-results features: measurements, substeps, +exclusion, classes, nested classes, and stacked parametrize.""" + +import pytest + + +def test_measurements(step) -> None: + """Measurements are the first-class method for recording numeric, string, or bool bounds criteria and their outcomes. These show up in report steps. + ``step.measure`` accepts numeric (min/max), string, or bool bounds. + Names should provide sufficient context but be general enough that similar/identical measurements + across steps or reports can be compared. + """ + step.measure(name="numeric_value", value=1.5, bounds={"min": 0.0, "max": 2.0}) + step.measure(name="string_label", value="ok", bounds="ok") + step.measure(name="bool_flag", value=True, bounds=True) + + # Descriptions and metadata can also be provided to measurements. + step.measure( + name="numeric_value_2", + value=1.5, + bounds={"min": 0.0, "max": 2.0}, + description="Numeric that represents X, Y, Z", + metadata={"subsystem": "A"}, + ) + + # If you plan to link the pytest report to a Sift Run, you can also assign related channels for easy plotting in the app + step.measure( + name="numeric_value", + value=1.5, + bounds={"min": 0.0, "max": 2.0}, + channel_names=["channel_1", "channel_2"], + ) + + +def test_substeps(step) -> None: + """``step.substep(...)`` opens child steps inside one test; substeps nest arbitrarily. + This can be useful for grouping related measurements or for creating a more natural report structure + without the need to create a new test, class, etc.
 + + Metadata can be attached at the step level by passing ``metadata=...`` to + ``substep``; the same keyword is accepted by ``report_context.new_step`` + and propagates to the resulting ``TestStep``. + """ + with step.substep(name="phase_1", metadata={"phase_index": 1}) as s1: + s1.measure(name="value", value=1.0, bounds={"min": 0.0, "max": 2.0}) + + with step.substep(name="phase_2", metadata={"phase_index": 2}) as s2: + with s2.substep(name="phase_2a") as s2a: + s2a.measure(name="value", value=1.0, bounds={"min": 0.0, "max": 2.0}) + + +def test_measure_series(step) -> None: + """``measure_avg`` and ``measure_all`` are the series variants of ``measure``. + + Both accept a list, numpy array, or pandas series of numeric values. + ``measure_avg`` records one row holding the mean of the series and + bounds-checks it. ``measure_all`` evaluates every value individually and + records one row per out-of-bounds element (in-bounds values are NOT + recorded, keeping the report compact). + """ + voltages = [4.95, 5.02, 5.01, 4.98, 5.00] + step.measure_avg( + name="voltage_mean", + values=voltages, + bounds={"min": 4.9, "max": 5.1}, + unit="V", + ) + # All values are in-bounds here, so measure_all records nothing extra; + # change one to e.g. 6.0 to see an out-of-bounds row appear. + step.measure_all( + name="voltage_samples", + values=voltages, + bounds={"min": 4.9, "max": 5.1}, + unit="V", + ) + + +def test_failed_measurement_marks_sift_step_failed(step) -> None: + """An out-of-bounds measurement marks the Sift step as ``FAILED`` + without raising. The pytest test still passes (no assertion, no + exception); the Sift report records bounds compliance while pytest + records control flow. + + Use this pattern when measurements are diagnostic data you want to + collect alongside the test result, even when some readings fall outside + spec. See ``test_assert_measurements_passed_at_end`` below for the recommended way + to also fail pytest when any measurement is out of bounds.
+ """ + step.measure( + name="voltage", + value=99.0, # outside the bounds below; marks the step FAILED in Sift + bounds={"min": 0.0, "max": 10.0}, + unit="V", + ) + + +def test_assert_measurements_passed_at_end(step) -> None: + """Recommended pattern: take every measurement first, then assert + ``step.measurements_passed`` once at the end. + + Asserting on individual ``step.measure(...)`` calls raises + ``AssertionError`` on the first failure, so any measurements after the + failing one never run and never land in the Sift report. The end-of-test + assertion is strictly better for diagnostic completeness: every + measurement is recorded, including the failures, and the aggregate + result is then folded into the pytest outcome. + + The ``b`` measurement below is deliberately out of bounds. ``c`` still + runs and is recorded; only the final ``assert`` fires. + """ + step.measure(name="a", value=1.0, bounds={"min": 0.0, "max": 2.0}) + step.measure(name="b", value=99.0, bounds={"min": 0.0, "max": 2.0}) # out of bounds + step.measure(name="c", value=1.5, bounds={"min": 0.0, "max": 2.0}) # still recorded + assert step.measurements_passed, "one or more measurements out of bounds" + + +def test_report_level_metadata(step, report_context) -> None: + """Attach metadata to the run-wide ``TestReport`` via ``report_context.report.update(...)``. + + The same ``update({...})`` pattern works for any field on + ``TestReportUpdate`` (``run_id``, ``serial_number``, ``part_number``, + ``system_operator``, ``metadata``, ...). Useful for linking a session + to a Sift Run or tagging the report with build / operator info. 
 + """ + report_context.report.update( + { + "metadata": { + "build_id": "v1.2.3", + "operator": "ci", + } + } + ) + step.measure(name="value", value=1.0, bounds={"min": 0.0, "max": 2.0}) + + +@pytest.mark.sift_exclude +def test_excluded() -> None: + """``sift_exclude`` runs the test in pytest but produces no Sift step.""" + assert True + + +class TestClassStep: + """A test class becomes its own step in the report tree. + + This docstring becomes the description of the ``TestClassStep`` step. + """ + + @pytest.mark.parametrize("axis_a", ["a1", "a2"]) + @pytest.mark.parametrize("axis_b", ["b1", "b2"]) + def test_parametrize(self, step, axis_a: str, axis_b: str) -> None: + """Stacked parametrize nests outer-to-inner in decorator-on-page order.""" + step.measure(name="value", value=1.0, bounds={"min": 0.0, "max": 2.0}) + + class TestNested: + """Nested classes produce nested class steps.""" + + def test_report_outcome(self, step) -> None: + """``step.report_outcome`` records a non-numeric pass/fail substep.""" + step.report_outcome(name="check", result=True, reason="value matched") diff --git a/python/lib/sift_client/_tests/pytest_plugin/_fakes.py b/python/lib/sift_client/_tests/pytest_plugin/_fakes.py new file mode 100644 index 000000000..460100daa --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/_fakes.py @@ -0,0 +1,132 @@ +"""Test doubles for the pytester-driven pytest-plugin tests. + +The fake ``ReportContext`` is a drop-in for the real one that records every +step creation to a JSON file at session exit. Used by ``test_hierarchy.py`` +to assert the step tree produced by an inner pytester pytest run.
+""" + +from __future__ import annotations + +import itertools +import json +from typing import TYPE_CHECKING, Any +from unittest.mock import MagicMock + +if TYPE_CHECKING: + from pathlib import Path + + +class FakeStep: + def __init__(self, id_: str, name: str, parent_step_id: str | None, step_path: str) -> None: + self.id_ = id_ + self.name = name + self.parent_step_id = parent_step_id + self.step_path = step_path + self.status: Any = None + self.description: Any = None + self.error_info: Any = None + + def update(self, fields: dict[str, Any]) -> None: + for k, v in fields.items(): + setattr(self, k, v) + + +class FakeReport: + def __init__(self) -> None: + self.id_ = "report-id" + + def update(self, fields: dict[str, Any]) -> None: + pass + + +class FakeReportContext: + def __init__(self, steps_file: Path) -> None: + self.steps_file = steps_file + self.report = FakeReport() + self.client = MagicMock() + self.step_stack: list[FakeStep] = [] + self.step_number_at_depth: dict[int, int] = {} + self.open_step_results: dict[str, bool] = {} + self.any_failures = False + self.log_file: Path | None = None + self.steps: list[dict[str, Any]] = [] + self._ids = itertools.count(1) + + def __enter__(self) -> FakeReportContext: + return self + + def __exit__(self, *_: Any) -> None: + self.steps_file.write_text(json.dumps(self.steps)) + + def new_step( + self, + name: str, + description: str | None = None, + assertion_as_fail_not_error: bool = True, + metadata: dict[str, Any] | None = None, + ) -> Any: + # Reuse the real NewStep machinery — it talks to this fake via the + # methods below. 
+ from sift_client.util.test_results.context_manager import NewStep + + return NewStep( + self, # type: ignore[arg-type] + name=name, + description=description, + assertion_as_fail_not_error=assertion_as_fail_not_error, + metadata=metadata, + ) + + def get_next_step_path(self) -> str: + top = self.step_stack[-1] if self.step_stack else None + path = top.step_path if top else "" + next_n = self.step_number_at_depth.get(len(self.step_stack), 0) + 1 + prefix = f"{path}." if path else "" + return f"{prefix}{next_n}" + + def create_step( + self, + name: str, + description: str | None = None, + metadata: dict[str, Any] | None = None, + ) -> FakeStep: + step_path = self.get_next_step_path() + parent = self.step_stack[-1] if self.step_stack else None + step = FakeStep( + id_=f"step-{next(self._ids)}", + name=name, + parent_step_id=parent.id_ if parent else None, + step_path=step_path, + ) + self.step_number_at_depth[len(self.step_stack)] = ( + self.step_number_at_depth.get(len(self.step_stack), 0) + 1 + ) + self.step_stack.append(step) + self.open_step_results[step.step_path] = True + self.steps.append( + { + "id": step.id_, + "name": name, + "parent_step_id": step.parent_step_id, + "step_path": step_path, + } + ) + return step + + def record_step_outcome(self, outcome: bool, step: FakeStep) -> None: + if not outcome: + self.open_step_results[step.step_path] = False + self.any_failures = True + + def resolve_and_propagate_step_result(self, step: FakeStep, error_info: Any = None) -> bool: + result = self.open_step_results.get(step.step_path, True) + if error_info: + result = False + return result + + def exit_step(self, step: FakeStep) -> None: + self.step_number_at_depth[len(self.step_stack)] = 0 + stack_top = self.step_stack.pop() + self.open_step_results.pop(step.step_path) + if stack_top.id_ != step.id_: + raise ValueError("popped step was not the top of the stack") diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py 
b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py index cba4bc1ee..90a5fcb56 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py @@ -109,20 +109,19 @@ def test_disabled_yields_stub_fixtures( clear_sift_env: None, write_plugin_conftest: Callable[[], None], ) -> None: - """`report_context` / `step` / `module_substep` are real instances backed by a simulate client.""" + """`report_context` / `step` are real instances backed by a simulate client.""" write_plugin_conftest() pytester.makepyfile( """ from sift_client.util.test_results import ReportContext from sift_client.util.test_results.context_manager import NewStep - def test_types(step, report_context, module_substep): + def test_types(step, report_context): assert isinstance(report_context, ReportContext) assert report_context.is_simulated is True assert report_context.report.is_simulated is True assert step.current_step.is_simulated is True assert isinstance(step, NewStep) - assert isinstance(module_substep, NewStep) """ ) result = pytester.runpytest_subprocess("--sift-disabled") diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py new file mode 100644 index 000000000..cecad2df8 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py @@ -0,0 +1,889 @@ +"""Tests for the plugin's hierarchy-step nesting behavior. + +Covers every layer the plugin opens parent steps for — packages, modules, +classes (including nested), parametrize axes — plus the ini opt-out flags, +failure-cleanup semantics, and the drain helper. + +Each test spins up an inner pytest run via ``pytester`` whose conftest swaps +in a ``FakeReportContext`` (from ``_fakes.py``) that records every step +creation to a JSON file. The outer test reads that file and asserts the +resulting step tree. 
def _by_name(steps: list[dict]) -> dict[str, list[dict]]:
    """Group captured step records by their ``name``, keeping capture order."""
    grouped: dict[str, list[dict]] = {}
    for record in steps:
        key = record["name"]
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(record)
    return grouped


def _ancestor_names(steps: list[dict], leaf: dict) -> list[str]:
    """Return the names from ``leaf`` up to its root, following ``parent_step_id``.

    The walk stops at the first record whose parent id is empty or is not
    present in ``steps``.
    """
    index: dict = {}
    for record in steps:
        index[record["id"]] = record

    names = [leaf["name"]]
    parent_id = leaf["parent_step_id"]
    while parent_id:
        node = index.get(parent_id)
        if node is None:
            break
        names.append(node["name"])
        parent_id = node["parent_step_id"]
    return names
test_two_sibling_classes_in_module(pytester: pytest.Pytester, steps_file: Path) -> None: + pytester.makepyfile( + test_sib=dedent( + """ + class TestA: + def test_x(self): + pass + + class TestB: + def test_y(self): + pass + """ + ) + ) + result = pytester.runpytest_subprocess("-v") + result.assert_outcomes(passed=2) + steps = json.loads(steps_file.read_text()) + by_name = _by_name(steps) + mod_id = by_name["test_sib.py"][0]["id"] + assert by_name["TestA"][0]["parent_step_id"] == mod_id + assert by_name["TestB"][0]["parent_step_id"] == mod_id + # Sanity: each class is opened exactly once (no duplicate parents). + assert len(by_name["TestA"]) == 1 + assert len(by_name["TestB"]) == 1 + + +def test_mixed_class_and_free_function(pytester: pytest.Pytester, steps_file: Path) -> None: + pytester.makepyfile( + test_mix=dedent( + """ + class TestA: + def test_x(self): + pass + + def test_free(): + pass + """ + ) + ) + result = pytester.runpytest_subprocess("-v") + result.assert_outcomes(passed=2) + steps = json.loads(steps_file.read_text()) + by_name = _by_name(steps) + mod_id = by_name["test_mix.py"][0]["id"] + # Class method parents to TestA; free function parents directly to module. 
def test_class_docstring_becomes_step_description(
    pytester: pytest.Pytester, steps_file: Path
) -> None:
    """Run a documented test class and check its step sits in the leaf's ancestor chain.

    NOTE(review): despite the test name, nothing below asserts on the class
    step's description; only the ancestor chain is checked.
    """
    pytester.makepyfile(
        test_doc=dedent(
            '''
            class TestFoo:
                """Class docstring."""

                def test_a(self):
                    pass
            '''
        )
    )
    result = pytester.runpytest_subprocess("-v")
    result.assert_outcomes(passed=1)
    steps = json.loads(steps_file.read_text())
    by_name = _by_name(steps)
    # Only the ancestor chain is asserted here: the leaf must sit under the
    # ``TestFoo`` class step, which in turn sits under the module step. The
    # class step's description is not checked by this test.
    leaf = by_name["test_a"][0]
    assert _ancestor_names(steps, leaf)[:3] == ["test_a", "TestFoo", "test_doc.py"]
def test_drain_step_stack_continues_past_failing_exit() -> None:
    """Lenient drain: a frame whose ``__exit__`` raises must not stop the others closing."""
    from sift_client.pytest_plugin import SiftPytestStepDrainWarning, _drain_step_stack

    class _Good:
        closed = False

        def __exit__(self, *_: object) -> None:
            self.closed = True

    class _Bad:
        def __exit__(self, *_: object) -> None:
            raise RuntimeError("boom")

    first, second = _Good(), _Good()
    frames: list[tuple[str, object]] = [("g1", first), ("bad", _Bad()), ("g2", second)]
    with pytest.warns(SiftPytestStepDrainWarning, match="boom"):
        _drain_step_stack(frames)

    # The list is emptied and both well-behaved frames were closed.
    assert frames == []
    assert first.closed
    assert second.closed


def test_drain_step_stack_strict_drains_fully_then_raises() -> None:
    """Strict drain: every frame is popped, then one error is raised with a cause chained."""
    from sift_client.pytest_plugin import SiftPytestStepDrainError, _drain_step_stack

    class _Good:
        closed = False

        def __exit__(self, *_: object) -> None:
            self.closed = True

    class _Bad:
        def __init__(self, label: str) -> None:
            self.label = label

        def __exit__(self, *_: object) -> None:
            raise RuntimeError(f"boom-{self.label}")

    survivor = _Good()
    # Frames are popped from the end: b2 first, then b1, then g. b2's failure
    # is therefore the first one collected and is the one expected in
    # ``__cause__``.
    frames: list[tuple[str, object]] = [
        ("g", survivor),
        ("b1", _Bad("first")),
        ("b2", _Bad("second")),
    ]
    with pytest.raises(SiftPytestStepDrainError, match="2 step.*'b2'") as exc_info:
        _drain_step_stack(frames, swallow_errors=False)

    # Fully drained even though the call raised.
    assert frames == []
    assert survivor.closed
    # The original exception stays reachable for debugging.
    cause = exc_info.value.__cause__
    assert isinstance(cause, RuntimeError)
    assert "boom-second" in str(cause)
def _write_ini(pytester: pytest.Pytester, **overrides: object) -> None:
    """Create ``pytest.ini`` with one ``key = value`` line per override under ``[pytest]``."""
    settings = [f"{key} = {value}" for key, value in overrides.items()]
    content = "\n".join(["[pytest]", *settings]) + "\n"
    pytester.makefile(".ini", pytest=content)
result.assert_outcomes(passed=2) + steps = json.loads(steps_file.read_text()) + by_name = _by_name(steps) + assert "TestFoo" not in by_name + mod_id = by_name["test_noclass.py"][0]["id"] + assert by_name["test_a"][0]["parent_step_id"] == mod_id + assert by_name["test_b"][0]["parent_step_id"] == mod_id + + +def test_sift_module_step_false_skips_module_step( + pytester: pytest.Pytester, steps_file: Path +) -> None: + _write_ini(pytester, sift_module_step="false") + pytester.makepyfile( + test_nomod=dedent( + """ + class TestFoo: + def test_a(self): + pass + """ + ) + ) + result = pytester.runpytest_subprocess("-v") + result.assert_outcomes(passed=1) + steps = json.loads(steps_file.read_text()) + by_name = _by_name(steps) + assert "test_nomod.py" not in by_name + # TestFoo attaches to the report root (no parent recorded by the fake). + assert by_name["TestFoo"][0]["parent_step_id"] is None + assert by_name["test_a"][0]["parent_step_id"] == by_name["TestFoo"][0]["id"] + + +def test_sift_parametrize_nesting_false_keeps_flat_leaves( + pytester: pytest.Pytester, steps_file: Path +) -> None: + _write_ini(pytester, sift_parametrize_nesting="false") + pytester.makepyfile( + test_flat=dedent( + """ + import pytest + + @pytest.mark.parametrize("v", [1, 2]) + def test_a(v): + pass + """ + ) + ) + result = pytester.runpytest_subprocess("-v") + result.assert_outcomes(passed=2) + steps = json.loads(steps_file.read_text()) + by_name = _by_name(steps) + # No parametrize parent step. + assert "test_a" not in by_name + assert "v=1" not in by_name + # Leaves use the bracket-mangled pytest names. 
+ assert "test_a[1]" in by_name + assert "test_a[2]" in by_name + mod_id = by_name["test_flat.py"][0]["id"] + assert by_name["test_a[1]"][0]["parent_step_id"] == mod_id + assert by_name["test_a[2]"][0]["parent_step_id"] == mod_id + + +def test_sift_module_step_false_still_drains_across_modules( + pytester: pytest.Pytester, steps_file: Path +) -> None: + """sift_module_step=false must not merge same-named classes across modules. + + The hierarchy chain always includes the module ancestor for identity + (even when it's not rendered as a step), so two modules each declaring + ``class TestFoo`` produce two distinct ``TestFoo`` frames in the diff. + """ + _write_ini(pytester, sift_module_step="false") + pytester.makepyfile( + test_a=dedent( + """ + class TestFoo: + def test_x(self): + pass + """ + ), + test_b=dedent( + """ + class TestFoo: + def test_y(self): + pass + """ + ), + ) + result = pytester.runpytest_subprocess("-v") + result.assert_outcomes(passed=2) + steps = json.loads(steps_file.read_text()) + by_name = _by_name(steps) + # Two distinct TestFoo class steps — one per module — not a shared frame. + assert len(by_name["TestFoo"]) == 2 + foo_ids = {s["id"] for s in by_name["TestFoo"]} + # Each test method parents to a different TestFoo id. 
def test_same_named_packages_in_different_dirs_do_not_merge(
    pytester: pytest.Pytester, steps_file: Path
) -> None:
    """Same-named packages under different directories must yield separate steps.

    ``proj_a/utils`` and ``proj_b/utils`` are both packages called ``utils``;
    ``proj_a`` and ``proj_b`` themselves have no ``__init__.py``. The captured
    tree must contain two ``utils`` steps, with each module step parented to a
    different one.
    """
    layout = (
        ("proj_a", "test_x.py", "test_one"),
        ("proj_b", "test_y.py", "test_two"),
    )
    for project, module_file, func_name in layout:
        package_dir = pytester.path / project / "utils"
        package_dir.mkdir(parents=True)
        (package_dir / "__init__.py").touch()
        (package_dir / module_file).write_text(
            dedent(
                f"""
                def {func_name}():
                    pass
                """
            )
        )

    # ``importlib`` import mode is needed so the two identically named
    # packages do not collide during sys.path-based import.
    run = pytester.runpytest_subprocess("-v", "--import-mode=importlib")
    run.assert_outcomes(passed=2)
    captured = json.loads(steps_file.read_text())
    grouped = _by_name(captured)

    # Two distinct ``utils`` package steps, one per project.
    assert len(grouped["utils"]) == 2
    utils_ids = {record["id"] for record in grouped["utils"]}

    # Each module step hangs off a different ``utils`` instance.
    parent_of_x = grouped["test_x.py"][0]["parent_step_id"]
    parent_of_y = grouped["test_y.py"][0]["parent_step_id"]
    assert parent_of_x in utils_ids
    assert parent_of_y in utils_ids
    assert parent_of_x != parent_of_y
+ assert by_name["test_x.py"][0]["parent_step_id"] is None + + +def test_all_three_flags_false_matches_legacy_behavior( + pytester: pytest.Pytester, steps_file: Path +) -> None: + _write_ini( + pytester, + sift_module_step="false", + sift_class_step="false", + sift_parametrize_nesting="false", + ) + pytester.makepyfile( + test_legacy=dedent( + """ + import pytest + + class TestFoo: + @pytest.mark.parametrize("v", [1, 2]) + def test_a(self, v): + pass + """ + ) + ) + result = pytester.runpytest_subprocess("-v") + result.assert_outcomes(passed=2) + steps = json.loads(steps_file.read_text()) + by_name = _by_name(steps) + # No module, class, or parametrize parents — just bracket-mangled leaves. + assert "test_legacy.py" not in by_name + assert "TestFoo" not in by_name + assert "test_a" not in by_name + assert "test_a[1]" in by_name + assert "test_a[2]" in by_name + assert by_name["test_a[1]"][0]["parent_step_id"] is None + assert by_name["test_a[2]"][0]["parent_step_id"] is None + + +# --------------------------------------------------------------------------- +# Parametrize nesting +# --------------------------------------------------------------------------- + + +def test_single_parametrize_clusters_under_originalname( + pytester: pytest.Pytester, steps_file: Path +) -> None: + pytester.makepyfile( + test_rail=dedent( + """ + import pytest + + @pytest.mark.parametrize("v", [3.3, 5.0]) + def test_rail(v): + pass + """ + ) + ) + result = pytester.runpytest_subprocess("-v") + result.assert_outcomes(passed=2) + steps = json.loads(steps_file.read_text()) + by_name = _by_name(steps) + # Module step + one shared `test_rail` parent + two leaves. 
def test_stacked_parametrize_nests_outer_to_inner(
    pytester: pytest.Pytester, steps_file: Path
) -> None:
    """Stacked ``parametrize`` axes nest as ``test_iso`` -> ``voltage=…`` -> ``component=…``."""
    pytester.makepyfile(
        test_iso=dedent(
            """
            import pytest

            @pytest.mark.parametrize("voltage", ["high", "low"])
            @pytest.mark.parametrize("component", ["motor", "ducer"])
            def test_iso(voltage, component):
                pass
            """
        )
    )
    result = pytester.runpytest_subprocess("-v")
    result.assert_outcomes(passed=4)
    steps = json.loads(steps_file.read_text())
    by_name = _by_name(steps)
    # One `test_iso` parent, two `voltage='…'` parents, four `component='…'` leaves.
    assert len(by_name["test_iso"]) == 1
    assert len(by_name["voltage='high'"]) == 1
    assert len(by_name["voltage='low'"]) == 1
    assert len(by_name["component='motor'"]) == 2  # one per voltage
    assert len(by_name["component='ducer'"]) == 2

    # Both voltage steps hang directly off the shared `test_iso` parent.
    test_iso_id = by_name["test_iso"][0]["id"]
    voltage_high = by_name["voltage='high'"][0]
    voltage_low = by_name["voltage='low'"][0]
    assert voltage_high["parent_step_id"] == test_iso_id
    assert voltage_low["parent_step_id"] == test_iso_id

    # For each component value, its two leaves must sit under the two
    # different voltage parents — this is what "one per voltage" means, and
    # it is stricter than only checking membership in the set.
    voltage_ids = {voltage_high["id"], voltage_low["id"]}
    for component in ("component='motor'", "component='ducer'"):
        leaf_parents = {leaf["parent_step_id"] for leaf in by_name[component]}
        assert leaf_parents == voltage_ids
def test_leaf_parent_chain_terminates_at_report(
    pytester: pytest.Pytester, steps_file: Path
) -> None:
    """The innermost parametrize leaf's ancestor chain must end at the module step."""
    inner_source = dedent(
        """
        import pytest

        @pytest.mark.parametrize("a", [1])
        @pytest.mark.parametrize("b", ["x"])
        def test_chain(a, b):
            pass
        """
    )
    pytester.makepyfile(test_chain=inner_source)
    run = pytester.runpytest_subprocess("-v")
    run.assert_outcomes(passed=1)

    captured = json.loads(steps_file.read_text())
    innermost = next(record for record in captured if record["name"].startswith("b="))
    # Expected walk: b=… -> a=… -> test_chain -> test_chain.py (module step),
    # after which there is no recorded parent.
    expected = ["b='x'", "a=1", "test_chain", "test_chain.py"]
    assert _ancestor_names(captured, innermost) == expected
The substitute argv is swapped in via the public-ish - `_build_replay_command` hook so the production Popen kwargs stay - exercised. + ``log_file=True`` triggers the temp-file path so ``_open_import_proc`` fires + on ``__enter__``. The substitute argv is swapped in via the public-ish + ``_build_replay_command`` hook so the production Popen kwargs stay + exercised. ``timeout`` overrides the worker grace window so tests don't + wait the full production timeout for the timeout branch to trigger. """ rc = ReportContext(_make_simulate_client(), name="test", log_file=True) rc._build_replay_command = lambda: command # type: ignore[method-assign] + rc._import_proc_timeout = timeout return rc -def test_worker_clean_exit_is_silent(caplog: pytest.LogCaptureFixture) -> None: - """Worker exits with code 0 → __exit__ is silent (case 1).""" +def test_worker_clean_exit_is_silent() -> None: + """Worker exits with code 0 → __exit__ emits no SiftWarning (case 1).""" rc = _make_context([sys.executable, "-c", "pass"]) - with caplog.at_level(logging.ERROR): + with warnings.catch_warnings(record=True) as recorded: + warnings.simplefilter("always") with rc: pass - assert "Import process" not in caplog.text - assert "replay-test-result-log" not in caplog.text + sift_warnings = [w for w in recorded if issubclass(w.category, SiftWarning)] + assert sift_warnings == [] assert rc._import_proc is not None assert rc._import_proc.returncode == 0 -def test_worker_timeout_kills_and_logs(caplog: pytest.LogCaptureFixture) -> None: - """Worker still running at session end → kill + log, no raise (case 2).""" - rc = _make_context([sys.executable, "-c", "import time; time.sleep(30)"]) - with caplog.at_level(logging.ERROR): +def test_worker_timeout_kills_and_warns() -> None: + """Worker still running at session end → kill + SiftWarning, no raise (case 2).""" + rc = _make_context([sys.executable, "-c", "import time; time.sleep(30)"], timeout=0.2) + with pytest.warns(SiftWarning) as recorded: with rc: pass 
def test_measurements_passed_property(self, report_context, step):
    """``step.measurements_passed`` counts only direct ``measure*`` calls
    on this step, and stays True when only a substep or ``report_outcome``
    records a failure.
    """
    # Snapshot the context's failure state so the failures induced below can
    # be undone at the end of the test.
    current_step_path = step.current_step.step_path
    initial_open_step_result = report_context.open_step_results.get(current_step_path, True)
    initial_any_failures = report_context.any_failures

    # No measurements yet, vacuously True.
    assert step.measurements_passed is True

    # In-bounds measurement keeps it True.
    step.measure(name="ok", value=1.0, bounds={"min": 0.0, "max": 2.0})
    assert step.measurements_passed is True

    # A failing report_outcome doesn't flip measurements_passed because
    # it isn't a direct measure() call on this step.
    step.report_outcome("substep-fail", False, "deliberately failing")
    assert step.measurements_passed is True

    # Out-of-bounds measurement flips ``measurements_passed`` False.
    step.measure(name="bad", value=99.0, bounds={"min": 0.0, "max": 2.0})
    assert step.measurements_passed is False

    # measure_avg / measure_all go through ``measure`` internally and
    # also increment the counter on out-of-bounds values.
    # NOTE(review): ``measurements_passed`` was already asserted False just
    # above, so the assertion after this call would pass even if
    # ``measure_avg`` recorded nothing — confirm whether a fresh step is
    # needed to actually exercise it.
    step.measure_avg(
        name="bad_avg",
        values=[50.0, 60.0, 70.0],  # mean 60 is well outside [0, 2]
        bounds={"min": 0.0, "max": 2.0},
    )
    assert step.measurements_passed is False

    # Restore state.
    if initial_open_step_result:
        report_context.open_step_results[current_step_path] = True
    if not initial_any_failures:
        report_context.any_failures = False
current_step_path = step.current_step.step_path diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index 494ded3b6..7c4c1c2f5 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -1,22 +1,212 @@ from __future__ import annotations import os +import warnings from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path -from typing import TYPE_CHECKING, Any, Generator +from typing import TYPE_CHECKING, Any, Generator, Tuple import pytest from sift_client import SiftClient, SiftConnectionConfig +from sift_client.errors import SiftWarning from sift_client.sift_types.test_report import TestStatus from sift_client.util.test_results import ReportContext + +class SiftPytestPluginWarning(SiftWarning): + """Base warning for issues raised by the Sift pytest plugin.""" + + +class SiftPytestStepDrainWarning(SiftPytestPluginWarning): + """A step's ``__exit__`` raised while the plugin was draining its stack. + + Surfaced at module-teardown or session-end so the drain can continue and + pytest test outcomes stay unaffected; the underlying exception is included + in the message for debugging. + """ + + +class SiftPytestStepDrainError(RuntimeError): + """Raised when mid-session drain fails — signals a likely upstream invariant break.""" + + if TYPE_CHECKING: from sift_client.util.test_results.context_manager import NewStep REPORT_CONTEXT: Any = None +_STASH_MISSING = object() + +_PARAMETRIZE_PATH_KEY = pytest.StashKey[Tuple[str, ...]]() +# Each frame: (path_key, open step). Frames are shared across sibling test items +# and drained at session end. +_PARAMETRIZE_STACK: list[tuple[str, Any]] = [] + +_HIERARCHY_KEY = pytest.StashKey[Tuple[Tuple[str, str, "str | None", bool], ...]]() +# Outer-to-inner frames for the item's collection-tree ancestors. 
Each chain +# entry is ``(identity, name, doc, rendered)``: +# - ``identity``: a globally-unique key (``node.nodeid``) used for diff +# comparison. Two ancestors at the same depth with the same display name +# but reached via different paths (e.g., ``proj_a/utils`` and +# ``proj_b/utils`` in a monorepo) get distinct identities, so they never +# silently merge in the diff. +# - ``name``: the human-readable step name used when ``rendered`` opens the +# Sift step. +# - ``doc``: docstring used for the step description if rendered. +# - ``rendered``: True iff the corresponding ``sift_*_step`` ini flag is on. +# Non-rendered frames participate in the diff but do not call +# ``rc.new_step(...)`` — they appear with ``ns=None`` in the stack. +# +# Stack entries: ``(identity, name, open_step_or_None)``. Frames are shared +# across sibling test items and drained at session end. Drained AFTER +# _PARAMETRIZE_STACK since parametrize parents nest inside hierarchy parents. +_HIERARCHY_STACK: list[tuple[str, str, Any]] = [] + + +def _drain_step_stack(stack: list, *, swallow_errors: bool = True) -> None: + """Pop and close every frame. + + With ``swallow_errors=True`` (default, used at teardown / session end), + per-frame failures are surfaced as ``SiftPytestStepDrainWarning`` so a + single misbehaving ``__exit__`` can't block the rest of the stack from + cleaning up or cascade out of pytest's finalizer chain. + + With ``swallow_errors=False`` (mid-session, when a class transition forces + parametrize parents to close), the stack is still fully drained but the + first per-frame exception is re-raised at the end as a + ``SiftPytestStepDrainError`` so a real upstream invariant violation + surfaces as a test error instead of a silenceable warning. + """ + errors: list[tuple[str, BaseException]] = [] + while stack: + entry = stack.pop() + # Tolerate either ``(name, ns)`` (parametrize stack) or + # ``(identity, name, ns)`` (hierarchy stack) entries. 
+ name, ns = entry[-2], entry[-1] + if ns is None: + # Non-rendered diff-only frame (e.g. a Package frame when + # ``sift_package_step=false``); nothing to close. + continue + try: + ns.__exit__(None, None, None) + except Exception as exc: + if swallow_errors: + warnings.warn( + f"Sift plugin: closing step {name!r} during drain raised " + f"{type(exc).__name__}: {exc}", + SiftPytestStepDrainWarning, + stacklevel=2, + ) + else: + errors.append((name, exc)) + if errors: + first_name, first_exc = errors[0] + raise SiftPytestStepDrainError( + f"Sift plugin: {len(errors)} step(s) raised while draining mid-session; " + f"first failure on {first_name!r}: {type(first_exc).__name__}: {first_exc}" + ) from first_exc + + +def _drain_parametrize_stack(*, swallow_errors: bool = True) -> None: + _drain_step_stack(_PARAMETRIZE_STACK, swallow_errors=swallow_errors) + + +def _drain_hierarchy_stack(*, swallow_errors: bool = True) -> None: + _drain_step_stack(_HIERARCHY_STACK, swallow_errors=swallow_errors) + + +def _close_frame(name: str, ns: Any) -> None: + """Close a single frame, warning on per-frame failure. + + Used by the mid-session hierarchy-stack pop and the rollback paths so a + misbehaving ``__exit__`` neither shadows the original exception nor leaks + sibling frames. ``ns=None`` indicates a non-rendered diff-only frame; skip. + """ + if ns is None: + return + try: + ns.__exit__(None, None, None) + except Exception as exc: + warnings.warn( + f"Sift plugin: closing step {name!r} raised {type(exc).__name__}: {exc}", + SiftPytestStepDrainWarning, + stacklevel=2, + ) + + +def _build_parametrize_path(item: pytest.Item) -> tuple[str, ...]: + """Outer-to-inner step display names for a parametrized item. + + Pytest stores ``callspec.params`` with the BOTTOM decorator's axis first; + the Sift step tree treats the TOP decorator as outermost, so we reverse. 
+ """ + callspec = getattr(item, "callspec", None) + if callspec is None or not callspec.params: + return () + originalname = getattr(item, "originalname", item.name) + frames: list[str] = [originalname] + for name, value in reversed(callspec.params.items()): + frames.append(f"{name}={value!r}") + return tuple(frames) + + +def _build_hierarchy_chain( + item: pytest.Item | pytest.Collector, + config: pytest.Config, +) -> tuple[tuple[str, str, str | None, bool], ...]: + """Outer-to-inner ``(identity, name, docstring, rendered)`` for collection ancestors. + + Walks ``item.parent`` upward and ALWAYS collects every ``pytest.Package``, + ``pytest.Module``, and ``pytest.Class`` ancestor — they all participate in + the diff that keeps the report tree coherent across tests, so two + same-named ancestors reached via different paths (e.g., ``proj_a/utils`` + and ``proj_b/utils`` in a monorepo where the ``proj_*`` dirs are + ``pytest.Dir`` nodes the walker skips) cannot silently merge. + + The ``identity`` field is ``node.nodeid`` — globally unique per collected + node. The diff compares on identity, not the display ``name``. + + The ``rendered`` flag is True iff the layer's ini flag is on + (``sift_package_step`` / ``sift_module_step`` / ``sift_class_step``). + Non-rendered frames participate in the diff for identity but don't open a + Sift step. + + The ``node.obj`` access is a pytest property that imports the underlying + Python object and can raise *any* exception (ImportError, custom + metaclass errors, descriptor ``__doc__`` properties that throw). Guard + broadly so a misbehaving collector doesn't abort the whole collection + phase — that frame's docstring just becomes ``None``. 
+ """ + include_package = bool(_option_or_ini(config, _PACKAGE_STEP)) + include_module = bool(_option_or_ini(config, _MODULE_STEP)) + include_class = bool(_option_or_ini(config, _CLASS_STEP)) + + chain: list[tuple[str, str, str | None, bool]] = [] + # ``node.parent`` is typed as the internal ``_pytest.nodes.Node`` which + # isn't part of pytest's public API; widen to ``Any`` for the walk. + node: Any = item + while node is not None: + if isinstance(node, pytest.Class): + rendered = include_class + elif isinstance(node, pytest.Module): + rendered = include_module + elif isinstance(node, pytest.Package): + rendered = include_package + else: + node = node.parent + continue + try: + doc = ( + (getattr(node, "obj", None) and getattr(node.obj, "__doc__", None)) or "" + ).strip() or None + except Exception: + doc = None + chain.append((node.nodeid, node.name, doc, rendered)) + node = node.parent + return tuple(reversed(chain)) + @dataclass(frozen=True) class _Option: @@ -105,10 +295,45 @@ class _Option: _AUTOUSE = _Option( ini_name="sift_autouse", ini_help="Default for the Sift autouse fixtures (report_context, step, " - "module_substep). When true (default), tests are included unless marked " - "with @pytest.mark.sift_exclude. When false, tests are skipped unless " - "marked with @pytest.mark.sift_include. Bulk-apply markers in a " - "directory's conftest via `pytest_collection_modifyitems`.", + "_hierarchy_parents, _parametrize_parents). When true (default), tests " + "are included unless marked with @pytest.mark.sift_exclude. When false, " + "tests are skipped unless marked with @pytest.mark.sift_include. " + "Bulk-apply markers in a directory's conftest via " + "`pytest_collection_modifyitems`.", + ini_type="bool", + ini_default=True, +) + +_PACKAGE_STEP = _Option( + ini_name="sift_package_step", + ini_help="When true (default), open a parent step for each Python package " + "(directory with an ``__init__.py``) in the test path. 
Set to false to " + "flatten package grouping.", + ini_type="bool", + ini_default=True, +) + +_MODULE_STEP = _Option( + ini_name="sift_module_step", + ini_help="When true (default), open a per-module parent step. Set to false " + "to skip module-level grouping in the report tree.", + ini_type="bool", + ini_default=True, +) + +_CLASS_STEP = _Option( + ini_name="sift_class_step", + ini_help="When true (default), open per-class parent steps (including nested " + "classes). Set to false to keep class methods at module level.", + ini_type="bool", + ini_default=True, +) + +_PARAMETRIZE_NESTING = _Option( + ini_name="sift_parametrize_nesting", + ini_help="When true (default), parametrized tests nest under shared parent " + "steps (e.g. test_a -> v=1, v=2). Set to false to keep the flat per-test " + "leaf naming (test_a[1], test_a[2]).", ini_type="bool", ini_default=True, ) @@ -121,6 +346,10 @@ class _Option: _GRPC_URI, _REST_URI, _AUTOUSE, + _PACKAGE_STEP, + _MODULE_STEP, + _CLASS_STEP, + _PARAMETRIZE_NESTING, ) @@ -163,6 +392,44 @@ def pytest_configure(config: pytest.Config) -> None: ) +def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None: + """Stash each item's class chain + parametrize path and cluster siblings. + + Sorts by ``(file_path, hierarchy_chain, parametrize_path)`` so sibling + items under a shared parent (package, module, class, or parametrize axis) + stay contiguous — otherwise a free function sorting between two class + methods would tear down + re-open the class step, producing duplicate + parents in the report tree. + """ + for item in items: + item.stash[_HIERARCHY_KEY] = _build_hierarchy_chain(item, config) + item.stash[_PARAMETRIZE_PATH_KEY] = _build_parametrize_path(item) + # Use ``.get(...)`` defensively: a third-party hook may inject items after + # our stashing loop runs, and we'd rather sort them at the tail than + # KeyError out of collection. 
+ items.sort( + key=lambda i: ( + str(i.path), + tuple(identity for identity, _, _, _ in i.stash.get(_HIERARCHY_KEY, ())), + i.stash.get(_PARAMETRIZE_PATH_KEY, ()), + ) + ) + + +def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None: + """Drain any parent steps still open at session end (innermost first). + + Wrapped so a failure in the inner drain does not prevent the outer one + from running. With ``module_substep`` removed, this is the sole place + where hierarchy parents close — they persist across all tests and only + drain when the session ends. + """ + try: + _drain_parametrize_stack() + finally: + _drain_hierarchy_stack() + + def _is_offline(pytestconfig: pytest.Config | None) -> bool: return bool(_option_or_ini(pytestconfig, _OFFLINE)) @@ -186,22 +453,6 @@ def _sift_enabled_for(node: pytest.Item | pytest.Collector, default: bool) -> bo return default -def _module_has_included_tests(request: pytest.FixtureRequest, default: bool) -> bool: - """True when at least one test in `request`'s module is gated on. - - Used by the module-scoped `module_substep` fixture to decide whether to - activate without triggering `report_context` creation for modules where - every test is excluded. - """ - module_path = request.path - for item in request.session.items: - if item.path != module_path: - continue - if _sift_enabled_for(item, default): - return True - return False - - def _option_or_ini(pytestconfig: pytest.Config | None, opt: _Option) -> Any: """Resolve a Sift plugin setting from CLI > ini > None. @@ -302,7 +553,19 @@ def _report_context_impl( ) as context: global REPORT_CONTEXT REPORT_CONTEXT = context - yield context + try: + yield context + finally: + # Drain the hierarchy + parametrize stacks INSIDE the + # ReportContext's ``with`` block, so the final ``__exit__`` + # update calls for those parent steps are written to the log + # file BEFORE the import worker drains. 
Without this, the + # worker exits with a partial backlog and the parent steps + # are stuck IN_PROGRESS in the Sift report. + try: + _drain_parametrize_stack() + finally: + _drain_hierarchy_stack() _CREDENTIAL_KEYS: tuple[tuple[str, _Option | None], ...] = ( @@ -411,9 +674,10 @@ def report_context( The fixture is no longer autouse; it's instantiated on the first call to ``request.getfixturevalue("report_context")``, which today happens - inside the gated ``step`` and ``module_substep`` fixtures. If every - test in the session is excluded via the marker gate, this fixture is - never resolved and no ReportContext (or teardown subprocess) is created. + inside the gated ``step``, ``_hierarchy_parents``, and + ``_parametrize_parents`` fixtures. If every test in the session is + excluded via the marker gate, this fixture is never resolved and no + ReportContext (or teardown subprocess) is created. What gets yielded depends on the mode: @@ -460,24 +724,183 @@ def report_context( def _step_impl( report_context: ReportContext, request: pytest.FixtureRequest ) -> Generator[NewStep, None, None]: - name = str(request.node.name) - existing_docstring = request.node.obj.__doc__ or None + node = request.node + # Items get a parametrize path stashed in ``pytest_collection_modifyitems``; + # modules/other nodes fall back to their node name. The leaf frame + # (``path[-1]``) is the test-specific display name — parents are opened + # by ``_parametrize_parents``. When parametrize-nesting is disabled, fall + # back to the bracket-mangled pytest name (e.g. ``test_a[1]``) so the leaf + # remains uniquely identifiable. 
+ if _option_or_ini(request.config, _PARAMETRIZE_NESTING): + path = node.stash.get(_PARAMETRIZE_PATH_KEY, ()) + name = path[-1] if path else str(node.name) + else: + name = str(node.name) + # ``node.obj`` may not exist (e.g., ``pytest.DoctestItem``) or may raise + # when accessed — fall back to no description in those cases rather than + # erroring out a perfectly valid test. ``getattr``'s default only + # suppresses ``AttributeError``; the try/except catches everything else + # (RuntimeError from a misbehaving ``__doc__`` descriptor, etc.). + try: + existing_docstring = getattr(getattr(node, "obj", None), "__doc__", None) or None + except Exception: + existing_docstring = None with report_context.new_step( name=name, description=existing_docstring, assertion_as_fail_not_error=False ) as new_step: yield new_step - if hasattr(request.node, "rep_call") and request.node.rep_call.excinfo: + if hasattr(node, "rep_call") and node.rep_call.excinfo: new_step.update_step_from_result( - request.node.rep_call.excinfo, - request.node.rep_call.excinfo.value, - request.node.rep_call.excinfo.tb, + node.rep_call.excinfo, + node.rep_call.excinfo.value, + node.rep_call.excinfo.tb, ) +@pytest.fixture(autouse=True) +def _hierarchy_parents( + request: pytest.FixtureRequest, + pytestconfig: pytest.Config, +) -> None: + """Open/close hierarchy parent steps (packages, modules, classes) for the current item. + + Same diff-stack pattern as ``_parametrize_parents`` but operates on + ``_HIERARCHY_KEY``. The chain is built outer-to-inner from the item's + collection-tree ancestors; which node types are included is decided at + build time by ``sift_package_step`` / ``sift_module_step`` / + ``sift_class_step``. When the chain changes (pop or push), the parametrize + stack is drained first since parametrize parents nest INSIDE these. + + Gated off when the item is excluded (avoids eager ``report_context`` setup). 
+ """ + default = bool(_option_or_ini(pytestconfig, _AUTOUSE)) + if not _sift_enabled_for(request.node, default): + return None + # Fall back to computing the chain on-demand for items that bypassed + # ``pytest_collection_modifyitems`` (e.g., dynamically inserted by another + # plugin's later hook). Defaulting to ``()`` would incorrectly drain the + # entire open hierarchy stack for those items. + desired = request.node.stash.get(_HIERARCHY_KEY, _STASH_MISSING) + if desired is _STASH_MISSING: + desired = _build_hierarchy_chain(request.node, pytestconfig) + common = 0 + # Compare on identity (nodeid) — same-named ancestors at different paths + # MUST stay distinct. + while ( + common < len(_HIERARCHY_STACK) + and common < len(desired) + and _HIERARCHY_STACK[common][0] == desired[common][0] + ): + common += 1 + # Any change to the hierarchy chain orphans parametrize parents from the + # previous test — drain them before mutating the hierarchy stack so + # ReportContext's top-of-stack invariant holds. Strict mode: a per-frame + # ``__exit__`` failure here signals a real upstream drift between the + # plugin stacks and ReportContext; raise it as a test error instead of a + # silenceable warning. + if common < len(_HIERARCHY_STACK) or common < len(desired): + _drain_parametrize_stack(swallow_errors=False) + # Symmetric per-frame guard for the hierarchy pop so one bad ``__exit__`` + # doesn't leave _HIERARCHY_STACK partially drained for every subsequent test. + while len(_HIERARCHY_STACK) > common: + _identity, name, ns = _HIERARCHY_STACK.pop() + _close_frame(name, ns) + if not desired[common:]: + return None + # Fetch ``report_context`` lazily — but only when there's at least one + # rendered frame to push. Pure diff-only frames (e.g. a Package frame when + # ``sift_package_step=false``) just update _HIERARCHY_STACK with ns=None. + rc = None + # Roll back any partial push so a mid-loop exception doesn't leave half + # the chain orphaned on the stack. 
Per-frame guard inside the rollback so + # a failing ``__exit__`` doesn't shadow the original exception or leak + # the remaining opened frames. + opened: list[tuple[str, str, Any]] = [] + try: + for identity, name, doc, rendered in desired[common:]: + if rendered: + if rc is None: + rc = request.getfixturevalue("report_context") + ns = rc.new_step(name=name, description=doc, assertion_as_fail_not_error=False) + ns.__enter__() + opened.append((identity, name, ns)) + else: + opened.append((identity, name, None)) + except BaseException: + while opened: + _identity, name, ns = opened.pop() + _close_frame(name, ns) + raise + _HIERARCHY_STACK.extend(opened) + return None + + +@pytest.fixture(autouse=True) +def _parametrize_parents( + request: pytest.FixtureRequest, + pytestconfig: pytest.Config, + _hierarchy_parents: None, +) -> None: + """Open/close shared parametrize parent steps for the current item. + + Diffs the item's desired parametrize path against the open stack: pops the + stale tail, then opens new parents (everything except the innermost frame — + the ``step`` fixture creates that as the leaf). Parents persist across + sibling items so a tree like ``test_x[a=1]`` / ``test_x[a=2]`` shares one + ``test_x`` container. + + Gated off when the current item is excluded so that excluded items don't + eagerly request ``report_context`` (which would defeat its lazy creation), + or when ``sift_parametrize_nesting=false``. Parents persist until the + diff against a subsequent test's chain pops them, or until + ``pytest_sessionfinish`` drains anything left at session end. + """ + default = bool(_option_or_ini(pytestconfig, _AUTOUSE)) + if not _sift_enabled_for(request.node, default): + return None + if not _option_or_ini(pytestconfig, _PARAMETRIZE_NESTING): + return None + # Fall back to on-demand computation for dynamically-inserted items; + # see _hierarchy_parents for the same rationale. 
+ desired = request.node.stash.get(_PARAMETRIZE_PATH_KEY, _STASH_MISSING) + if desired is _STASH_MISSING: + desired = _build_parametrize_path(request.node) + parents = desired[:-1] + common = 0 + while ( + common < len(_PARAMETRIZE_STACK) + and common < len(parents) + and _PARAMETRIZE_STACK[common][0] == parents[common] + ): + common += 1 + # Per-frame guard so one bad ``__exit__`` doesn't leave _PARAMETRIZE_STACK + # partially drained for every subsequent test. + while len(_PARAMETRIZE_STACK) > common: + name, ns = _PARAMETRIZE_STACK.pop() + _close_frame(name, ns) + if not parents[common:]: + return None + rc = request.getfixturevalue("report_context") + opened: list[tuple[str, Any]] = [] + try: + for display in parents[common:]: + ns = rc.new_step(name=display, assertion_as_fail_not_error=False) + ns.__enter__() + opened.append((display, ns)) + except BaseException: + while opened: + name, ns = opened.pop() + _close_frame(name, ns) + raise + _PARAMETRIZE_STACK.extend(opened) + return None + + @pytest.fixture(autouse=True) def step( request: pytest.FixtureRequest, pytestconfig: pytest.Config, + _parametrize_parents: None, ) -> Generator[NewStep | None, None, None]: """Create an outer step for the function when the Sift gate is on. @@ -498,27 +921,6 @@ def step( yield from _step_impl(rc, request) -@pytest.fixture(scope="module", autouse=True) -def module_substep( - request: pytest.FixtureRequest, - pytestconfig: pytest.Config, -) -> Generator[NewStep | None, None, None]: - """Create a per-module step when at least one test in the module is gated on. - - Inspects the module's collected items rather than gating on a single marker, - so a module with mixed inclusion/exclusion still produces the module-level - step (individual `step` fixtures then decide per-test). When every test in - the module is excluded, the substep is skipped without requesting - `report_context`. 
- """ - default = bool(_option_or_ini(pytestconfig, _AUTOUSE)) - if not _module_has_included_tests(request, default): - yield None - return - rc = request.getfixturevalue("report_context") - yield from _step_impl(rc, request) - - @pytest.fixture(scope="session") def client_has_connection(pytestconfig: pytest.Config, request: pytest.FixtureRequest) -> bool: """Verify the ``SiftClient`` can reach Sift via ``/ping``. diff --git a/python/lib/sift_client/util/test_results/__init__.py b/python/lib/sift_client/util/test_results/__init__.py index ddce0326c..a3ac081bc 100644 --- a/python/lib/sift_client/util/test_results/__init__.py +++ b/python/lib/sift_client/util/test_results/__init__.py @@ -61,10 +61,13 @@ def main(self): By default, every test in the session produces a Sift report: one `TestReport` per session, one step per test function (`step`), and one -parent step per test file (`module_substep`). The plugin also registers a -default `sift_client` fixture that reads `SIFT_API_KEY`, `SIFT_GRPC_URI`, -and `SIFT_REST_URI` from the environment. Override it by defining your own -`sift_client` fixture in your conftest. +parent step per Python package (directory with `__init__.py`), test file, +and test class +above it. Individual layers can be flattened via the `sift_package_step`, +`sift_module_step`, `sift_class_step`, and `sift_parametrize_nesting` ini +flags. The plugin also registers a default `sift_client` fixture that reads +`SIFT_API_KEY`, `SIFT_GRPC_URI`, and `SIFT_REST_URI` from the environment. +Override it by defining your own `sift_client` fixture in your conftest. Note: FedRAMP users: results are buffered to a temp file and uploaded by a subprocess at session end (no API calls during the run). 
Disable the buffer diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py index 3d375814a..bd2ec917f 100644 --- a/python/lib/sift_client/util/test_results/context_manager.py +++ b/python/lib/sift_client/util/test_results/context_manager.py @@ -7,6 +7,7 @@ import subprocess import tempfile import traceback +import warnings from contextlib import AbstractContextManager, contextmanager from datetime import datetime, timezone from pathlib import Path @@ -14,6 +15,7 @@ import numpy as np +from sift_client.errors import SiftWarning from sift_client.sift_types.test_report import ( ErrorInfo, NumericBounds, @@ -42,15 +44,19 @@ def log_replay_instructions(log_file: str | Path | None) -> None: - """Log instructions for manually replaying a test result log file. + """Surface replay instructions when an import/replay attempt fails. - Used when an import/replay attempt fails so the user can retry against the same file. + Emitted as a ``SiftWarning`` (not a logger.error) so pytest and other + runners surface it in their warning summary; logger.error is suppressed + by default in most CLI tools. """ if log_file is None: return - logger.error( - f"Error replaying log file: {log_file}.\n" - f" Can replay with `replay-test-result-log {log_file}`." + warnings.warn( + f"Sift log file was not fully replayed: {log_file}. " + f"Re-run with `import-test-result-log {log_file}` to complete the upload.", + SiftWarning, + stacklevel=2, ) @@ -110,6 +116,11 @@ class ReportContext(AbstractContextManager): open_step_results: dict[str, bool] any_failures: bool _import_proc: subprocess.Popen | None = None + # Seconds to wait for the import worker subprocess to finish uploading + # the JSONL backlog at session end before killing it. Tests substitute + # a smaller value (via ``_make_context`` patching) so they don't wait + # the full window for the timeout branch to trigger. 
+ _import_proc_timeout: float = 30.0 def __init__( self, @@ -227,31 +238,40 @@ def __exit__(self, exc_type, exc_value, traceback): # them fail the session — tests already ran and their outcome # is independent of delivery. The local log file is the source # of recovery for both failure modes via - # `replay-test-result-log `: + # `import-test-result-log `: # 1. Exits cleanly (returncode 0). Silent. - # 2. Still running after the 1s grace window (TimeoutExpired). + # 2. Still running after the grace window (TimeoutExpired). # Healthy worker with a large backlog; kill and surface - # replay instructions. + # replay instructions. 30 seconds is enough for a normal + # test suite to drain; pathological backlogs should opt + # into inline mode (`--sift-log-file=false`) instead. # 3. Exited with non-zero. Connection failures and API call # errors land here — the worker's replay loop has no retry, - # so the first failed RPC crashes the subprocess. Log the - # captured stderr at ERROR with replay instructions. + # so the first failed RPC crashes the subprocess. Surface + # the captured stderr with replay instructions. try: - _, stderr_bytes = self._import_proc.communicate(timeout=1) + _, stderr_bytes = self._import_proc.communicate(timeout=self._import_proc_timeout) except subprocess.TimeoutExpired: - logger.error("Import process did not exit in 1s, killing it") self._import_proc.kill() self._import_proc.wait() + warnings.warn( + f"Sift import worker did not exit in " + f"{self._import_proc_timeout}s; killing it. " + "Local log file is preserved for manual replay.", + SiftWarning, + stacklevel=2, + ) log_replay_instructions(self.log_file) return True # Ensures the session is marked as passed in pytest if self._import_proc.returncode != 0: stderr_text = ( stderr_bytes.decode("utf-8", errors="replace").strip() if stderr_bytes else "" ) - logger.error( - "Import process exited with code %d. 
stderr: %s", - self._import_proc.returncode, - stderr_text or "", + warnings.warn( + f"Sift import worker exited with code " + f"{self._import_proc.returncode}. stderr: {stderr_text or ''}", + SiftWarning, + stacklevel=2, ) log_replay_instructions(self.log_file) @@ -409,6 +429,11 @@ def __init__( self.client = report_context.client self.current_step = self.report_context.create_step(name, description, metadata=metadata) self.assertion_as_fail_not_error = assertion_as_fail_not_error + # Per-step measurement-failure count for ``measurements_passed``. + # Tracks only direct ``measure*`` calls on this NewStep instance; + # substep / ``report_outcome`` failures are intentionally not folded + # in here (see ``measurements_passed`` vs ``passed``). + self._failed_measurement_count = 0 def __enter__(self): """Enter the context manager to create a new step. @@ -417,6 +442,20 @@ def __enter__(self): """ return self + @property + def measurements_passed(self) -> bool: + """True if every measurement recorded directly on this step has passed. + + Counts only ``step.measure``, ``step.measure_avg``, and + ``step.measure_all`` calls on this ``NewStep`` instance. Useful for + the ``assert step.measurements_passed`` pattern at the end of a test + when you want to fail pytest on any out-of-bounds measurement + without short-circuiting on the first failure (asserting on + individual ``measure(...)`` return values skips every measurement + after the failing one). 
+ """ + return self._failed_measurement_count == 0 + def update_step_from_result( self, exc: type[Exception] | None, @@ -529,6 +568,8 @@ def measure( create, log_file=self.report_context.log_file ) self.report_context.record_step_outcome(measurement.passed, self.current_step) + if not measurement.passed: + self._failed_measurement_count += 1 return measurement.passed diff --git a/python/mkdocs.yml b/python/mkdocs.yml index 90bfd10ed..5108b7e4a 100644 --- a/python/mkdocs.yml +++ b/python/mkdocs.yml @@ -59,7 +59,9 @@ nav: - examples/index.md - Basic Usage: examples/basic.ipynb - Data Ingestion: examples/ingestion.ipynb + # Will migrate to Guides in the future - Pytest Plugin: examples/pytest_plugin.md + - Pytest Plugin Quickstart: examples/pytest_plugin_quickstart.md # - Guides: # - Logging # - Error Handling diff --git a/python/pyproject.toml b/python/pyproject.toml index a2cd6a410..0bb07e84a 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -26,7 +26,11 @@ keywords = ["sift", "sift-stack", "siftstack", "sift_py"] dependencies = [ "grpcio~=1.13", "PyYAML~=6.0", - "rapidyaml~=0.11", + # TODO: rapidyaml 0.13.0 ships C++ source that fails to compile against + # the GCC version on current GitHub Actions runners (csubstr operator= + # and SFINAE errors in the bundled c4core). Cap below 0.13 until either + # rapidyaml ships fixed sdists or we move to binary wheels. 
+ "rapidyaml>=0.11,<0.13", "pandas>=2.0,<3.1", "protobuf>=5.0", "pydantic~=2.10", From d3a444bce127d75e813fd883e1b979d16a57519a Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 26 May 2026 11:14:50 -0700 Subject: [PATCH 05/19] Python(feat): pytest pass fail behavior improvements (#568) --- python/docs/examples/pytest_plugin.md | 4 + .../pytest_plugin/pass_fail_behavior.md | 164 +++++ .../_tests/pytest_plugin/_fakes.py | 132 ---- .../pytest_plugin/_step_status_capture.py | 139 +++++ .../pytest_plugin/step_status_states.md | 105 ++++ .../_tests/pytest_plugin/test_hierarchy.py | 179 +++--- .../_tests/pytest_plugin/test_pass_fail.py | 562 ++++++++++++++++++ python/lib/sift_client/pytest_plugin.py | 169 +++++- .../util/test_results/context_manager.py | 145 +++-- python/mkdocs.yml | 3 + 10 files changed, 1313 insertions(+), 289 deletions(-) create mode 100644 python/docs/guides/pytest_plugin/pass_fail_behavior.md delete mode 100644 python/lib/sift_client/_tests/pytest_plugin/_fakes.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/step_status_states.md create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md index c464e564e..5a40d450d 100644 --- a/python/docs/examples/pytest_plugin.md +++ b/python/docs/examples/pytest_plugin.md @@ -306,6 +306,10 @@ outcomes into `TestStatus`: | Non-`AssertionError` exception escapes the test (e.g. 
`ValueError`, `TimeoutError`) | `ERROR`, with the formatted traceback (last 10 frames plus the first frame) on `step.error_info.error_message` | | Manual `step.current_step.update({"status": ...})` | Whatever you set; the step exit handler honors a manually-resolved status | +For the full contract, including skips, xfail/xpass, hard exits (`SystemExit`, +`KeyboardInterrupt`), setup/teardown phase failures, and propagation rules, +see the [Pass/Fail Behavior guide](../guides/pytest_plugin/pass_fail_behavior.md). + A failure or error at any depth propagates upward: the parent substep, the function step, the class/module/package steps above it, and the session report all get marked failed. diff --git a/python/docs/guides/pytest_plugin/pass_fail_behavior.md b/python/docs/guides/pytest_plugin/pass_fail_behavior.md new file mode 100644 index 000000000..6e9b1d6e3 --- /dev/null +++ b/python/docs/guides/pytest_plugin/pass_fail_behavior.md @@ -0,0 +1,164 @@ +# Pass/Fail Behavior + +The pytest plugin maps every pytest outcome to a `TestStatus` on the +corresponding Sift step. Use this page to look up what a given test will +produce, and how that result rolls up to the parent steps and the report. + +## `TestStatus` values + +The statuses below come from `sift_client.sift_types.test_report.TestStatus`. + +| Status | Meaning | +| ------------- |------------------------------------------------------------------------------------------------------------------------| +| `PASSED` | The step completed and every check it owns succeeded. | +| `FAILED` | An assertion, a `pytest.fail(...)`, a failed `report_outcome`, or a failing measurement marked it. | +| `ERROR` | An unexpected exception escaped the test body or a fixture (setup or teardown). | +| `ABORTED` | A hard exit (`SystemExit`, observed `KeyboardInterrupt`) interrupted the test. | +| `SKIPPED` | The test was skipped at collection time, at runtime, or from a fixture. 
| +| `IN_PROGRESS` | Test in progress or the plugin never observed a final outcome (e.g. a session-aborting interrupt killed pytest first). | + +## Normal test outcomes + +| Scenario | Trigger | Outcome | +| ----------------------------------------- | ------------------------------------ | -------- | +| Test passes | function body returns cleanly | `PASSED` | +| Assertion failure | `assert 1 == 2` | `FAILED` | +| `pytest.fail("...")` from the body | `pytest.fail("intentional failure")` | `FAILED` | +| Uncaught non-assertion exception | `raise ValueError("boom")` | `ERROR` | + +A non-assertion exception gets its formatted traceback recorded on +`step.error_info.error_message`. + +## Hard exits + +Hard exits the plugin can observe map to `ABORTED`. If pytest tears the +session down before the plugin sees the exit, the step stays at +`IN_PROGRESS` instead of resolving. + +| Scenario | Trigger | Outcome | +| ---------------------------------------------- | ------------------------- | -------------------------------------------------------------------- | +| `SystemExit` from the test body | `sys.exit(1)` | `ABORTED` | +| `KeyboardInterrupt` the plugin observes | `raise KeyboardInterrupt` | `ABORTED` | +| Session-aborting `KeyboardInterrupt` | Ctrl-C terminates pytest | `IN_PROGRESS` (session ends before the plugin's hooks fire) | + +### Abort propagation through nested substeps + +Every step that was open when the abort fired records +`ABORTED`. + +```python title="test_abort.py" +import sys + + +def test_x(step): + with step.substep(name="completed_sub"): + pass # closes as PASSED before the abort + with step.substep(name="outer_sub") as outer_sub: + with outer_sub.substep(name="inner_sub"): + sys.exit(1) # ABORTED applied to inner_sub, outer_sub, and the test step +``` + +The Sift report shows `completed_sub` as `PASSED` and the three steps +still open at the abort (`inner_sub`, `outer_sub`, and the test step +itself) as `ABORTED`. 
+ +## Skips + +| Scenario | Trigger | Outcome | +| ------------------------------------- | --------------------------------------------- | --------- | +| Collection-time skip | `@pytest.mark.skip(reason=...)` | `SKIPPED` | +| Conditional collection-time skip | `@pytest.mark.skipif(True, reason=...)` | `SKIPPED` | +| Runtime skip from the test body | `pytest.skip("...")` | `SKIPPED` | +| Skip raised inside a fixture | `@pytest.fixture` calls `pytest.skip("...")` | `SKIPPED` | + +`SKIPPED` does not propagate as a failure. A skipped substep or test does +not block its parent from resolving to `PASSED`. + +## Expected failures (xfail / xpass) + +xfail marks declare that a test is expected to fail. The plugin follows +the same semantics pytest does. + +| Scenario | Trigger | Outcome | +| ----------------------------------------- | ---------------------------------------------------------- | ------------------------------------------------------------- | +| xfail-marked test that fails | `@pytest.mark.xfail` + `assert 1 == 2` | `PASSED` (the test fulfilled the xfail expectation) | +| Strict xfail that unexpectedly passes | `@pytest.mark.xfail(strict=True)` + `assert True` | `FAILED` (the mark no longer matches reality) | +| Non-strict xfail that unexpectedly passes | `@pytest.mark.xfail()` + `assert True` | `PASSED` (`strict=False` does not insist on the failure) | +| `xfail(raises=...)` with wrong exception | `@pytest.mark.xfail(raises=ValueError)` + `raise KeyError` | `FAILED` (the `raises=` mismatch is a real test failure) | +| `xfail(run=False)` | `@pytest.mark.xfail(run=False)` | `SKIPPED` (the body never ran) | + +## Influencing outcomes from test code + +A test can also set the step's outcome directly via the helpers below. +Substeps your test opens follow the same propagation rules as the ones +the plugin opens for you. + +### Manual status override + +`step.current_step.update({...})` sets the status directly. The step's +exit handler does not overwrite it. 
+ ```python +from sift_client.sift_types.test_report import TestStatus + + +def test_manual(step): + step.current_step.update({"status": TestStatus.FAILED}) +``` + +### `report_outcome` for externally computed checks + +`report_outcome(name, result, reason)` records a named check whose +pass/fail was computed elsewhere (a subprocess, a remote system, your own +comparison logic). A failing outcome marks the step `FAILED`. + +```python +def test_external_check(step): + result, reason = run_external_validator() + step.report_outcome("ext-validator", result, reason) +``` + +### Measurements with bounds + +`step.measure(name=, value=, bounds=)` records a measurement and resolves +the step to `FAILED` if the value is out of bounds. The call returns the +pass/fail boolean and does not raise, so multiple measurements can run +without short-circuiting. + +```python +def test_battery(step): + step.measure(name="voltage", value=12.1, bounds={"min": 11.5, "max": 13.0}, unit="V") + step.measure(name="current", value=0.42, bounds={"max": 1.0}, unit="A") +``` + +### Substep failures + +A failed substep propagates failure to its parent step. A manually-set +`SKIPPED` on a substep does not. + +```python +def test_with_substep(step): + with step.substep(name="check") as inner: + inner.measure(name="value", value=99.0, bounds={"min": 0.0, "max": 5.0}) + # The outer step resolves to FAILED because the substep failed. +``` + +## Propagation rules + +Every step that is neither `PASSED` nor `SKIPPED` marks its parent as +failed. What the parent records depends on whether its own scope had an +abort and whether a child already failed: + +- A hard exit (`SystemExit` or an observed `KeyboardInterrupt`) in the + step's own scope records `ABORTED`. `ABORTED` propagates through every + step the abort passes through on its way up. +- A child that already recorded an outcome other than `PASSED` or + `SKIPPED` marks the parent as `FAILED`.
This holds whether or not an exception is still + propagating through the parent's scope: only the originating substep + records `ERROR`; ancestors inherit `FAILED`. The traceback stays on + the originating step's `error_info`. +- A step records `ERROR` only when its own scope raised a non-Assertion + exception AND no child has failed. + +`SKIPPED` does not propagate. A status set explicitly via +`current_step.update` is kept. diff --git a/python/lib/sift_client/_tests/pytest_plugin/_fakes.py b/python/lib/sift_client/_tests/pytest_plugin/_fakes.py deleted file mode 100644 index 460100daa..000000000 --- a/python/lib/sift_client/_tests/pytest_plugin/_fakes.py +++ /dev/null @@ -1,132 +0,0 @@ -"""Test doubles for the pytester-driven pytest-plugin tests. - -The fake ``ReportContext`` is a drop-in for the real one that records every -step creation to a JSON file at session exit. Used by ``test_parametrize.py`` -to assert the step tree produced by an inner pytester pytest run. -""" - -from __future__ import annotations - -import itertools -import json -from typing import TYPE_CHECKING, Any -from unittest.mock import MagicMock - -if TYPE_CHECKING: - from pathlib import Path - - -class FakeStep: - def __init__(self, id_: str, name: str, parent_step_id: str | None, step_path: str) -> None: - self.id_ = id_ - self.name = name - self.parent_step_id = parent_step_id - self.step_path = step_path - self.status: Any = None - self.description: Any = None - self.error_info: Any = None - - def update(self, fields: dict[str, Any]) -> None: - for k, v in fields.items(): - setattr(self, k, v) - - -class FakeReport: - def __init__(self) -> None: - self.id_ = "report-id" - - def update(self, fields: dict[str, Any]) -> None: - pass - - -class FakeReportContext: - def __init__(self, steps_file: Path) -> None: - self.steps_file = steps_file - self.report = FakeReport() - self.client = MagicMock() - self.step_stack: list[FakeStep] = [] - self.step_number_at_depth: dict[int, int] = {} - 
self.open_step_results: dict[str, bool] = {} - self.any_failures = False - self.log_file: Path | None = None - self.steps: list[dict[str, Any]] = [] - self._ids = itertools.count(1) - - def __enter__(self) -> FakeReportContext: - return self - - def __exit__(self, *_: Any) -> None: - self.steps_file.write_text(json.dumps(self.steps)) - - def new_step( - self, - name: str, - description: str | None = None, - assertion_as_fail_not_error: bool = True, - metadata: dict[str, Any] | None = None, - ) -> Any: - # Reuse the real NewStep machinery — it talks to this fake via the - # methods below. - from sift_client.util.test_results.context_manager import NewStep - - return NewStep( - self, # type: ignore[arg-type] - name=name, - description=description, - assertion_as_fail_not_error=assertion_as_fail_not_error, - metadata=metadata, - ) - - def get_next_step_path(self) -> str: - top = self.step_stack[-1] if self.step_stack else None - path = top.step_path if top else "" - next_n = self.step_number_at_depth.get(len(self.step_stack), 0) + 1 - prefix = f"{path}." 
if path else "" - return f"{prefix}{next_n}" - - def create_step( - self, - name: str, - description: str | None = None, - metadata: dict[str, Any] | None = None, - ) -> FakeStep: - step_path = self.get_next_step_path() - parent = self.step_stack[-1] if self.step_stack else None - step = FakeStep( - id_=f"step-{next(self._ids)}", - name=name, - parent_step_id=parent.id_ if parent else None, - step_path=step_path, - ) - self.step_number_at_depth[len(self.step_stack)] = ( - self.step_number_at_depth.get(len(self.step_stack), 0) + 1 - ) - self.step_stack.append(step) - self.open_step_results[step.step_path] = True - self.steps.append( - { - "id": step.id_, - "name": name, - "parent_step_id": step.parent_step_id, - "step_path": step_path, - } - ) - return step - - def record_step_outcome(self, outcome: bool, step: FakeStep) -> None: - if not outcome: - self.open_step_results[step.step_path] = False - self.any_failures = True - - def resolve_and_propagate_step_result(self, step: FakeStep, error_info: Any = None) -> bool: - result = self.open_step_results.get(step.step_path, True) - if error_info: - result = False - return result - - def exit_step(self, step: FakeStep) -> None: - self.step_number_at_depth[len(self.step_stack)] = 0 - stack_top = self.step_stack.pop() - self.open_step_results.pop(step.step_path) - if stack_top.id_ != step.id_: - raise ValueError("popped step was not the top of the stack") diff --git a/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py new file mode 100644 index 000000000..e92d1726e --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py @@ -0,0 +1,139 @@ +"""Read step status sequences from a Sift offline-mode log file. 
+ +The contract suite drives each scenario through an inner pytester session +run with ``--sift-offline``, which causes the real plugin + ``ReportContext`` +to write every test-result API call to a JSONL log. This module parses +that log into a per-step status timeline that ``test_pass_fail.py`` asserts +against, with no test-only ``ReportContext`` fake required. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from typing import TYPE_CHECKING + +from sift_client._internal.low_level_wrappers._test_results_log import iter_log_data_lines +from sift_client.sift_types.test_report import TestStatus + +if TYPE_CHECKING: + from pathlib import Path + + +@dataclass +class CapturedStep: + step_id: str + name: str + step_path: str + parent_step_id: str | None + statuses: list[TestStatus] = field(default_factory=list) + + +_PROTO_STATUS_NAMES = { + "TEST_STATUS_UNSPECIFIED": TestStatus.UNSPECIFIED, + "TEST_STATUS_DRAFT": TestStatus.DRAFT, + "TEST_STATUS_PASSED": TestStatus.PASSED, + "TEST_STATUS_FAILED": TestStatus.FAILED, + "TEST_STATUS_ABORTED": TestStatus.ABORTED, + "TEST_STATUS_ERROR": TestStatus.ERROR, + "TEST_STATUS_IN_PROGRESS": TestStatus.IN_PROGRESS, + "TEST_STATUS_SKIPPED": TestStatus.SKIPPED, +} + + +def _status(name: str | None) -> TestStatus: + if name is None: + return TestStatus.UNSPECIFIED + return _PROTO_STATUS_NAMES.get(name, TestStatus.UNSPECIFIED) + + +def parse_log(log_path: Path) -> dict[str, CapturedStep]: + """Parse the offline log into ``{step_id: CapturedStep}``. + + Walks the JSONL file in order, building a ``CapturedStep`` for each + ``CreateTestStep`` entry and appending the new status from each + ``UpdateTestStep`` entry. 
+ """ + steps: dict[str, CapturedStep] = {} + for request_type, response_id, json_str in iter_log_data_lines(log_path): + payload = json.loads(json_str) + test_step = payload.get("testStep", {}) + if request_type == "CreateTestStep" and response_id: + steps[response_id] = CapturedStep( + step_id=response_id, + name=test_step.get("name", ""), + step_path=test_step.get("stepPath", ""), + parent_step_id=test_step.get("parentStepId") or None, + statuses=[_status(test_step.get("status"))], + ) + elif request_type == "UpdateTestStep": + step_id = test_step.get("testStepId") + new_status = test_step.get("status") + if step_id and step_id in steps and new_status is not None: + steps[step_id].statuses.append(_status(new_status)) + return steps + + +_active_log: Path | None = None +_cached: dict[str, CapturedStep] | None = None + + +def set_log(path: Path) -> None: + """Point subsequent queries at a new log file. Clears the parse cache.""" + global _active_log, _cached + _active_log = path + _cached = None + + +def _steps() -> dict[str, CapturedStep]: + global _cached + if _cached is None: + if _active_log is None or not _active_log.exists(): + _cached = {} + else: + _cached = parse_log(_active_log) + return _cached + + +def steps_by_name(name: str) -> list[CapturedStep]: + return [s for s in _steps().values() if s.name == name] + + +def test_step(name: str) -> CapturedStep | None: + """The step the autouse ``step`` fixture creates for the test function. + + Multiple steps can share a name (e.g. when the makereport hook records an + inline step for a collection-time skip on top of the autouse step). The + autouse step is the shallowest by path depth. 
+ """ + matches = steps_by_name(name) + if not matches: + return None + return min(matches, key=lambda s: s.step_path.count(".")) + + +def final_status(name: str) -> TestStatus | None: + step = test_step(name) + return step.statuses[-1] if step and step.statuses else None + + +def load_steps(log_path: Path) -> list[dict]: + """Load the offline log as a list of step records keyed by hierarchy fields. + + Each record has ``id``, ``name``, ``parent_step_id``, ``step_path``, the + shape ``test_hierarchy.py`` expects for its ``_by_name`` and + ``_ancestor_names`` walkers. Returns an empty list if the log was never + created (e.g. every item in the inner session was ``sift_exclude``-d, so + the plugin's ``report_context`` fixture never fired). + """ + if not log_path.exists(): + return [] + return [ + { + "id": s.step_id, + "name": s.name, + "parent_step_id": s.parent_step_id, + "step_path": s.step_path, + } + for s in parse_log(log_path).values() + ] diff --git a/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md b/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md new file mode 100644 index 000000000..7e366a512 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md @@ -0,0 +1,105 @@ +# Pytest-plugin step-status: test scenarios + +Reference for the pass/fail scenarios covered by +[`test_pass_fail.py`](test_pass_fail.py). Each row pairs a scenario with the +`TestStatus` the plugin records, and maps to the user-facing contract in +[`docs/guides/pytest_plugin/pass_fail_behavior.md`](../../../../docs/guides/pytest_plugin/pass_fail_behavior.md). + +`TestStatus` values come from `sift_client.sift_types.test_report.TestStatus`: +`PASSED`, `FAILED`, `ERROR`, `SKIPPED`, `ABORTED`, `IN_PROGRESS`. Hard process +exits the plugin can observe (`SystemExit`, `KeyboardInterrupt` when pytest +delivers a call-phase report) map to `ABORTED`. 
A session-aborting interrupt +that fires before the plugin sees it leaves the step in `IN_PROGRESS`. + +## Case ID scheme + +Each scenario has a stable case ID of the form `PREFIX-NN`. Tests in +`test_pass_fail.py` reference their case ID in a leading comment so a test can +be traced back to its row here without rereading the scenario: + +| Prefix | Section | +| ------- | ---------------------------------------- | +| `CALL` | Call-phase exit paths | +| `SKIP` | Skip paths | +| `XFAIL` | xfail / xpass | +| `PHASE` | Setup / teardown phases | +| `COLL` | Collection / fixture-resolution failures | +| `API` | Plugin-API exit paths | + + +## Call-phase exit paths + +| Case | Scenario | Trigger | Outcome | +| --------- | ------------------------------- | ------------------------------------ | -------------------------------------------------------------------------------------------------------- | +| `CALL-01` | Test passes | function body returns cleanly | `PASSED` | +| `CALL-02` | Assert failure in call phase | `assert 1 == 2` | `FAILED` | +| `CALL-03` | Generic exception in call phase | `raise ValueError("boom")` | `ERROR` | +| `CALL-04` | `pytest.fail("...")` from body | `pytest.fail("intentional failure")` | `FAILED` | +| `CALL-05` | `SystemExit` from the test body | `sys.exit(1)` | `ABORTED` | +| `CALL-06` | `KeyboardInterrupt` in body | `raise KeyboardInterrupt` | `IN_PROGRESS` — session aborts before the plugin sees the interrupt; `ABORTED` if the plugin does see it | +| `CALL-07` | Substep raises non-Assertion exception | `with step.substep(...): raise ValueError("boom")` | Substep `ERROR`, test step `FAILED` (child-failed signal outranks the propagating exception) | + +## Skip paths + +| Case | Scenario | Trigger | Outcome | +| --------- | -------------------------------- | -------------------------------------------- | ------------------------------------------------------------------------ | +| `SKIP-01` | Collection-time skip | 
`@pytest.mark.skip(reason=...)` | `SKIPPED` — only the makereport hook records a step; no autouse step ran | +| `SKIP-02` | Conditional collection-time skip | `@pytest.mark.skipif(True, reason=...)` | `SKIPPED` — same route as `@pytest.mark.skip` | +| `SKIP-03` | Runtime skip in body | `pytest.skip("...")` | Outer step `SKIPPED`; no duplicate nested step | +| `SKIP-04` | Skip raised inside a fixture | `@pytest.fixture` calls `pytest.skip("...")` | Outer step `SKIPPED` (setup-phase skip); no duplicate nested step | + +## xfail / xpass + +| Case | Scenario | Trigger | Outcome | +| ---------- | ----------------------------------------- | ---------------------------------------------------------- | -------------------------------------------------------- | +| `XFAIL-01` | xfail-marked test that fails | `@pytest.mark.xfail` + `assert 1 == 2` | `PASSED` — test fulfilled the xfail expectation | +| `XFAIL-02` | Strict xfail that unexpectedly passes | `@pytest.mark.xfail(strict=True)` + `assert True` | `FAILED` — mark no longer matches reality | +| `XFAIL-03` | Non-strict xfail that unexpectedly passes | `@pytest.mark.xfail()` + `assert True` | `PASSED` — `strict=False` doesn't insist on the failure | +| `XFAIL-04` | `xfail(raises=...)` with wrong exception | `@pytest.mark.xfail(raises=ValueError)` + `raise KeyError` | `FAILED` — `raises=` mismatch is a real test failure | +| `XFAIL-05` | `xfail(run=False)` | `@pytest.mark.xfail(run=False)` (body never executed) | `SKIPPED` — the test never ran | + +## Setup / teardown phases + +| Case | Scenario | Trigger | Outcome | +| ---------- | -------------------------------------------- | ------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------- | +| `PHASE-01` | Setup-phase fixture failure (RuntimeError) | `@pytest.fixture` raises before `yield`; test body never runs | `ERROR` — plugin reads 
the setup-phase report and maps `failed` → `ERROR` (a `phase=setup` annotation is a planned follow-up) | +| `PHASE-02` | Teardown-phase fixture failure | `@pytest.fixture` raises after `yield`; test body passed | `FAILED` — plugin upgrades a passed step when the teardown report shows `failed` (a `phase=teardown` annotation is a planned follow-up) | +| `PHASE-03` | Call-phase fail **plus** teardown-phase fail | `assert 1 == 2` in body AND `@pytest.fixture` raises after `yield` | `FAILED` — call-phase failure dominates; surfacing the teardown error alongside is a planned follow-up | + +## Collection / fixture-resolution failures + +| Case | Scenario | Trigger | Outcome | +| --------- | --------------- | ---------------------------------- | ------------------------------------------------------------------------------------------------------------------ | +| `COLL-01` | Missing fixture | `def test_x(nonexistent_fixture):` | `ERROR` — missing fixture surfaces as a setup-phase failure (a `phase=setup` annotation is a planned follow-up) | + +## Plugin-API exit paths (in-test mutations) + +| Case | Scenario | Trigger | Outcome | +| -------- | --------------------------------- | ------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| `API-01` | Manual status override | `step.current_step.update({"status": TestStatus.FAILED})` | `FAILED` | +| `API-02` | `report_outcome(result=False)` | `step.report_outcome("the_check", False, "did not match")` | `FAILED` | +| `API-03` | `measure(...)` out-of-bounds | `step.measure(name="m", value=10.0, bounds={"min": 0.0, "max": 5.0})` | `FAILED` | +| `API-04` | Failed measurement on a substep | `with step.substep(...) as s: s.measure(... out-of-bounds)` | `FAILED` — propagates from substep to parent | +| `API-05` | Manually-skipped substep | `with step.substep(...) 
as s: s.current_step.update({"status": SKIPPED})` | Parent step `PASSED` — skip does not propagate as a failure | +| `API-06` | Hard exit inside a nested substep | `with step.substep(...) as s: with s.substep(...): sys.exit(1)` | Every open step on the unwind path records `ABORTED`; a sibling substep that closed before the abort keeps its prior status | + +## Out of scope + +Scenarios deliberately not covered by this suite: + +- **Timeout** — needs `pytest-timeout` or a manual signal harness. +- **Signal (SIGKILL / SIGTERM)** — cannot be caught from inside the process; + needs a subprocess-level harness. +- **`pytest.exit("...")`** — niche; the "aborts subsequent tests" behavior + is hard to characterize cleanly because each `pytester` invocation is + its own session. +- **`os._exit()`** — bypasses Python cleanup entirely; can't be tested + in-process because it would kill the outer pytest run. Guaranteed + data-loss case alongside `SystemExit` / `SIGKILL`. +- **Parametrize-level marks** (`pytest.param(..., marks=pytest.mark.xfail / skip)`) + — routes through a different selection path but produces the same + `report.outcome`, so behavior matches the function-level marks already + covered above. +- **Import error / syntax error / `conftest.py` error** — these fail + collection entirely; no `item` is produced and no plugin hook fires, so + no Sift step is recorded. diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py index cecad2df8..1efd4e817 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py @@ -4,65 +4,43 @@ classes (including nested), parametrize axes — plus the ini opt-out flags, failure-cleanup semantics, and the drain helper. 
-Each test spins up an inner pytest run via ``pytester`` whose conftest swaps -in a ``FakeReportContext`` (from ``_fakes.py``) that records every step -creation to a JSON file. The outer test reads that file and asserts the -resulting step tree. +Each test spins up an inner pytest run via ``pytester`` configured with +``--sift-offline`` and a known log path. The plugin writes every test-result +API call to that JSONL log, and the outer test parses it via +``_step_status_capture.load_steps`` to reconstruct the step tree. """ from __future__ import annotations -import json -from pathlib import Path as _Path from textwrap import dedent from typing import TYPE_CHECKING import pytest +from sift_client._tests.pytest_plugin import _step_status_capture as capture + if TYPE_CHECKING: from pathlib import Path -_STEPS_FILE_ENV = "SIFT_FAKE_STEPS_FILE" - -# ``_fakes.py`` is excluded from the wheel by ``pyproject.toml``'s -# ``packages.find`` rule that strips ``sift_client._tests``. The inner -# pytester subprocess uses the installed package and cannot import from -# ``sift_client._tests``. Embed the fake source directly into the inner -# conftest so the subprocess gets a fully self-contained module to load. 
-_FAKES_SOURCE = (_Path(__file__).parent / "_fakes.py").read_text() - -_INNER_CONFTEST = f""" -{_FAKES_SOURCE} - -import os -from pathlib import Path -from unittest.mock import MagicMock - -import pytest - -pytest_plugins = ["sift_client.pytest_plugin"] +_INNER_CONFTEST = 'pytest_plugins = ["sift_client.pytest_plugin"]\n' -@pytest.fixture(scope="session") -def sift_client(): - return MagicMock() - -@pytest.fixture(scope="session", autouse=True) -def report_context(sift_client): - import sift_client.pytest_plugin as plugin_module - steps_file = Path(os.environ[{_STEPS_FILE_ENV!r}]) - with FakeReportContext(steps_file) as ctx: - plugin_module.REPORT_CONTEXT = ctx - yield ctx -""" +def _base_ini_lines(log_path: Path) -> list[str]: + """Default ini settings every inner pytester run needs.""" + return [ + "[pytest]", + "sift_offline = true", + f"sift_log_file = {log_path}", + "sift_git_metadata = false", + ] @pytest.fixture -def steps_file(pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch) -> Path: - path = pytester.path / "captured_steps.json" +def log_file(pytester: pytest.Pytester) -> Path: + path = pytester.path / "sift.log" pytester.makeconftest(_INNER_CONFTEST) - monkeypatch.setenv(_STEPS_FILE_ENV, str(path)) + pytester.makefile(".ini", pytest="\n".join(_base_ini_lines(path)) + "\n") return path @@ -85,9 +63,7 @@ def _ancestor_names(steps: list[dict], leaf: dict) -> list[str]: return chain -def test_class_methods_cluster_under_class_step( - pytester: pytest.Pytester, steps_file: Path -) -> None: +def test_class_methods_cluster_under_class_step(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_klass=dedent( """ @@ -102,7 +78,7 @@ def test_b(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert len(by_name["TestFoo"]) == 1 class_id = by_name["TestFoo"][0]["id"] @@ -110,7 +86,7 @@ 
def test_b(self): assert by_name["test_b"][0]["parent_step_id"] == class_id -def test_nested_classes_produce_nested_steps(pytester: pytest.Pytester, steps_file: Path) -> None: +def test_nested_classes_produce_nested_steps(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_nested=dedent( """ @@ -123,7 +99,7 @@ def test_a(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert len(by_name["TestOuter"]) == 1 assert len(by_name["TestInner"]) == 1 @@ -136,7 +112,7 @@ def test_a(self): ] -def test_class_parametrize_nests_under_class(pytester: pytest.Pytester, steps_file: Path) -> None: +def test_class_parametrize_nests_under_class(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_cp=dedent( """ @@ -151,7 +127,7 @@ def test_a(self, v): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) class_id = by_name["TestFoo"][0]["id"] test_a_id = by_name["test_a"][0]["id"] @@ -160,7 +136,7 @@ def test_a(self, v): assert by_name["v=2"][0]["parent_step_id"] == test_a_id -def test_two_sibling_classes_in_module(pytester: pytest.Pytester, steps_file: Path) -> None: +def test_two_sibling_classes_in_module(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_sib=dedent( """ @@ -176,7 +152,7 @@ def test_y(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) mod_id = by_name["test_sib.py"][0]["id"] assert by_name["TestA"][0]["parent_step_id"] == mod_id @@ -186,7 +162,7 @@ def test_y(self): assert len(by_name["TestB"]) == 1 -def test_mixed_class_and_free_function(pytester: pytest.Pytester, 
steps_file: Path) -> None: +def test_mixed_class_and_free_function(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_mix=dedent( """ @@ -201,7 +177,7 @@ def test_free(): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) mod_id = by_name["test_mix.py"][0]["id"] # Class method parents to TestA; free function parents directly to module. @@ -211,7 +187,7 @@ def test_free(): def test_class_with_all_excluded_methods_no_class_step( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_excl=dedent( @@ -231,14 +207,14 @@ def test_b(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert "TestFoo" not in by_name assert "test_a" not in by_name assert "test_b" not in by_name -def test_sift_exclude_on_class_propagates(pytester: pytest.Pytester, steps_file: Path) -> None: +def test_sift_exclude_on_class_propagates(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_clsexcl=dedent( """ @@ -256,14 +232,14 @@ def test_b(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert "TestFoo" not in by_name assert "test_a" not in by_name def test_class_docstring_becomes_step_description( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_doc=dedent( @@ -278,7 +254,7 @@ def test_a(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = 
_by_name(steps) # The fake records step creation but not all fields — check the class # step was recorded, then read the description via the FakeStep's @@ -289,7 +265,7 @@ def test_a(self): def test_transition_between_class_chains_drains_parametrize( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_trans=dedent( @@ -310,7 +286,7 @@ def test_y(self, w): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # Each class opens exactly once; parametrize parents under the right class. assert len(by_name["TestA"]) == 1 @@ -396,7 +372,7 @@ def __exit__(self, *_: object) -> None: def test_failing_test_in_class_does_not_orphan_class_step( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: """A failing class method must not block the class step from cleaning up. 
@@ -422,7 +398,7 @@ def test_c(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2, failed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert len(by_name["TestFoo"]) == 1 assert len(by_name["TestBar"]) == 1 @@ -439,7 +415,7 @@ def test_c(self): def test_failing_parametrized_method_in_class_closes_full_chain( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: """A failing parametrized class method must not orphan its parametrize parents.""" pytester.makepyfile( @@ -460,7 +436,7 @@ def test_b(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2, failed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) foo_id = by_name["TestFoo"][0]["id"] test_a_id = by_name["test_a"][0]["id"] @@ -476,18 +452,18 @@ def test_b(self): # --------------------------------------------------------------------------- -def _write_ini(pytester: pytest.Pytester, **overrides: object) -> None: - """Write a pytest.ini with the given sift_* overrides set under [pytest].""" - lines = ["[pytest]"] +def _write_ini(pytester: pytest.Pytester, log_file: Path, **overrides: object) -> None: + """Write a pytest.ini with the given sift_* overrides, preserving the + offline/log/git-metadata defaults the ``log_file`` fixture installs. 
+ """ + lines = _base_ini_lines(log_file) for key, value in overrides.items(): lines.append(f"{key} = {value}") pytester.makefile(".ini", pytest="\n".join(lines) + "\n") -def test_sift_class_step_false_skips_class_steps( - pytester: pytest.Pytester, steps_file: Path -) -> None: - _write_ini(pytester, sift_class_step="false") +def test_sift_class_step_false_skips_class_steps(pytester: pytest.Pytester, log_file: Path) -> None: + _write_ini(pytester, log_file, sift_class_step="false") pytester.makepyfile( test_noclass=dedent( """ @@ -502,7 +478,7 @@ def test_b(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert "TestFoo" not in by_name mod_id = by_name["test_noclass.py"][0]["id"] @@ -511,9 +487,9 @@ def test_b(self): def test_sift_module_step_false_skips_module_step( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: - _write_ini(pytester, sift_module_step="false") + _write_ini(pytester, log_file, sift_module_step="false") pytester.makepyfile( test_nomod=dedent( """ @@ -525,7 +501,7 @@ def test_a(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert "test_nomod.py" not in by_name # TestFoo attaches to the report root (no parent recorded by the fake). 
@@ -534,9 +510,9 @@ def test_a(self): def test_sift_parametrize_nesting_false_keeps_flat_leaves( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: - _write_ini(pytester, sift_parametrize_nesting="false") + _write_ini(pytester, log_file, sift_parametrize_nesting="false") pytester.makepyfile( test_flat=dedent( """ @@ -550,7 +526,7 @@ def test_a(v): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # No parametrize parent step. assert "test_a" not in by_name @@ -564,7 +540,7 @@ def test_a(v): def test_sift_module_step_false_still_drains_across_modules( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: """sift_module_step=false must not merge same-named classes across modules. @@ -572,7 +548,7 @@ def test_sift_module_step_false_still_drains_across_modules( (even when it's not rendered as a step), so two modules each declaring ``class TestFoo`` produce two distinct ``TestFoo`` frames in the diff. """ - _write_ini(pytester, sift_module_step="false") + _write_ini(pytester, log_file, sift_module_step="false") pytester.makepyfile( test_a=dedent( """ @@ -591,7 +567,7 @@ def test_y(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # Two distinct TestFoo class steps — one per module — not a shared frame. 
assert len(by_name["TestFoo"]) == 2 @@ -605,7 +581,7 @@ def test_y(self): def test_package_step_default_opens_for_init_dirs( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: """Default: a directory with ``__init__.py`` produces a parent package step.""" pytester.mkpydir("pkg_a") @@ -619,7 +595,7 @@ def test_one(): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert "pkg_a" in by_name pkg_id = by_name["pkg_a"][0]["id"] @@ -628,7 +604,7 @@ def test_one(): def test_same_named_packages_in_different_dirs_do_not_merge( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: """Two packages with the same display name but different paths must stay distinct. @@ -663,7 +639,7 @@ def test_two(): # name on disk don't collide during sys.path-based import. result = pytester.runpytest_subprocess("-v", "--import-mode=importlib") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # Two distinct ``utils`` package steps — one per project. 
assert len(by_name["utils"]) == 2 @@ -677,10 +653,10 @@ def test_two(): def test_sift_package_step_false_skips_package_steps( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: """With ``sift_package_step=false`` the directory step is suppressed.""" - _write_ini(pytester, sift_package_step="false") + _write_ini(pytester, log_file, sift_package_step="false") pytester.mkpydir("pkg_a") (pytester.path / "pkg_a" / "test_x.py").write_text( dedent( @@ -692,7 +668,7 @@ def test_one(): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert "pkg_a" not in by_name # The module step still opens and is now the top-level frame. @@ -700,10 +676,11 @@ def test_one(): def test_all_three_flags_false_matches_legacy_behavior( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: _write_ini( pytester, + log_file, sift_module_step="false", sift_class_step="false", sift_parametrize_nesting="false", @@ -722,7 +699,7 @@ def test_a(self, v): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # No module, class, or parametrize parents — just bracket-mangled leaves. assert "test_legacy.py" not in by_name @@ -740,7 +717,7 @@ def test_a(self, v): def test_single_parametrize_clusters_under_originalname( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_rail=dedent( @@ -755,7 +732,7 @@ def test_rail(v): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # Module step + one shared `test_rail` parent + two leaves. 
assert len(by_name["test_rail.py"]) == 1 @@ -768,7 +745,7 @@ def test_rail(v): def test_stacked_parametrize_nests_outer_to_inner( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_iso=dedent( @@ -784,7 +761,7 @@ def test_iso(voltage, component): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=4) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # One `test_iso` parent, two `voltage='…'` parents, four `component='…'` leaves. assert len(by_name["test_iso"]) == 1 @@ -806,7 +783,7 @@ def test_iso(voltage, component): assert leaf["parent_step_id"] in voltage_ids -def test_fixture_parametrization_participates(pytester: pytest.Pytester, steps_file: Path) -> None: +def test_fixture_parametrization_participates(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_widget=dedent( """ @@ -823,7 +800,7 @@ def test_widget(widget): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert len(by_name["test_widget"]) == 1 parent_id = by_name["test_widget"][0]["id"] @@ -832,7 +809,7 @@ def test_widget(widget): def test_module_boundary_isolates_parametrize_stack( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_a=dedent( @@ -856,7 +833,7 @@ def test_two(w): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=4) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # Each module step contains its own `test_one`/`test_two` parametrize subtree. 
mod_a = by_name["test_a.py"][0] @@ -865,9 +842,7 @@ def test_two(w): assert by_name["test_two"][0]["parent_step_id"] == mod_b["id"] -def test_leaf_parent_chain_terminates_at_report( - pytester: pytest.Pytester, steps_file: Path -) -> None: +def test_leaf_parent_chain_terminates_at_report(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_chain=dedent( """ @@ -882,7 +857,7 @@ def test_chain(a, b): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) leaf = next(s for s in steps if s["name"].startswith("b=")) chain = _ancestor_names(steps, leaf) # leaf b=… → a=… → test_chain → test_chain.py (module step) → root diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py new file mode 100644 index 000000000..0e1540ce7 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py @@ -0,0 +1,562 @@ +"""Contract suite: maps each pytest exit path to the ``TestStatus`` the +Sift pytest plugin is required to record on the outer step. + +Each scenario writes a tiny inner test file and runs it through pytester +with ``--sift-offline`` and a JSONL log file. The plugin writes every +test-result API call to that log, and ``_step_status_capture`` reads it back +so this outer test can assert on what the plugin produced. + +Assertions encode the contract from +``docs/guides/pytest_plugin/pass_fail_behavior.md``. Tests for scenarios the +plugin does not yet handle correctly are expected to **fail today** — they +are the punch list. ``lib/sift_client/_tests/pytest_plugin/step_status_states.md`` +tracks each scenario's observed-today behavior next to the target so the +remaining gaps are visible without running the suite.
+""" + +from __future__ import annotations + +import textwrap + +import pytest + +from sift_client._tests.pytest_plugin import _step_status_capture as capture +from sift_client.sift_types.test_report import TestStatus + +pytest_plugins = ["pytester"] + + +_INNER_CONFTEST_SRC = ''' +"""Auto-generated conftest. Loading the Sift plugin is the only thing the +inner session needs. ``--sift-offline`` on the CLI causes the plugin's +default ``sift_client`` fixture to construct a placeholder client and the +real ``ReportContext`` writes every API call to the JSONL log without +contacting Sift. +""" + +pytest_plugins = ["sift_client.pytest_plugin"] +''' + + +@pytest.fixture +def inner(pytester): + """Install the inner conftest. Returns ``pytester``.""" + pytester.makeconftest(_INNER_CONFTEST_SRC) + return pytester + + +# Prepended to every inner test file. Pytest skips marker-based ``skip`` items +# before any autouse fixture runs, which would leave ``REPORT_CONTEXT`` unset +# and the plugin's inline-skip recording inert. A single passing item up-front +# forces ``report_context`` to initialize so the makereport hook can record +# the skip into the same session's JSONL. 
+_WARMUP = "def test_sift_warmup(): pass\n\n" + + +def _run(pytester, body: str) -> None: + pytester.makepyfile(_WARMUP + textwrap.dedent(body)) + log_path = pytester.path / "sift.log" + capture.set_log(log_path) + pytester.runpytest_inprocess( + "--sift-offline", + f"--sift-log-file={log_path}", + "--no-sift-git-metadata", + ) + + +# --------------------------------------------------------------------------- +# Call-phase exit paths +# --------------------------------------------------------------------------- + + +def test_pass_maps_to_passed(inner): + # Case: CALL-01 + _run( + inner, + """ + def test_x(): + assert True + """, + ) + assert capture.final_status("test_x") == TestStatus.PASSED + + +def test_assert_failure_maps_to_failed(inner): + # Case: CALL-02 + _run( + inner, + """ + def test_x(): + assert 1 == 2 + """, + ) + assert capture.final_status("test_x") == TestStatus.FAILED + + +def test_generic_exception_maps_to_error(inner): + # Case: CALL-03 + _run( + inner, + """ + def test_x(): + raise ValueError("boom") + """, + ) + assert capture.final_status("test_x") == TestStatus.ERROR + + +def test_system_exit_maps_to_aborted(inner): + # Case: CALL-05 + _run( + inner, + """ + import sys + def test_x(): + sys.exit(1) + """, + ) + assert capture.final_status("test_x") == TestStatus.ABORTED + + +def test_pytest_fail_maps_to_failed(inner): + # Case: CALL-04 + _run( + inner, + """ + import pytest + def test_x(): + pytest.fail("intentional failure") + """, + ) + assert capture.final_status("test_x") == TestStatus.FAILED + + +def test_keyboard_interrupt_leaves_step_in_progress(inner): + # Case: CALL-06 + # KeyboardInterrupt aborts the session before the call-phase makereport + # fires; the plugin can't observe the interrupt. The contract is that + # the step is left in IN_PROGRESS rather than being silently resolved + # to PASSED — a session-aborting interrupt should not look like a clean + # pass in the report. 
+ try: + _run( + inner, + """ + def test_x(): + raise KeyboardInterrupt + """, + ) + except KeyboardInterrupt: + pass + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.IN_PROGRESS + + +def test_substep_exception_records_error_with_failed_parent(inner): + # Case: CALL-07 + _run( + inner, + """ + def test_x(step): + with step.substep(name="inner"): + raise ValueError("boom") + """, + ) + # Only the originating substep records ERROR. The test step inherits the + # child-failed signal and resolves to FAILED, even though the same + # ValueError propagated through its scope. + inner_sub = next(iter(capture.steps_by_name("inner")), None) + test_x = capture.test_step("test_x") + assert inner_sub is not None + assert test_x is not None + assert inner_sub.statuses[-1] == TestStatus.ERROR + assert test_x.statuses[-1] == TestStatus.FAILED + + +# --------------------------------------------------------------------------- +# Skip paths +# --------------------------------------------------------------------------- + + +def test_pytest_skip_in_body_maps_to_skipped(inner): + # Case: SKIP-03 + _run( + inner, + """ + import pytest + def test_x(): + pytest.skip("not today") + """, + ) + # Runtime skip in the body resolves the outer step to SKIPPED. The + # makereport hook must not create a duplicate nested step. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.SKIPPED + duplicates = [s for s in capture.steps_by_name("test_x") if s is not outer] + assert not duplicates, f"expected no duplicate nested step; got {len(duplicates)}" + + +def test_pytest_mark_skip_records_skipped(inner): + # Case: SKIP-01 + _run( + inner, + """ + import pytest + @pytest.mark.skip(reason="collection-time skip") + def test_x(): + assert False + """, + ) + # Collection-time skip: the autouse step fixture never runs. Only the + # makereport hook creates a step, with status SKIPPED. 
+ assert capture.final_status("test_x") == TestStatus.SKIPPED + + +def test_pytest_mark_skipif_records_skipped(inner): + # Case: SKIP-02 + _run( + inner, + """ + import pytest + @pytest.mark.skipif(True, reason="conditional skip") + def test_x(): + assert False + """, + ) + # `skipif` with a truthy condition follows the same path as + # `@pytest.mark.skip`; only the makereport hook records a step. + assert capture.final_status("test_x") == TestStatus.SKIPPED + + +def test_skip_inside_fixture_setup(inner): + # Case: SKIP-04 + _run( + inner, + """ + import pytest + + @pytest.fixture + def skipping_fixture(): + pytest.skip("environment not ready") + + def test_x(skipping_fixture): + assert True + """, + ) + # A setup-phase skip resolves the outer step to SKIPPED. The makereport + # hook must not create a duplicate nested step. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.SKIPPED + duplicates = [s for s in capture.steps_by_name("test_x") if s is not outer] + assert not duplicates, f"expected no duplicate nested step; got {len(duplicates)}" + + +# --------------------------------------------------------------------------- +# xfail / xpass +# --------------------------------------------------------------------------- + + +def test_xfail_marked_test_that_fails(inner): + # Case: XFAIL-01 + _run( + inner, + """ + import pytest + @pytest.mark.xfail(reason="known issue") + def test_x(): + assert 1 == 2 + """, + ) + # xfail + expected failure fulfills the contract; outer step resolves to + # PASSED. No duplicate nested step from the makereport hook. 
+ outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.PASSED + duplicates = [s for s in capture.steps_by_name("test_x") if s is not outer] + assert not duplicates, f"expected no duplicate nested step; got {len(duplicates)}" + + +def test_xfail_strict_unexpected_pass(inner): + # Case: XFAIL-02 + _run( + inner, + """ + import pytest + @pytest.mark.xfail(strict=True, reason="should fail") + def test_x(): + assert True + """, + ) + # strict xfail that passes must surface as FAILED: either the bug was + # fixed (remove the mark) or the test stopped exercising what it claimed. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.FAILED + + +def test_xfail_non_strict_unexpected_pass(inner): + # Case: XFAIL-03 + _run( + inner, + """ + import pytest + @pytest.mark.xfail(reason="might pass sometimes") + def test_x(): + assert True + """, + ) + # Non-strict xfail does not insist on the failure, so a passing run is + # PASSED. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.PASSED + + +def test_xfail_raises_mismatch(inner): + # Case: XFAIL-04 + _run( + inner, + """ + import pytest + @pytest.mark.xfail(raises=ValueError, reason="expected ValueError") + def test_x(): + raise KeyError("wrong exception") + """, + ) + # `raises=` mismatch is a real test failure — the contract required a + # specific exception type and a different one was thrown. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.FAILED + + +def test_xfail_run_false(inner): + # Case: XFAIL-05 + _run( + inner, + """ + import pytest + @pytest.mark.xfail(run=False, reason="never run") + def test_x(): + assert False + """, + ) + # The test never ran; outer step is SKIPPED. 
+ assert capture.final_status("test_x") == TestStatus.SKIPPED + + +# --------------------------------------------------------------------------- +# Setup-phase / teardown-phase fixture failures +# --------------------------------------------------------------------------- + + +def test_setup_phase_fixture_failure(inner): + # Case: PHASE-01 + _run( + inner, + """ + import pytest + + @pytest.fixture + def bad_setup(): + raise RuntimeError("setup boom") + + def test_x(bad_setup): + assert True + """, + ) + # A fixture that raises before `yield` fails the setup phase. The outer + # step must surface this as ERROR; the test body never executed and a + # silently green step would hide the failure. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.ERROR + + +def test_teardown_phase_fixture_failure(inner): + # Case: PHASE-02 + _run( + inner, + """ + import pytest + + @pytest.fixture + def bad_teardown(): + yield + raise RuntimeError("teardown boom") + + def test_x(bad_teardown): + assert True + """, + ) + # A fixture that raises after `yield` fails the teardown phase. The + # outer step's status reflects the teardown failure as FAILED rather + # than the call-phase pass. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.FAILED + + +def test_call_fail_plus_teardown_fail(inner): + # Case: PHASE-03 + _run( + inner, + """ + import pytest + + @pytest.fixture + def bad_teardown(): + yield + raise RuntimeError("teardown boom") + + def test_x(bad_teardown): + assert 1 == 2 + """, + ) + # Call-phase failure dominates the outer step status; the contract also + # requires the teardown error to be surfaced somewhere on the step + # (mechanism TBD — see pass_fail_behavior.md). This test asserts the + # status today; tighten once a surfacing mechanism is chosen. 
+ outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.FAILED + + +# --------------------------------------------------------------------------- +# Collection-phase failures +# --------------------------------------------------------------------------- + + +def test_missing_fixture_maps_to_error(inner): + # Case: COLL-01 + _run( + inner, + """ + def test_x(nonexistent_fixture): + assert True + """, + ) + # An unresolved fixture is a setup-phase failure. The outer step + # surfaces as ERROR rather than a misleading green pass for a test + # that never executed. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.ERROR + + +# --------------------------------------------------------------------------- +# Plugin-API exit paths (in-test mutations) +# --------------------------------------------------------------------------- + + +def test_manual_status_update_to_failed(inner): + # Case: API-01 + _run( + inner, + """ + from sift_client.sift_types.test_report import TestStatus + def test_x(step): + step.current_step.update({"status": TestStatus.FAILED}) + """, + ) + assert capture.final_status("test_x") == TestStatus.FAILED + + +def test_report_outcome_false_maps_to_failed(inner): + # Case: API-02 + _run( + inner, + """ + def test_x(step): + step.report_outcome("the_check", False, "did not match") + """, + ) + # Outer step sees a failed substep and rolls up to FAILED. 
+ assert capture.final_status("test_x") == TestStatus.FAILED + + +def test_measure_out_of_bounds_maps_to_failed(inner): + # Case: API-03 + _run( + inner, + """ + def test_x(step): + step.measure(name="m", value=10.0, bounds={"min": 0.0, "max": 5.0}) + """, + ) + assert capture.final_status("test_x") == TestStatus.FAILED + + +def test_substep_failure_propagates_to_parent(inner): + # Case: API-04 + _run( + inner, + """ + def test_x(step): + with step.substep(name="inner") as inner_step: + inner_step.measure(name="m", value=10.0, bounds={"min": 0.0, "max": 5.0}) + """, + ) + # `test_measure_out_of_bounds_maps_to_failed` exercises a failed + # measurement on the function step itself; this one verifies the same + # failure on a nested substep propagates up to the parent. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.FAILED + + +def test_skipped_substep_does_not_fail_parent(inner): + # Case: API-05 + _run( + inner, + """ + from sift_client.sift_types.test_report import TestStatus + def test_x(step): + with step.substep(name="optional_check") as cal: + cal.current_step.update( + {"status": TestStatus.SKIPPED}, + log_file=step.report_context.log_file, + ) + """, + ) + # A manually-resolved SKIPPED on a substep must not propagate as a failure + # to the parent. The outer step has no measurements of its own and resolves + # to PASSED. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.PASSED + + +def test_abort_inside_substep_marks_every_open_step_aborted(inner): + # Case: API-06 + _run( + inner, + """ + import sys + def test_x(step): + with step.substep(name="completed_sub"): + pass + with step.substep(name="outer_sub") as outer_sub: + with outer_sub.substep(name="inner_sub"): + sys.exit(1) + """, + ) + # SystemExit unwinds the substep stack on the way out. 
Every step that was + # open when the abort fired (inner substep, outer substep, test step) + # must record ABORTED. The sibling substep that closed cleanly before the + # abort must retain its PASSED status. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.ABORTED + outer_sub = next(iter(capture.steps_by_name("outer_sub")), None) + inner_sub = next(iter(capture.steps_by_name("inner_sub")), None) + completed_sub = next(iter(capture.steps_by_name("completed_sub")), None) + assert outer_sub is not None + assert inner_sub is not None + assert completed_sub is not None + assert outer_sub.statuses[-1] == TestStatus.ABORTED + assert inner_sub.statuses[-1] == TestStatus.ABORTED + assert completed_sub.statuses[-1] == TestStatus.PASSED diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index 7c4c1c2f5..c3b303ac8 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -5,14 +5,16 @@ from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path +from types import SimpleNamespace from typing import TYPE_CHECKING, Any, Generator, Tuple import pytest from sift_client import SiftClient, SiftConnectionConfig from sift_client.errors import SiftWarning -from sift_client.sift_types.test_report import TestStatus +from sift_client.sift_types.test_report import ErrorInfo, TestStatus from sift_client.util.test_results import ReportContext +from sift_client.util.test_results.context_manager import format_truncated_traceback class SiftPytestPluginWarning(SiftWarning): @@ -508,17 +510,162 @@ def _resolve_log_file(pytestconfig: pytest.Config | None) -> str | Path | bool | return Path(raw) +def _error_info_from_longrepr(longrepr: Any) -> ErrorInfo: + """Fall back to the report's longrepr when no Python exception is available.""" + return ErrorInfo(error_code=1, error_message=str(longrepr) if longrepr is not 
None else "") + + +def _resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: + """Resolve the function step's status from pytest's per-phase reports. + + Reads ``_sift_phase_setup`` / ``_sift_phase_call`` and the test's xfail marker, + then mutates ``new_step.current_step`` in place and flips + ``new_step._sift_managed_externally`` so ``NewStep.__exit__`` emits the + resolved status without re-classifying. + + When the call phase reports ``passed`` and no override is needed (i.e. the + test's own status or substep failures should drive the result), this leaves + the step alone so the default ``__exit__`` resolution stays in charge. + """ + current_step = new_step.current_step + if current_step is None: + # The step never opened (the autouse fixture short-circuited or was + # disabled). Nothing to resolve. + return + setup_phase = getattr(item, "_sift_phase_setup", None) + call_phase = getattr(item, "_sift_phase_call", None) + xfail_marker = item.get_closest_marker("xfail") + xfail_runs = xfail_marker.kwargs.get("run", True) if xfail_marker is not None else True + + status: TestStatus | None = None + error_info: ErrorInfo | None = None + keep_managed = False + + if setup_phase is not None and setup_phase.report.outcome == "failed": + status = TestStatus.ERROR + excinfo = setup_phase.call.excinfo + if excinfo is not None: + error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + else: + error_info = _error_info_from_longrepr(setup_phase.report.longrepr) + elif setup_phase is not None and setup_phase.report.outcome == "skipped": + status = TestStatus.SKIPPED + elif call_phase is None: + # Setup completed but the call-phase report never fired — the inner + # pytester session was aborted (e.g. by KeyboardInterrupt) before the + # plugin could observe the outcome. Leave the step at IN_PROGRESS so + # the report does not lie about a clean pass. 
+ keep_managed = True + else: + wasxfail = getattr(call_phase.report, "wasxfail", None) + if wasxfail is not None: + if call_phase.report.outcome == "failed": + # Strict xpass: pytest synthesizes a failure when an xfail(strict=True) + # test unexpectedly passes. The xfail mark no longer matches reality. + status = TestStatus.FAILED + elif call_phase.report.outcome == "skipped": + if xfail_marker is not None and xfail_runs is False: + # xfail(run=False): the test body never executed. + status = TestStatus.SKIPPED + else: + # xfail + expected failure: the test fulfilled its xfail expectation. + status = TestStatus.PASSED + else: + # Non-strict xpass: passes that weren't required to fail. + status = TestStatus.PASSED + elif call_phase.report.outcome == "passed": + # Default __exit__ resolves PASSED/FAILED from open_step_results and any + # status the test code may have set. Don't override it here. + return + elif call_phase.report.outcome == "skipped": + status = TestStatus.SKIPPED + elif call_phase.report.outcome == "failed": + excinfo = call_phase.call.excinfo + children_passed = new_step.report_context.open_step_results.get( + current_step.step_path, True + ) + if excinfo is None: + status = TestStatus.FAILED + elif isinstance(excinfo.value, AssertionError): + status = TestStatus.FAILED + elif isinstance(excinfo.value, pytest.fail.Exception): + status = TestStatus.FAILED + elif isinstance(excinfo.value, (KeyboardInterrupt, SystemExit)): + # Hard exits the plugin can observe: pytest converted the + # raise into a call-phase report. The session-aborting variant + # (call_phase is None) lands earlier and stays IN_PROGRESS. + status = TestStatus.ABORTED + error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + elif xfail_marker is not None: + # xfail(raises=X) with a non-matching exception: the contract failed. 
+ status = TestStatus.FAILED + error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + elif not children_passed: + # A substep already recorded the error and carries the traceback; + # the test step only inherits the child-failed signal. + status = TestStatus.FAILED + else: + status = TestStatus.ERROR + error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + + if status is None and not keep_managed: + return + + if status is not None: + # BaseType is frozen; mutate via __dict__ the same way _apply_client_to_instance does. + current_step.__dict__["status"] = status + if error_info is not None: + current_step.__dict__["error_info"] = error_info + new_step._sift_managed_externally = True + + +def _finalize_after_teardown(item: pytest.Item, teardown_report: pytest.TestReport) -> None: + """Upgrade a closed step to FAILED when the teardown phase failed. + + The autouse step fixture has already exited by the time the teardown + makereport hook fires, so call ``step.update`` again to override the status + server-side and propagate the failure to the still-open parent step. + """ + step: NewStep | None = getattr(item, "_sift_step", None) + if step is None: + return + current_step = step.current_step + if current_step is None: + return + if teardown_report.outcome == "failed" and current_step.status == TestStatus.PASSED: + current_step.update({"status": TestStatus.FAILED}) + step.report_context.mark_step_failed_after_close(current_step) + + @pytest.hookimpl(tryfirst=True, hookwrapper=True) def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo[Any]): - """Capture pytest outcomes so assertion failures and skips land on the Sift step.""" + """Capture per-phase reports and finalize step status after teardown. 
+ + Stashes both ``rep_`` (the ``CallInfo``, kept for pytest plugins that + expect that conventional attribute) and ``_sift_phase_`` (a + ``SimpleNamespace(call, report)`` used by ``_resolve_initial_status``). The + collection-time skip path is strictly gated on ``_sift_step`` being unset + so it does not duplicate steps the fixture already created. + """ outcome = yield report = outcome.get_result() - if report.outcome == "skipped": - # Skipped tests bypass the autouse `step` fixture, so we record the step manually here. - if REPORT_CONTEXT: - with REPORT_CONTEXT.new_step(name=item.name) as new_step: - new_step.current_step.update({"status": TestStatus.SKIPPED}) setattr(item, "rep_" + report.when, call) + setattr(item, "_sift_phase_" + report.when, SimpleNamespace(call=call, report=report)) + + # Collection-time skip (``@pytest.mark.skip`` / ``skipif``): the autouse + # ``step`` fixture never runs, so the hook is the only place that can + # record a step. Presence of ``_sift_step`` is the "fixture ran" signal. 
+ if ( + REPORT_CONTEXT + and report.when == "setup" + and report.outcome == "skipped" + and getattr(item, "_sift_step", None) is None + ): + with REPORT_CONTEXT.new_step(name=item.name) as inline_step: + inline_step.current_step.update({"status": TestStatus.SKIPPED}) + + if report.when == "teardown": + _finalize_after_teardown(item, report) def _report_context_impl( @@ -748,13 +895,9 @@ def _step_impl( with report_context.new_step( name=name, description=existing_docstring, assertion_as_fail_not_error=False ) as new_step: + node._sift_step = new_step yield new_step - if hasattr(node, "rep_call") and node.rep_call.excinfo: - new_step.update_step_from_result( - node.rep_call.excinfo, - node.rep_call.excinfo.value, - node.rep_call.excinfo.tb, - ) + _resolve_initial_status(new_step, node) @pytest.fixture(autouse=True) diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py index bd2ec917f..3454ef5e2 100644 --- a/python/lib/sift_client/util/test_results/context_manager.py +++ b/python/lib/sift_client/util/test_results/context_manager.py @@ -43,6 +43,17 @@ logger = logging.getLogger(__name__) +def format_truncated_traceback( + exc: type[BaseException] | None, + exc_value: BaseException | None, + tb: object | None, +) -> ErrorInfo: + """Format an ErrorInfo from a traceback, keeping the first frame and the last 10.""" + stack = traceback.format_exception(exc, exc_value, tb) # type: ignore[arg-type] + stack = [stack[0], *stack[-10:]] if len(stack) > 10 else stack + return ErrorInfo(error_code=1, error_message="".join(stack)) + + def log_replay_instructions(log_file: str | Path | None) -> None: """Surface replay instructions when an import/replay attempt fails. 
@@ -363,30 +374,33 @@ def record_step_outcome(self, outcome: bool, step: TestStep): self.open_step_results[step.step_path] = False self.any_failures = True - def resolve_and_propagate_step_result( - self, - step: TestStep, - error_info: ErrorInfo | None = None, - ) -> bool: - """Resolve the result of a step and propagate the result to the parent step if it failed.""" - result = self.open_step_results.get(step.step_path, True) - if error_info: - result = False - if step.status != TestStatus.IN_PROGRESS: - # The step was manually completed so use that result. - # Skipped steps are considered passed. - result = step.status in (TestStatus.PASSED, TestStatus.SKIPPED) - - # Update the parent step results if this step failed (true by default so no need to do anything if we didn't fail). - if not result: + def mark_step_failed_after_close(self, step: TestStep): + """Mark a step's parent as failed after the step has already been popped from the stack. + + Used by the pytest plugin when a teardown-phase report fires after the + fixture's ``__exit__`` has already resolved and exited the step. + """ + self.any_failures = True + path_parts = step.step_path.split(".") + if len(path_parts) > 1: + self.open_step_results[".".join(path_parts[:-1])] = False + + def propagate_step_result(self, step: TestStep, status: TestStatus) -> bool: + """Propagate this step's final status to the parent step. + + Status is the governor: anything outside ``{PASSED, SKIPPED}`` counts + as a failure for the parent. ``error_info`` is intentionally not + consulted here; it is free-form diagnostic data that may sit on a + step regardless of status. 
+ """ + succeeded = status in (TestStatus.PASSED, TestStatus.SKIPPED) + if not succeeded: self.any_failures = True self.open_step_results[step.step_path] = False path_parts = step.step_path.split(".") if len(path_parts) > 1: - parent_step_path = ".".join(path_parts[:-1]) - self.open_step_results[parent_step_path] = False - - return result + self.open_step_results[".".join(path_parts[:-1])] = False + return succeeded def exit_step(self, step: TestStep): """Exit a step and update the report context.""" @@ -407,6 +421,10 @@ class NewStep(AbstractContextManager): client: SiftClient assertion_as_fail_not_error: bool = True current_step: TestStep | None = None + # Set by the pytest plugin's ``_resolve_initial_status`` to signal that + # status was already resolved upstream and ``__exit__`` should skip + # re-classifying. Read via ``getattr`` so unset is treated as False. + _sift_managed_externally: bool = False def __init__( self, @@ -471,34 +489,55 @@ def update_step_from_result( returns: The false if step failed or errored, true otherwise. """ + current_step = self.current_step + if current_step is None: + # The step was never opened; nothing to resolve. Treat as a pass + # so callers that branch on the return value don't see a spurious + # failure. + return True + error_info = None - assert self.current_step is not None + aborted = False + errored = False if exc: if isinstance(exc_value, AssertionError) and not self.assertion_as_fail_not_error: # If we're not showing assertion errors (i.e. pytest), mark step as failed but don't set error info. - self.report_context.record_step_outcome(False, self.current_step) + self.report_context.record_step_outcome(False, current_step) + elif isinstance(exc_value, (KeyboardInterrupt, SystemExit)): + # Hard exit propagating through the substep stack: record as + # ABORTED so every in-progress step on the way out reflects + # the abort rather than coercing to ERROR. 
+ aborted = True + error_info = format_truncated_traceback(exc, exc_value, tb) else: - stack = traceback.format_exception(exc, exc_value, tb) # type: ignore - stack = [stack[0], *stack[-10:]] if len(stack) > 10 else stack - trace = "".join(stack) - error_info = ErrorInfo( - error_code=1, - error_message=trace, - ) - - # Resolve the status of this step (i.e. fail if children failed) and propagate the result to the parent step. - result = self.report_context.resolve_and_propagate_step_result( - self.current_step, error_info - ) - - # Mark the step as completed - status = self.current_step.status + errored = True + error_info = format_truncated_traceback(exc, exc_value, tb) + + # Status is the governor: anything other than IN_PROGRESS was set + # deliberately (manual override, plugin pre-resolution, etc.) and must + # not be silently overwritten by side-channel signals. When the step is + # still IN_PROGRESS, resolve from independent state: aborts first, then + # a child-failed signal (parents inherit FAILED, not the originating + # ERROR), then the step's own captured exception, then the children-pass + # default. error_info is diagnostic and never drives status. + status = current_step.status if status == TestStatus.IN_PROGRESS: - # Update the status only if the step was in progress i.e. not updated elsewhere. - status = TestStatus.PASSED if result else TestStatus.FAILED - if error_info: - status = TestStatus.ERROR - self.current_step.update( + children_passed = self.report_context.open_step_results.get( + current_step.step_path, True + ) + if aborted: + status = TestStatus.ABORTED + elif not children_passed: + status = TestStatus.FAILED + elif errored: + status = TestStatus.ERROR + else: + status = TestStatus.PASSED + + # Propagate based on the resolved status; error_info rides along as + # pure diagnostics and does not affect propagation. 
+ result = self.report_context.propagate_step_result(current_step, status) + current_step.update( { "status": status, "end_time": datetime.now(timezone.utc), @@ -509,6 +548,28 @@ def update_step_from_result( return result def __exit__(self, exc, exc_value, tb): + if getattr(self, "_sift_managed_externally", False): + # The pytest fixture already resolved status from phase reports. + # Propagate based on that resolved status, emit one update_step + # with the resolved values, and pop from the stack without + # re-classifying. + current_step = self.current_step + if current_step is None: + # The step was never opened; nothing to propagate. + return True + result = self.report_context.propagate_step_result(current_step, current_step.status) + current_step.update( + { + "status": current_step.status, + "end_time": datetime.now(timezone.utc), + "error_info": current_step.error_info, + }, + ) + self.report_context.exit_step(current_step) + if hasattr(self, "force_result"): + result = self.force_result + return result + result = self.update_step_from_result(exc, exc_value, tb) # Now that the step is updated. Let the report context handle removing it from the stack and updating the report context. 
diff --git a/python/mkdocs.yml b/python/mkdocs.yml index 5108b7e4a..af174aa4f 100644 --- a/python/mkdocs.yml +++ b/python/mkdocs.yml @@ -62,6 +62,9 @@ nav: # Will migrate to Guides in the future - Pytest Plugin: examples/pytest_plugin.md - Pytest Plugin Quickstart: examples/pytest_plugin_quickstart.md + - Guides: + - Pytest Plugin: + - Pass/Fail Behavior: guides/pytest_plugin/pass_fail_behavior.md # - Guides: # - Logging # - Error Handling From 45a5f8d78c15d8b907e7a53ee7f7938c5ac65755 Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 26 May 2026 12:38:40 -0700 Subject: [PATCH 06/19] Python(feat): report assertion message in report as error info for pytest plugin (#587) --- python/docs/examples/pytest_plugin.md | 12 ++-- .../docs/examples/pytest_plugin_quickstart.md | 13 +++-- .../pytest_plugin/pass_fail_behavior.md | 6 +- python/examples/pytest_plugin/README.md | 15 ++--- .../tests/with_sift/test_with_sift_demo.py | 17 +++--- .../pytest_plugin/_step_status_capture.py | 10 ++++ .../_tests/pytest_plugin/test_pass_fail.py | 55 +++++++++++++++++++ .../_tests/util/test_test_results_utils.py | 6 +- python/lib/sift_client/pytest_plugin.py | 6 +- .../util/test_results/context_manager.py | 47 +++++++++++++--- 10 files changed, 150 insertions(+), 37 deletions(-) diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md index 5a40d450d..69dde25ae 100644 --- a/python/docs/examples/pytest_plugin.md +++ b/python/docs/examples/pytest_plugin.md @@ -86,7 +86,7 @@ def sift_client() -> SiftClient: | Name | Kind | Scope | Purpose | |---|---|---|---| | `report_context` | fixture (autouse) | session | The `ReportContext` backing the run's `TestReport`. Use it to attach metadata or open ad-hoc steps. | -| `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, and `current_step`. 
| +| `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, `fail_if_measurements_failed`, and `current_step`. | | `_hierarchy_parents` | internal fixture (autouse) | function | Opens a parent step for each `pytest.Package`, `pytest.Module`, and `pytest.Class` ancestor of the current test. Each layer is gated independently — see [ini options](#ini-options). | | `_parametrize_parents` | internal fixture (autouse) | function | Opens a parent step for each `@pytest.mark.parametrize` axis (and fixture parametrization), nested inside the hierarchy parents. | | `client_has_connection` | fixture | session | Calls `sift_client.ping.ping()`; consulted by `report_context` at session start in online mode (the default). Override to skip the ping or use a different reachability signal. | @@ -263,13 +263,15 @@ def test_no_fixtures_still_creates_a_step(): def test_measure_a_single_value(step): """Take `step` explicitly when you want to record a measurement.""" voltage = 4.97 - passed = step.measure( + step.measure( name="battery_voltage", value=voltage, bounds={"min": 4.8, "max": 5.2}, unit="V", ) - assert passed, f"voltage {voltage}V out of bounds" + # An out-of-bounds measurement already marks the step FAILED. Call this at + # the end to also fail pytest, without an assertion message in error_info. + step.fail_if_measurements_failed() def test_measure_strings_and_booleans(step): @@ -612,8 +614,8 @@ def test_only_outliers_recorded(step): unit="psi", ) # Returns False because 99.9 is out of bounds. The step is already - # marked failed; raise here only if you also want pytest to fail. - assert all_in_bounds + # marked failed; call this only if you also want pytest to fail. + step.fail_if_measurements_failed() ``` !!! 
note "`measure_all` requires at least one bound" diff --git a/python/docs/examples/pytest_plugin_quickstart.md b/python/docs/examples/pytest_plugin_quickstart.md index 54328c707..bd8414aa7 100644 --- a/python/docs/examples/pytest_plugin_quickstart.md +++ b/python/docs/examples/pytest_plugin_quickstart.md @@ -136,7 +136,7 @@ TestReport (FAILED, since failures propagate up from leaves) │ (test_excluded: @sift_exclude, runs in pytest, NOT in tree) ├── test_measure_series PASSED ├── test_failed_measurement_marks_sift_step_failed FAILED (pytest PASSED) - ├── test_assert_measurements_passed_at_end FAILED (pytest FAILED) + ├── test_fail_if_measurements_failed_at_end FAILED (pytest FAILED) ├── test_report_level_metadata PASSED └── TestClassStep ├── test_parametrize @@ -158,12 +158,13 @@ The `with_sift` module shows two patterns for handling measurement results: `test_failed_measurement_marks_sift_step_failed` lets the test keep passing in pytest while the Sift step is `FAILED` (useful when measurements are diagnostic data you want to collect regardless of outcome); and -`test_assert_measurements_passed_at_end` takes every measurement first and -then asserts `step.measurements_passed` once at the end, so every +`test_fail_if_measurements_failed_at_end` takes every measurement first and +then calls `step.fail_if_measurements_failed()` once at the end, so every measurement still lands in the report even when one fails. The end-of-test -assertion is the recommended pattern: asserting on an individual -`step.measure(...)` call short-circuits on the first failure and skips -every measurement that follows. Expected +call is the recommended pattern: it fails via `pytest.fail` (no assertion +noise in `error_info`), and unlike asserting on an individual +`step.measure(...)` call it does not short-circuit on the first failure and +skip every measurement that follows. Expected pytest output is `16 passed, 3 failed, 1 skipped`. 
Flip any of the `sift_*_step` / `sift_parametrize_nesting` flags in diff --git a/python/docs/guides/pytest_plugin/pass_fail_behavior.md b/python/docs/guides/pytest_plugin/pass_fail_behavior.md index 6e9b1d6e3..5c0f178c2 100644 --- a/python/docs/guides/pytest_plugin/pass_fail_behavior.md +++ b/python/docs/guides/pytest_plugin/pass_fail_behavior.md @@ -26,8 +26,10 @@ The statuses below come from `sift_client.sift_types.test_report.TestStatus`. | `pytest.fail("...")` from the body | `pytest.fail("intentional failure")` | `FAILED` | | Uncaught non-assertion exception | `raise ValueError("boom")` | `ERROR` | -A non-assertion exception gets its formatted traceback recorded on -`step.error_info.error_message`. +An assertion failure records the concise assertion message (the exception +line(s), no traceback frames) on `step.error_info.error_message` while still +mapping to `FAILED`. A non-assertion exception gets its formatted traceback +recorded on `step.error_info.error_message`. ## Hard exits diff --git a/python/examples/pytest_plugin/README.md b/python/examples/pytest_plugin/README.md index c74a9c939..6eeaf9a34 100644 --- a/python/examples/pytest_plugin/README.md +++ b/python/examples/pytest_plugin/README.md @@ -75,7 +75,7 @@ TestReport (FAILED, since failures propagate up from leaves) │ (test_excluded: @sift_exclude, runs in pytest, NOT in tree) ├── test_measure_series PASSED ├── test_failed_measurement_marks_sift_step_failed FAILED (pytest PASSED) - ├── test_assert_measurements_passed_at_end FAILED (pytest FAILED) + ├── test_fail_if_measurements_failed_at_end FAILED (pytest FAILED) ├── test_report_level_metadata PASSED └── TestClassStep ├── test_parametrize @@ -97,12 +97,13 @@ The `with_sift` module shows two patterns for handling measurement results: `test_failed_measurement_marks_sift_step_failed` lets the test keep passing in pytest while the Sift step is `FAILED` (useful when measurements are diagnostic data you want to collect regardless of outcome); and 
-`test_assert_measurements_passed_at_end` takes every measurement first and -then asserts `step.measurements_passed` once at the end, so every +`test_fail_if_measurements_failed_at_end` takes every measurement first and +then calls `step.fail_if_measurements_failed()` once at the end, so every measurement still lands in the report even when one fails. The end-of-test -assertion is the recommended pattern: asserting on an individual -`step.measure(...)` call short-circuits on the first failure and skips -every measurement that follows. Expected +call is the recommended pattern: it fails via `pytest.fail` (no assertion +noise in `error_info`), and unlike asserting on an individual +`step.measure(...)` call it does not short-circuit on the first failure and +skip every measurement that follows. Expected pytest output is `16 passed, 3 failed, 1 skipped`. Toggle any of the `sift_*_step` / `sift_parametrize_nesting` flags in @@ -115,5 +116,5 @@ Toggle any of the `sift_*_step` / `sift_parametrize_nesting` flags in | `conftest.py` | Plugin registration via `pytest_plugins`; optional `load_dotenv()` | | `pytest.ini` | The four nesting flags + git metadata flag at their defaults | | `tests/pytest_only/test_pytest_only_demo.py` | Plain pytest tests with no Sift APIs. 
The plugin captures pass/fail automatically; covers functions, fixtures, parametrize, classes, plus one each of `AssertionError` (FAILED), `pytest.skip` (SKIPPED), and a raised `ValueError` (ERROR) | -| `tests/with_sift/test_with_sift_demo.py` | `step.measure` (numeric/string/bool bounds, units, description, metadata, `channel_names`), `step.measure_avg` and `step.measure_all` for series, an out-of-bounds measurement (pytest PASSED, Sift step FAILED), the recommended `assert step.measurements_passed` end-of-test pattern that fails pytest while still recording every measurement, nested `step.substep` (with step-level `metadata=...`), `@pytest.mark.sift_exclude`, class step + class docstring → description, nested classes, stacked `@pytest.mark.parametrize`, `step.report_outcome`, and session-level metadata via `report_context.report.update({...})` | +| `tests/with_sift/test_with_sift_demo.py` | `step.measure` (numeric/string/bool bounds, units, description, metadata, `channel_names`), `step.measure_avg` and `step.measure_all` for series, an out-of-bounds measurement (pytest PASSED, Sift step FAILED), the recommended `step.fail_if_measurements_failed()` end-of-test call that fails pytest while still recording every measurement, nested `step.substep` (with step-level `metadata=...`), `@pytest.mark.sift_exclude`, class step + class docstring → description, nested classes, stacked `@pytest.mark.parametrize`, `step.report_outcome`, and session-level metadata via `report_context.report.update({...})` | | `tests/{pytest_only,with_sift}/__init__.py` | Each Python package (directory with `__init__.py`) becomes a parent step in the report tree | diff --git a/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py b/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py index 34bf602b7..ee3eef513 100644 --- a/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py +++ b/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py 
@@ -94,24 +94,25 @@ def test_failed_measurement_marks_sift_step_failed(step) -> None: ) -def test_assert_measurements_passed_at_end(step) -> None: - """Recommended pattern: take every measurement first, then assert - ``step.measurements_passed`` once at the end. +def test_fail_if_measurements_failed_at_end(step) -> None: + """Recommended pattern: take every measurement first, then call + ``step.fail_if_measurements_failed()`` once at the end. Asserting on individual ``step.measure(...)`` calls raises ``AssertionError`` on the first failure, so any measurements after the failing one never run and never land in the Sift report. The end-of-test - assertion is strictly better for diagnostic completeness: every - measurement is recorded, including the failures, and the aggregate - result is then folded into the pytest outcome. + call is strictly better for diagnostic completeness: every measurement is + recorded, including the failures, and the aggregate result is then folded + into the pytest outcome. It fails via ``pytest.fail`` rather than an + assertion, so the failed step carries no assertion noise in ``error_info``. The ``b`` measurement below is deliberately out of bounds. ``c`` still - runs and is recorded; only the final ``assert`` fires. + runs and is recorded; only the final call fails the test. 
""" step.measure(name="a", value=1.0, bounds={"min": 0.0, "max": 2.0}) step.measure(name="b", value=99.0, bounds={"min": 0.0, "max": 2.0}) # out of bounds step.measure(name="c", value=1.5, bounds={"min": 0.0, "max": 2.0}) # still recorded - assert step.measurements_passed, "one or more measurements out of bounds" + step.fail_if_measurements_failed() def test_report_level_metadata(step, report_context) -> None: diff --git a/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py index e92d1726e..77e09bdf5 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py +++ b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py @@ -27,6 +27,7 @@ class CapturedStep: step_path: str parent_step_id: str | None statuses: list[TestStatus] = field(default_factory=list) + error_messages: list[str] = field(default_factory=list) _PROTO_STATUS_NAMES = { @@ -58,6 +59,7 @@ def parse_log(log_path: Path) -> dict[str, CapturedStep]: for request_type, response_id, json_str in iter_log_data_lines(log_path): payload = json.loads(json_str) test_step = payload.get("testStep", {}) + error_message = test_step.get("errorInfo", {}).get("errorMessage") if request_type == "CreateTestStep" and response_id: steps[response_id] = CapturedStep( step_id=response_id, @@ -65,12 +67,15 @@ def parse_log(log_path: Path) -> dict[str, CapturedStep]: step_path=test_step.get("stepPath", ""), parent_step_id=test_step.get("parentStepId") or None, statuses=[_status(test_step.get("status"))], + error_messages=[error_message] if error_message else [], ) elif request_type == "UpdateTestStep": step_id = test_step.get("testStepId") new_status = test_step.get("status") if step_id and step_id in steps and new_status is not None: steps[step_id].statuses.append(_status(new_status)) + if error_message: + steps[step_id].error_messages.append(error_message) return steps @@ -117,6 +122,11 @@ def 
final_status(name: str) -> TestStatus | None: return step.statuses[-1] if step and step.statuses else None +def final_error_message(name: str) -> str | None: + step = test_step(name) + return step.error_messages[-1] if step and step.error_messages else None + + def load_steps(log_path: Path) -> list[dict]: """Load the offline log as a list of step records keyed by hierarchy fields. diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py index 0e1540ce7..d5f9674ce 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py @@ -91,6 +91,12 @@ def test_x(): """, ) assert capture.final_status("test_x") == TestStatus.FAILED + # The concise assertion message is recorded on error_info for the UI, but + # without the full traceback frames. + message = capture.final_error_message("test_x") + assert message is not None + assert "assert 1 == 2" in message + assert "Traceback (most recent call last)" not in message def test_generic_exception_maps_to_error(inner): @@ -131,6 +137,34 @@ def test_x(): assert capture.final_status("test_x") == TestStatus.FAILED +def test_fail_if_measurements_failed_fails_without_error_info(inner): + # An out-of-bounds measurement plus step.fail_if_measurements_failed() + # fails the test via pytest.fail, so the step is FAILED with no assertion + # message in error_info (the reason this helper exists over `assert`). 
+ _run( + inner, + """ + def test_x(step): + step.measure(name="b", value=99.0, bounds={"min": 0.0, "max": 2.0}) + step.fail_if_measurements_failed() + """, + ) + assert capture.final_status("test_x") == TestStatus.FAILED + assert capture.final_error_message("test_x") is None + + +def test_fail_if_measurements_failed_passes_when_in_bounds(inner): + _run( + inner, + """ + def test_x(step): + step.measure(name="a", value=1.0, bounds={"min": 0.0, "max": 2.0}) + step.fail_if_measurements_failed() + """, + ) + assert capture.final_status("test_x") == TestStatus.PASSED + + def test_keyboard_interrupt_leaves_step_in_progress(inner): # Case: CALL-06 # KeyboardInterrupt aborts the session before the call-phase makereport @@ -174,6 +208,27 @@ def test_x(step): assert test_x.statuses[-1] == TestStatus.FAILED +def test_substep_assert_failure_records_message_with_failed(inner): + # Case: CALL-02 (substep). A substep inherits assertion_as_fail_not_error + # from the autouse step (False under pytest), so a failed assertion in a + # substep resolves to FAILED and records the concise assertion message. 
+ _run( + inner, + """ + def test_x(step): + with step.substep(name="inner"): + assert 1 == 2 + """, + ) + inner_sub = next(iter(capture.steps_by_name("inner")), None) + assert inner_sub is not None + assert inner_sub.statuses[-1] == TestStatus.FAILED + assert inner_sub.error_messages + message = inner_sub.error_messages[-1] + assert "assert 1 == 2" in message + assert "Traceback (most recent call last)" not in message + + # --------------------------------------------------------------------------- # Skip paths # --------------------------------------------------------------------------- diff --git a/python/lib/sift_client/_tests/util/test_test_results_utils.py b/python/lib/sift_client/_tests/util/test_test_results_utils.py index 4fd6ab112..c41587314 100644 --- a/python/lib/sift_client/_tests/util/test_test_results_utils.py +++ b/python/lib/sift_client/_tests/util/test_test_results_utils.py @@ -463,7 +463,11 @@ def test_bad_assert(self, report_context, step): assert parent_step.status == TestStatus.FAILED assert substep.status == TestStatus.FAILED assert nested_substep.status == TestStatus.FAILED - assert nested_substep.error_info is None + # The assertion-as-fail path records the concise assertion message (no + # traceback frames) on error_info while keeping the FAILED status. 
+ assert nested_substep.error_info is not None + assert "AssertionError" in nested_substep.error_info.error_message + assert "Traceback (most recent call last)" not in nested_substep.error_info.error_message assert nested_substep_2.status == TestStatus.ERROR assert "AssertionError" in nested_substep_2.error_info.error_message assert sibling_substep.status == TestStatus.PASSED diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index c3b303ac8..09aca5e33 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -14,7 +14,10 @@ from sift_client.errors import SiftWarning from sift_client.sift_types.test_report import ErrorInfo, TestStatus from sift_client.util.test_results import ReportContext -from sift_client.util.test_results.context_manager import format_truncated_traceback +from sift_client.util.test_results.context_manager import ( + format_assertion_message, + format_truncated_traceback, +) class SiftPytestPluginWarning(SiftWarning): @@ -588,6 +591,7 @@ def _resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: status = TestStatus.FAILED elif isinstance(excinfo.value, AssertionError): status = TestStatus.FAILED + error_info = format_assertion_message(excinfo.type, excinfo.value) elif isinstance(excinfo.value, pytest.fail.Exception): status = TestStatus.FAILED elif isinstance(excinfo.value, (KeyboardInterrupt, SystemExit)): diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py index 3454ef5e2..48a89b2d9 100644 --- a/python/lib/sift_client/util/test_results/context_manager.py +++ b/python/lib/sift_client/util/test_results/context_manager.py @@ -54,6 +54,20 @@ def format_truncated_traceback( return ErrorInfo(error_code=1, error_message="".join(stack)) +def format_assertion_message( + exc: type[BaseException] | None, + exc_value: BaseException | None, +) -> ErrorInfo: + """Format an 
ErrorInfo from just the exception line(s), no traceback frames. + + For assertion failures the rewritten ``assert`` explanation lives on the + exception itself, so stack frames add noise without information. Equivalent + to pytest's ``excinfo.exconly()``. + """ + lines = traceback.format_exception_only(exc, exc_value) # type: ignore[arg-type] + return ErrorInfo(error_code=1, error_message="".join(lines)) + + def log_replay_instructions(log_file: str | Path | None) -> None: """Surface replay instructions when an import/replay attempt fails. @@ -465,15 +479,31 @@ def measurements_passed(self) -> bool: """True if every measurement recorded directly on this step has passed. Counts only ``step.measure``, ``step.measure_avg``, and - ``step.measure_all`` calls on this ``NewStep`` instance. Useful for - the ``assert step.measurements_passed`` pattern at the end of a test - when you want to fail pytest on any out-of-bounds measurement - without short-circuiting on the first failure (asserting on - individual ``measure(...)`` return values skips every measurement - after the failing one). + ``step.measure_all`` calls on this ``NewStep`` instance. Pair it with + ``fail_if_measurements_failed()`` at the end of a test to fail pytest on + any out-of-bounds measurement without short-circuiting on the first + failure (asserting on individual ``measure(...)`` return values skips + every measurement after the failing one). """ return self._failed_measurement_count == 0 + def fail_if_measurements_failed( + self, message: str = "one or more measurements out of bounds" + ) -> None: + """Fail the pytest test if any measurement on this step was out of bounds. + + Use instead of ``assert step.measurements_passed``: it fails via + ``pytest.fail`` so the step resolves to FAILED without attaching an + assertion message to ``error_info``. No-op when every measurement + passed. Call once at the end of the test so every measurement is still + recorded before the failure fires. 
+ """ + if self.measurements_passed: + return + import pytest + + pytest.fail(message, pytrace=False) + def update_step_from_result( self, exc: type[Exception] | None, @@ -501,8 +531,11 @@ def update_step_from_result( errored = False if exc: if isinstance(exc_value, AssertionError) and not self.assertion_as_fail_not_error: - # If we're not showing assertion errors (i.e. pytest), mark step as failed but don't set error info. + # pytest-style: an assertion is a plain failure, not an error. Record the + # failure and attach the concise assertion message (no traceback) so the + # UI can show what was asserted. self.report_context.record_step_outcome(False, current_step) + error_info = format_assertion_message(exc, exc_value) elif isinstance(exc_value, (KeyboardInterrupt, SystemExit)): # Hard exit propagating through the substep stack: record as # ABORTED so every in-progress step on the way out reflects From 9f8f09dcf75f06774c995e28edd51ec83c86f6a9 Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 26 May 2026 13:30:38 -0700 Subject: [PATCH 07/19] Python(chore): pytest docs reorganization (#589) --- python/docs/examples/index.md | 5 +- python/docs/examples/pytest_plugin.md | 828 +----------------- .../docs/examples/pytest_plugin_quickstart.md | 8 +- python/docs/guides/index.md | 11 + .../guides/pytest_plugin/configuration.md | 220 +++++ python/docs/guides/pytest_plugin/index.md | 122 +++ .../pytest_plugin/pass_fail_behavior.md | 19 +- .../guides/pytest_plugin/report_structure.md | 421 +++++++++ .../guides/pytest_plugin/running_modes.md | 138 +++ python/mkdocs.yml | 11 +- 10 files changed, 957 insertions(+), 826 deletions(-) create mode 100644 python/docs/guides/index.md create mode 100644 python/docs/guides/pytest_plugin/configuration.md create mode 100644 python/docs/guides/pytest_plugin/index.md create mode 100644 python/docs/guides/pytest_plugin/report_structure.md create mode 100644 python/docs/guides/pytest_plugin/running_modes.md diff --git 
a/python/docs/examples/index.md b/python/docs/examples/index.md index 936a35cfd..baf2601e5 100644 --- a/python/docs/examples/index.md +++ b/python/docs/examples/index.md @@ -6,9 +6,12 @@ This section contains interactive Jupyter notebook examples demonstrating how to - **[Basic Usage](basic.ipynb)** - Introduction to the Sift Python client, covering basic operations and API usage - **[Data Ingestion](ingestion.ipynb)** - Learn how to ingest telemetry data into Sift using various methods -- **[Pytest Plugin](pytest_plugin.md)** - Turn a pytest run into a Sift TestReport with measurements, nested steps, and pass/fail outcomes - **[Pytest Plugin Quickstart](pytest_plugin_quickstart.md)** - Guided tour of the runnable demo project under `python/examples/pytest_plugin/` +For the conceptual reference on the pytest plugin (fixtures, configuration, +report structure, and pass/fail behavior), see the +[Pytest Plugin guide](../guides/pytest_plugin/index.md). + ## Running Examples Locally To run these examples on your local machine: diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md index 69dde25ae..986e05e1e 100644 --- a/python/docs/examples/pytest_plugin.md +++ b/python/docs/examples/pytest_plugin.md @@ -1,822 +1,14 @@ -# Pytest Plugin - -The Sift Python client ships a pytest plugin that turns a pytest run into a -`TestReport` in Sift. Each test function becomes a `TestStep`, measurements -land as rows under that step, and failures propagate up through nested -substeps to the report itself. - -This page walks through wiring the plugin into a project, the fixtures and -hooks it provides, and the patterns you'll use day-to-day. - -!!! info "Where the plugin lives" - The plugin lives at `sift_client.pytest_plugin`. It is - **not** registered as a `pytest11` entry point. Projects opt in with a - `pytest_plugins` declaration in their top-level `conftest.py`. 
Pytest - then loads the module as a real plugin: the fixtures, CLI options, and - `pytest_runtest_makereport` hook all register through standard pytest - machinery, so `pytest --trace-config` lists it and - `pytest -p no:sift_client.pytest_plugin` disables it. - -## Install - -```bash -pip install sift-stack-py pytest python-dotenv -``` - -Set the connection details in a `.env` next to your tests: - -```bash -SIFT_API_KEY="your-api-key" -SIFT_GRPC_URI="..." -SIFT_REST_URI="..." -``` - -The `SIFT_GRPC_URI` and `SIFT_REST_URI` are the gRPC and REST endpoints for your Sift organization. You can find these on the Sift Manage page as well as generate an API key. - -## Wire the plugin into `conftest.py` - -A single `pytest_plugins` declaration in your top-level `conftest.py` is all -that's required. The plugin ships a default `sift_client` fixture that reads -`SIFT_API_KEY`, `SIFT_GRPC_URI`, and `SIFT_REST_URI` from the environment. - -```python title="conftest.py" -from dotenv import load_dotenv - -load_dotenv() - -pytest_plugins = ["sift_client.pytest_plugin"] -``` - -That's the whole setup. Every test in the session will now create a step on a -single shared `TestReport`. - -### Customizing the `SiftClient` - -To construct the client differently (custom TLS, timeouts, alternate -credentials, etc.), override the `sift_client` fixture in your conftest. The -plugin's default falls away in favor of your definition. 
- -```python title="conftest.py" -import os - -import pytest -from dotenv import load_dotenv - -from sift_client import SiftClient, SiftConnectionConfig - -load_dotenv() - -pytest_plugins = ["sift_client.pytest_plugin"] - - -@pytest.fixture(scope="session") -def sift_client() -> SiftClient: - return SiftClient( - connection_config=SiftConnectionConfig( - api_key=os.getenv("SIFT_API_KEY"), - grpc_url=os.getenv("SIFT_GRPC_URI"), - rest_url=os.getenv("SIFT_REST_URI"), - use_ssl=False, - ) - ) -``` - -## Plugin provided fixtures - -| Name | Kind | Scope | Purpose | -|---|---|---|---| -| `report_context` | fixture (autouse) | session | The `ReportContext` backing the run's `TestReport`. Use it to attach metadata or open ad-hoc steps. | -| `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, `fail_if_measurements_failed`, and `current_step`. | -| `_hierarchy_parents` | internal fixture (autouse) | function | Opens a parent step for each `pytest.Package`, `pytest.Module`, and `pytest.Class` ancestor of the current test. Each layer is gated independently — see [ini options](#ini-options). | -| `_parametrize_parents` | internal fixture (autouse) | function | Opens a parent step for each `@pytest.mark.parametrize` axis (and fixture parametrization), nested inside the hierarchy parents. | -| `client_has_connection` | fixture | session | Calls `sift_client.ping.ping()`; consulted by `report_context` at session start in online mode (the default). Override to skip the ping or use a different reachability signal. | - -### CLI options - -| Flag | Default | Effect | -|---|---|---| -| `--sift-offline` | off (online) | Skip the session-start ping and don't contact Sift. All create/update calls go to the JSONL log file for later replay via `import-test-result-log`. Missing `SIFT_*` env vars are tolerated; placeholders are filled. | -| `--sift-disabled` | off | Skip Sift entirely. 
Nothing contacts the API and no log file is written; `step.measure(...)` still evaluates bounds and returns a real pass/fail boolean. Also honored via `SIFT_DISABLED=1`. Supersedes every other flag (disabled wins over offline). | -| `--sift-log-file=` | temp file | Where the JSONL log of create/update calls goes. With a log file set, the plugin spawns an `import-test-result-log --incremental` worker that polls the file and replays entries against Sift while the run is in flight. Pass `false` to disable the file entirely; create/update calls then go straight to the API synchronously during tests. Incompatible with `--sift-offline` since offline mode needs the log file as its sole sink. | -| `--no-sift-git-metadata` | git metadata on | Skip capturing git repo/branch/commit on the report's metadata. | - -These can be passed permanently via `addopts`: - -```ini title="pytest.ini" -[pytest] -addopts = --sift-offline -``` - -Or set the matching ini key directly (recommended for stable per-project -configuration). Each CLI flag has a corresponding key under -`[tool.pytest.ini_options]` in `pyproject.toml` or `[pytest]` in `pytest.ini`. -CLI flags, when passed, override the ini values. - -| Ini key | Type | Equivalent CLI flag | -|---|---|---| -| `sift_log_file` | string (`true` / `false` / `none` / path) | `--sift-log-file=` | -| `sift_git_metadata` | bool (default `true`) | `--no-sift-git-metadata` (sets to `false`) | -| `sift_offline` | bool (default `false`) | `--sift-offline` | -| `sift_disabled` | bool (default `false`) | `--sift-disabled` (also honors `SIFT_DISABLED` env var) | -| `sift_autouse` | bool (default `true`) | _(no CLI flag; controls the marker gate below)_ | -| `sift_package_step` | bool (default `true`) | _(ini-only)_ — open a parent step for each Python package (directory with `__init__.py`) in the test path. | -| `sift_module_step` | bool (default `true`) | _(ini-only)_ — open a parent step for each test module (file). 
| -| `sift_class_step` | bool (default `true`) | _(ini-only)_ — open a parent step for each test class, including nested classes. | -| `sift_parametrize_nesting` | bool (default `true`) | _(ini-only)_ — cluster parametrized tests under shared parents (`test_x → axis=value`) instead of flat leaves (`test_x[value]`). | - -The default `sift_client` fixture reads its two URIs from environment first -and falls back to ini keys when the env vars are unset. `SIFT_API_KEY` is -intentionally env-only — keep it out of source control and supply it through -`pytest-dotenv` (see [API key handling](#api-key-handling) below). The env -var wins when both are set, so secrets injected into a CI environment -continue to override values committed to `pyproject.toml`. There are no CLI -flags for credentials. - -| Ini key | Environment variable | Notes | -|---|---|---| -| _(none)_ | `SIFT_API_KEY` | Env-only. Use `.env` + `pytest-dotenv` locally; inject from your secret store in CI. | -| `sift_grpc_uri` | `SIFT_GRPC_URI` | Stable per-org gRPC endpoint; safe to commit. | -| `sift_rest_uri` | `SIFT_REST_URI` | Stable per-org REST endpoint; safe to commit. | - -```toml title="pyproject.toml" -[tool.pytest.ini_options] -sift_offline = true -sift_git_metadata = false -sift_grpc_uri = "your-org.sift.example:443" -sift_rest_uri = "https://your-org.sift.example" -``` - -```ini title="pytest.ini" -[pytest] -sift_offline = true -sift_git_metadata = false -sift_grpc_uri = your-org.sift.example:443 -sift_rest_uri = https://your-org.sift.example -``` - -#### API key handling - -`SIFT_API_KEY` is deliberately read from the process environment only. The -recommended workflow uses the -[`pytest-dotenv`](https://pypi.org/project/pytest-dotenv/) plugin (already a -dependency of `sift-stack-py`), which loads variables from a `.env` file -into `os.environ` before tests run. - -1. Add `.env` to `.gitignore`. -2. 
Drop your key into `.env` at the project root: - - ```bash title=".env" - SIFT_API_KEY=sk-...your-key... - ``` - -3. In CI, set `SIFT_API_KEY` directly via your provider's secret manager - instead of committing a `.env` file. - -`pytest-dotenv` picks the file up automatically; no `pytest_configure` -glue is needed. - -!!! warning "FedRAMP / shared environments" - Pass `--sift-log-file=false` (or set the ini key to `"false"`) - to skip the temp file + worker pipeline. Create/update calls then run - inline against the API instead of being deferred through a subprocess. - -### Report metadata captured automatically - -Every report the plugin creates includes: - -- `name` and `test_case`: derived from the first positional argument to `pytest`. When it resolves to an existing path the plugin uses the basename for `name` and the full path string for `test_case`; otherwise both fall back to `pytest `. `name` always has a UTC ISO timestamp appended. See examples below. -- `test_system_name`: `socket.gethostname()`. -- `system_operator`: `getpass.getuser()`. -- `start_time` / `end_time`: set on session enter/exit. -- `status`: starts at `IN_PROGRESS`, finalized to `PASSED` or `FAILED` on session exit (failure if any step failed or an exception escaped the session). -- `metadata.git_repo`, `metadata.git_branch`, `metadata.git_commit`: captured via `git remote get-url origin` / `git rev-parse --abbrev-ref HEAD` / `git describe --always --dirty --exclude '*'`. Suppressed by `--no-sift-git-metadata` or when not in a git repo. - -Example invocations: - -| Pytest invocation | Report `name` | Report `test_case` | -|---|---|---| -| `pytest tests/test_battery.py` | `test_battery.py 2026-05-04T12:00:00.123456+00:00` | `tests/test_battery.py` | -| `pytest tests/` | `tests 2026-05-04T12:00:00.123456+00:00` | `tests` | -| `pytest -k voltage` | `pytest -k voltage 2026-05-04T12:00:00.123456+00:00` | `pytest -k voltage` | - -To override defaults (e.g. 
set a serial number, system operator, or extra -metadata), call `report_context.report.update({...})` from any test or -fixture. See [Linking a Run](#linking-a-run-to-the-report) for the same -pattern applied to `run_id`. - -## Controlling which tests produce reports - -By default every test in the session produces a Sift step. Two markers -and one ini key let you narrow that to a specific set of tests, which is -useful when a repo holds tests that you don't want included in the Sift test report. - -| Setting | Effect | -|---------------------------------------------------------|----------------------------------------------------------------------------------------------| -| `sift_autouse = false` in `pyproject.toml` | Flip the project-wide default off. Tests no longer produce steps unless explicitly opted in. | -| `@pytest.mark.sift_include` on a test, class, or module | Force reporting on for that scope, regardless of the project default. | -| `@pytest.mark.sift_exclude` on a test, class, or module | Force reporting off for that scope, regardless of the project default. | - -Closest marker determines setting. `sift_exclude` beats `sift_include` when both apply. -`pytestmark` at the class or module level inherits to every test in scope. - -### Bulk-applying a marker to a directory - -To opt an entire directory in (or out) without editing each file, hook -`pytest_collection_modifyitems` in the directory's `conftest.py`: - -```python title="tests/example/conftest.py" -from pathlib import Path - -import pytest - -_HERE = Path(__file__).parent - - -def pytest_collection_modifyitems(config, items): - for item in items: - try: - item.path.relative_to(_HERE) - except ValueError: - continue - item.add_marker(pytest.mark.sift_include) -``` - -This applies `sift_include` to every test collected under `tests/example/`. -Combine with `sift_autouse = false` in `pyproject.toml` for -opting in to specific directories. 
- -`pytest_collection_modifyitems` receives every item in the session, not just -this directory's, so the `relative_to` filter is what scopes the marker. - -## Basic usage - -With the conftest in place, the simplest test needs nothing extra. The `step` -fixture is `autouse=True` and pytest test failures and skips are mapped to -step statuses automatically. - -```python title="test_basic.py" -def test_no_fixtures_still_creates_a_step(): - """Autouse `step` records this function as a step on the session report.""" - assert 1 + 1 == 2 - - -def test_measure_a_single_value(step): - """Take `step` explicitly when you want to record a measurement.""" - voltage = 4.97 - step.measure( - name="battery_voltage", - value=voltage, - bounds={"min": 4.8, "max": 5.2}, - unit="V", - ) - # An out-of-bounds measurement already marks the step FAILED. Call this at - # the end to also fail pytest, without an assertion message in error_info. - step.fail_if_measurements_failed() - - -def test_measure_strings_and_booleans(step): - """`bounds` accepts a string or `True`/`False` for non-numeric values.""" - step.measure(name="firmware_version", value="1.4.2", bounds="1.4.2") - step.measure(name="self_test_passed", value=True, bounds=True) - - -def test_docstring_becomes_step_description(step): - """This docstring is the step's description in Sift. - - The plugin pulls `request.node.obj.__doc__` when it creates the step. - Helper functions called from within the test do not get this treatment; - pass `description="..."` explicitly on `substep(...)` instead. - """ - assert step.current_step.description is not None -``` - -!!! tip "Measurements never raise" - `step.measure(...)` returns `True` if the value is in bounds and `False` - otherwise. A `False` result marks the enclosing step as failed but does - not raise. Chain measurements freely and inspect the boolean if you need - custom flow control. 
- -### Status semantics for failures - -The plugin uses the step exit handler in `NewStep.__exit__` to translate test -outcomes into `TestStatus`: - -| Outcome | Resulting `TestStatus` | -|---|---| -| In-bounds measurements only | `PASSED` | -| Failed measurement, failed `report_outcome`, failed substep, or `AssertionError` raised by the test | `FAILED` (no traceback is attached, since pytest already prints it in the runner output) | -| Non-`AssertionError` exception escapes the test (e.g. `ValueError`, `TimeoutError`) | `ERROR`, with the formatted traceback (last 10 frames plus the first frame) on `step.error_info.error_message` | -| Manual `step.current_step.update({"status": ...})` | Whatever you set; the step exit handler honors a manually-resolved status | - -For the full contract, including skips, xfail/xpass, hard exits (`SystemExit`, -`KeyboardInterrupt`), setup/teardown phase failures, and propagation rules, -see the [Pass/Fail Behavior guide](../guides/pytest_plugin/pass_fail_behavior.md). - -A failure or error at any depth propagates upward: the parent substep, the -function step, the class/module/package steps above it, and the session -report all get marked failed. - -## Nested steps - -Use `step.substep(name=...)` to open a child step. Substeps nest arbitrarily -deep, and a failure at any depth propagates up to fail the parent and the -report. 
- -```python title="test_nested_steps.py" -import time - - -def test_phased_check(step): - """Phase a single test into setup/exercise/verify substeps.""" - with step.substep(name="setup", description="Power on and wait for boot") as setup: - setup.measure(name="boot_time_s", value=2.1, bounds={"max": 5.0}, unit="s") - - with step.substep(name="exercise", description="Drive the test sequence"): - time.sleep(0.01) - - with step.substep(name="verify", description="Read final state") as verify: - verify.measure(name="final_state", value="IDLE", bounds="IDLE") - - -def test_deeply_nested(step): - """A failure at the bottom fails everyone above it.""" - with step.substep(name="level_1") as l1: - with l1.substep(name="level_2") as l2: - with l2.substep(name="level_3") as l3: - l3.measure(name="leaf_value", value=42, bounds={"min": 0, "max": 100}) -``` - -Each step gets a hierarchical `step_path` (`1`, `1.1`, `1.1.2`, `2`, …) -assigned by `ReportContext`. Sibling substeps within the same parent -auto-increment; opening a new top-level step starts a new branch. - -### Mirroring the test layout - -The plugin opens a parent step for each Python package (`__init__.py` -directory), test file, and test class above every test, plus a parent step -for each `@pytest.mark.parametrize` axis. Every layer is on by default and -individually opt-out via ini flags (`sift_package_step`, `sift_module_step`, -`sift_class_step`, `sift_parametrize_nesting`). Class/module/package -docstrings become the matching step's description. - -### Linking a Run to the report - -`report_context` is the session-scoped fixture; mutating it in one test -affects the whole report. - -```python -def test_link_run_to_report(report_context, sift_client): - run = sift_client.runs.create(...) 
# however you create your run - report_context.report.update({"run_id": run.id_}) -``` - -The same `update({...})` pattern works for any field on `TestReportUpdate`, -including `serial_number`, `part_number`, `system_operator`, and `metadata`. - -## How pytest layout maps to a Sift report - -The plugin builds the report tree by hooking pytest's collection: every test -node it sees becomes a step. What you control is which constructs create -nodes and where you nest substeps inside them. Common layouts and the -resulting report trees: - -### Flat module of test functions - -The default. Each function is one step directly under the report. - -```python title="test_battery.py" -def test_voltage(step): ... -def test_current(step): ... -def test_temperature(step): ... -``` +--- +hide: + - navigation + - toc +--- -```text title="Sift report" -TestReport -├── test_voltage -├── test_current -└── test_temperature -``` + -### Modules nested under a package - -Two test files under the same Python package (directory with `__init__.py`) -share that package step as their parent. - -```python title="suites/__init__.py" -``` - -```python title="suites/test_battery.py" -def test_voltage(step): ... -def test_current(step): ... -``` - -```python title="suites/test_thermal.py" -def test_idle_temp(step): ... -def test_load_temp(step): ... -``` - -```text title="Sift report" -TestReport -└── suites - ├── test_battery.py - │ ├── test_voltage - │ └── test_current - └── test_thermal.py - ├── test_idle_temp - └── test_load_temp -``` - -### Test classes (and nested classes) - -`class TestFoo:` and `class TestOuter: class TestInner:` produce class and -nested class steps automatically — no manual fixture needed. - -```python title="test_charging.py" -class TestCharging: - """Charging subsystem.""" - - def test_starts_at_zero(self, step): ... - def test_reaches_full(self, step): ... - def test_thermal_throttle(self, step): ... 
-``` - -```text title="Sift report" -TestReport -└── test_charging.py - └── TestCharging - ├── test_starts_at_zero - ├── test_reaches_full - └── test_thermal_throttle -``` - -The class's docstring becomes the step description. - -### Parametrized tests - -Parametrized tests cluster under a parent step named after the test function, -with one inner parent per parametrize axis (outer-to-inner in -decorator-on-page order). Stacked parametrize produces nested step levels. - -```python -@pytest.mark.parametrize("voltage", [3.3, 5.0, 12.0]) -def test_rail(step, voltage): - step.measure(name="rail_v", value=voltage, bounds={"min": 0.0}) -``` - -```text title="Sift report" -TestReport -└── test_module.py - └── test_rail - ├── voltage=3.3 - ├── voltage=5.0 - └── voltage=12.0 -``` - -Stacked parametrize: - -```python -@pytest.mark.parametrize("voltage", ["high", "low"]) -@pytest.mark.parametrize("component", ["motor", "valve"]) -def test_iso(step, voltage, component): ... -``` - -```text title="Sift report" -TestReport -└── test_module.py - └── test_iso - ├── voltage='high' - │ ├── component='motor' - │ └── component='valve' - └── voltage='low' - ├── component='motor' - └── component='valve' -``` - -Set `sift_parametrize_nesting = false` in `pytest.ini` to fall back to flat -leaf names (`test_rail[3.3]`). - -### Helper functions - -Helpers called from a test do not auto-create a step. The plugin only sees -pytest-collected nodes. 
To represent helper work in the report, open a -substep at the call site and pass it into the helper: - -```python -def measure_rail(step, name, value, bounds): - return step.measure(name=name, value=value, bounds=bounds, unit="V") - - -def test_power_rails(step): - with step.substep(name="3.3V rail") as rail_3v3: - measure_rail(rail_3v3, "rail_v", 3.31, {"min": 3.2, "max": 3.4}) - - with step.substep(name="5V rail") as rail_5v: - measure_rail(rail_5v, "rail_v", 5.02, {"min": 4.9, "max": 5.1}) -``` - -```text title="Sift report" -TestReport -└── test_power_rails - ├── 3.3V rail - │ └── rail_v (measurement) - └── 5V rail - └── rail_v (measurement) -``` - -!!! tip "Docstring-as-description is top-level only" - The plugin reads the test function's docstring and uses it as the step - description. Docstrings on helper functions are not picked up. Pass - `description="..."` explicitly on `substep(...)` if you want one. - -### Fixtures that contribute steps - -A fixture can open its own substep around setup/teardown by using `step` (for -function-scope) or `report_context.new_step(...)` (for any scope). The substep -ends when the fixture's `yield` returns, which makes the report tree mirror -the lifecycle. - -```python -@pytest.fixture -def warmed_up_dut(step): - with step.substep(name="warmup", description="Bring DUT to operating temp"): - # ... do warmup work ... - yield "dut-handle" - - -def test_steady_state(step, warmed_up_dut): - step.measure(name="temp_c", value=37.2, bounds={"min": 35.0, "max": 40.0}) -``` - -```text title="Sift report" -TestReport -└── test_steady_state - ├── warmup (from fixture) - └── temp_c (measurement) -``` - -## Measurement variants - -`step.measure(...)` records exactly one measurement. For datasets coming off a -sensor or calculated channel, use one of the bulk variants. - -### `measure_avg`: one row, the mean - -`measure_avg` accepts a Python list, a NumPy array, or a pandas `Series`, -takes the mean, and evaluates it against bounds. 
- -```python -import numpy as np -import pandas as pd - - -def test_avg_with_list(step): - samples = [4.97, 5.01, 5.03, 4.99, 5.02] - step.measure_avg( - name="bus_voltage_avg", - values=samples, - bounds={"min": 4.9, "max": 5.1}, - unit="V", - ) - - -def test_avg_with_numpy(step): - samples = np.linspace(99.5, 100.5, num=50) - step.measure_avg( - name="cpu_temp_avg", - values=samples, - bounds={"min": 95.0, "max": 105.0}, - unit="C", - ) - - -def test_avg_with_pandas(step): - series = pd.Series([0.998, 1.001, 0.999, 1.002, 1.000]) - step.measure_avg( - name="reference_clock_ratio", - values=series, - bounds={"min": 0.99, "max": 1.01}, - ) -``` - -### `measure_all`: only out-of-bounds rows - -Records measurements only for samples that fail bounds, so an all-pass -dataset of N samples doesn't add N rows to the report. Returns `True` when -every sample is in bounds. - -```python -def test_only_outliers_recorded(step): - samples = [10.1, 10.2, 10.3, 99.9, 10.0, 10.1] # 99.9 is the outlier - all_in_bounds = step.measure_all( - name="pressure_psi", - values=samples, - bounds={"min": 9.0, "max": 11.0}, - unit="psi", - ) - # Returns False because 99.9 is out of bounds. The step is already - # marked failed; call this only if you also want pytest to fail. - step.fail_if_measurements_failed() -``` - -!!! note "`measure_all` requires at least one bound" - Passing `bounds={}` raises `ValueError("No bounds provided")`. At - least one of `min` or `max` must be set. - -### `report_outcome`: externally computed pass/fail - -When the decision is computed elsewhere, drop it onto the report as a -named substep with an optional reason. Returns the result you passed in, -so you can use it inline. - -```python -def test_external_checks(step): - step.report_outcome( - name="config_loaded", - result=True, - reason="loaded /etc/dut/config.yaml", - ) - - # Failures show up as a failed substep without raising. 
- rare_warning_seen = False - step.report_outcome( - name="no_rare_warning", - result=not rare_warning_seen, - reason="grep'd dmesg for the known-flaky warning", - ) -``` - -### Bounds reference - -| Pass to `bounds=` | Value type | Effect | -|---|---|---| -| `{"min": x, "max": y}` (either key optional) | `int` / `float` | Numeric window. One-sided is fine. | -| `NumericBounds(min=x, max=y)` | `int` / `float` | Same as the dict form, explicit. | -| `"expected-string"` | `str` (or `bool`) | Exact equality. For `bool` values, compares lowercased string (`"true"`/`"false"`). | -| `True` or `False` | `bool` (or `str`) | Exact equality. For `str` values, compares lowercased strings. | -| `None` | any | Records the value but does not evaluate it; measurement is recorded as `passed=True`. | - -The `unit` argument is a free-form string label (e.g. `"V"`, `"C"`, `"psi"`). - -## Skip handling - -- `@pytest.mark.skip` and `@pytest.mark.skipif`: the plugin's - `pytest_runtest_makereport` hook sees the skipped outcome and creates a - step with `TestStatus.SKIPPED`. -- Inside a test function, you can mark just one substep as skipped without - aborting the whole test: - - ```python - from sift_client.sift_types.test_report import TestStatus - - - def test_runtime_skip(step): - with step.substep(name="optional_calibration") as cal: - if not precondition_met(): - cal.current_step.update({"status": TestStatus.SKIPPED}) - ``` - - A manually-resolved status is honored by the step's exit handler. No - further bookkeeping required. `SKIPPED` does not propagate as a failure. - -## Running the suite - -```bash -# Full run against your Sift tenant -pytest - -# Pin the log file so you can replay it later if the import worker dies -pytest --sift-log-file=./sift-results.jsonl -``` - -See [Running modes](#running-modes) for the offline and disabled flags -that let the same suite run without (or without contacting) Sift. 
- -## Running modes - -The plugin runs in one of three modes, picked at invocation: - -| Mode | Flag | Network | Log file | `step.measure(...)` | When to use | -|---|---|---|---|---|---| -| Online (default) | _(none)_ | yes (pings at session start, aborts if it fails) | optional write-through backup | real measurement against Sift | CI with Sift credentials, local dev hitting your tenant | -| Offline | `--sift-offline` | none | required (the sole sink) | real measurement queued to log | field tests, air-gapped labs, CI without network | -| Disabled | `--sift-disabled` | none | none | bounds eval; returns a real bool | local dev or CI that doesn't have (or want) Sift | - -Pass both flags? Disabled wins. It's the "skip Sift entirely" hammer and -supersedes everything else. - -### Online mode (default) - -`report_context` resolves `client_has_connection` at session start. The -default implementation calls `sift_client.ping.ping()`. A failed ping -aborts the whole session with `pytest.UsageError` and points at -`--sift-offline` and `--sift-disabled` as escape hatches. - -This is loud on purpose. A CI run that silently no-ops on a flaky network -won't get noticed until somebody goes looking for the report, which is -usually weeks later, which is usually too late. - -With the default `--sift-log-file` setting on, create/update calls are -written to a JSONL log file during the run and an -`import-test-result-log --incremental` worker replays them against Sift -in the background. If the worker crashes mid-session (connection failure, -API error) or is still draining its backlog at session end, the failure -is logged at session end with a `replay-test-result-log` command for -manual recovery — test outcomes are unaffected and the local log file is -preserved. Pass `--sift-log-file=false` to make every create/update -synchronous against the API instead. 
- -#### Overriding the connection check - -Override `client_has_connection` when ping isn't the right signal, for -example a token cache that's only warm when authenticated: - -```python title="conftest.py" -from pathlib import Path - -import pytest - - -@pytest.fixture(scope="session") -def client_has_connection(sift_client) -> bool: - return Path("~/.sift-token-cache").expanduser().is_file() -``` - -The override is ignored under `--sift-offline` and `--sift-disabled`. - -### Offline mode (`--sift-offline`) - -Same fixtures, same `step.measure(...)` semantics as online. The -difference is where the writes go: every create/update lands in a JSONL -log file instead of hitting the Sift API. The session-start ping is -skipped, missing `SIFT_*` env vars are tolerated (placeholders are -filled), and the replay worker (`import-test-result-log --incremental`) -does not get spawned at session end. - -```bash -pytest --sift-offline --sift-log-file=./run.jsonl -``` - -Once you have connectivity, replay it: - -```bash -import-test-result-log ./run.jsonl -``` - -That replay creates the report, steps, and measurements against Sift. -See [Replaying a saved log file](#replaying-a-saved-log-file) for cleanup -and the incremental flag. - -`--sift-log-file=none` is rejected when offline is set. The -log file is the only sink in offline mode, so without it the results are -gone. - -!!! warning "Pin the log path" - Without `--sift-log-file=`, offline mode writes to - a `tempfile.NamedTemporaryFile` and only surfaces the path via a - `logger.info` line. Pin a known path when you intend to replay later. - -### Disabled mode (`--sift-disabled`) - -The plugin stays loaded with the same fixtures and markers as the other -modes. Nothing contacts Sift, no log file is written, and no `SIFT_*` -env vars are required. 
`step.measure(...)`, `step.measure_avg(...)`, -`step.measure_all(...)`, `step.substep(...)`, and -`report_context.report.update({...})` all behave normally — bounds -evaluate and you get a real pass/fail boolean back. - -Entities returned in disabled mode report `is_simulated == True` (on -`TestReport`, `TestStep`, `TestMeasurement`, and `ReportContext`) so -consumers and tests can branch on provenance. Offline-mode entities -also report `is_simulated == True`. - -How to turn it on, in the order most projects pick: - -```bash -# In an .envrc, devcontainer, or CI job config -export SIFT_DISABLED=1 - -# Per-invocation kill-switch -pytest --sift-disabled - -# Per-project default (uncommon; online is usually the right default) -# pyproject.toml: -# [tool.pytest.ini_options] -# sift_disabled = true -``` - -Good fit for local dev without Sift credentials. Also for library -consumers who don't have a Sift tenant. Also useful in CI for runs that -shouldn't add noise to the report stream, like a PR job re-running the -same suite five times in a row. - -## Replaying a saved log file - -When the worker doesn't finish cleanly the plugin will print a hint mentioning -`import-test-result-log`. To import: +# Pytest Plugin -```bash -import-test-result-log -``` +This page has moved to the [Pytest Plugin guide](../guides/pytest_plugin/index.md). -That replays the saved JSONL log as a single batch (no `--incremental`) and -deletes the file when it lives under the system temp dir. \ No newline at end of file +You should be redirected automatically. If your browser does not redirect, +follow the link above. diff --git a/python/docs/examples/pytest_plugin_quickstart.md b/python/docs/examples/pytest_plugin_quickstart.md index bd8414aa7..b30f282c6 100644 --- a/python/docs/examples/pytest_plugin_quickstart.md +++ b/python/docs/examples/pytest_plugin_quickstart.md @@ -8,8 +8,8 @@ axes, manual substeps, and gate markers. 
It also includes a tests directory that uses no Sift APIs at all, to show how the autouse fixtures capture plain pytest tests for free. -For a conceptual reference (fixtures, ini flags, status semantics), see -[Pytest Plugin](pytest_plugin.md). +For a conceptual reference (fixtures, ini flags, status semantics), see the +[Pytest Plugin guide](../guides/pytest_plugin/index.md). ## Project layout @@ -172,7 +172,7 @@ Flip any of the `sift_*_step` / `sift_parametrize_nesting` flags in ## Next steps -- [Pytest Plugin](pytest_plugin.md): conceptual reference covering fixtures, - ini flags, status semantics, and layout-mapping examples. +- [Pytest Plugin guide](../guides/pytest_plugin/index.md): conceptual reference + covering fixtures, configuration, report structure, and pass/fail behavior. - The demo's [README](https://github.com/sift-stack/sift/blob/main/python/examples/pytest_plugin/README.md) on GitHub mirrors this page and is the canonical source. diff --git a/python/docs/guides/index.md b/python/docs/guides/index.md new file mode 100644 index 000000000..105f0bb25 --- /dev/null +++ b/python/docs/guides/index.md @@ -0,0 +1,11 @@ +# Guides + +Conceptual references for the Sift Python client. Guides explain how a feature +works and how to configure it. For runnable, end-to-end walkthroughs see the +[Examples](../examples/index.md) section. + +## Available guides + +- [Pytest Plugin](pytest_plugin/index.md): turn a pytest run into a `TestReport` + in Sift. Each test becomes a `TestStep`, measurements are recorded as rows, and + failures propagate up through nested substeps to the report. 
diff --git a/python/docs/guides/pytest_plugin/configuration.md b/python/docs/guides/pytest_plugin/configuration.md new file mode 100644 index 000000000..6ed78f931 --- /dev/null +++ b/python/docs/guides/pytest_plugin/configuration.md @@ -0,0 +1,220 @@ +# Configuration & Defaults + +This page is the full reference for everything the plugin exposes: fixtures, CLI +flags, ini options, credential handling, and the markers that control which +tests report. + +!!! info "Where the plugin lives" + The plugin lives at `sift_client.pytest_plugin`. It is **not** registered as + a `pytest11` entry point. Projects opt in with a `pytest_plugins` declaration + in their top-level `conftest.py`. Pytest then loads the module as a real + plugin: the fixtures, CLI options, and `pytest_runtest_makereport` hook all + register through standard pytest machinery, so `pytest --trace-config` lists + it and `pytest -p no:sift_client.pytest_plugin` disables it. + +## Credentials + +Set the connection details in a `.env` next to your tests: + +```bash +SIFT_API_KEY="your-api-key" +SIFT_GRPC_URI="..." +SIFT_REST_URI="..." +``` + +The `SIFT_GRPC_URI` and `SIFT_REST_URI` are the gRPC and REST endpoints for your +Sift organization. You can find these on the Sift Manage page, where you can +also generate an API key. + +The default `sift_client` fixture reads its two URIs from the environment first and +falls back to ini keys when the env vars are unset. `SIFT_API_KEY` is +intentionally env-only, so keep it out of source control and supply it through +`pytest-dotenv` (see [API key handling](#api-key-handling) below). The env var +wins when both are set, so secrets injected into a CI environment continue to +override values committed to `pyproject.toml`. There are no CLI flags for +credentials. + +| Ini key | Environment variable | Notes | +|---|---|---| +| _(none)_ | `SIFT_API_KEY` | Env-only. Use `.env` + `pytest-dotenv` locally; inject from your secret store in CI.
| +| `sift_grpc_uri` | `SIFT_GRPC_URI` | Stable per-org gRPC endpoint; safe to commit. | +| `sift_rest_uri` | `SIFT_REST_URI` | Stable per-org REST endpoint; safe to commit. | + +### API key handling + +`SIFT_API_KEY` is deliberately read from the process environment only. The +recommended workflow uses the +[`pytest-dotenv`](https://pypi.org/project/pytest-dotenv/) plugin (already a +dependency of `sift-stack-py`), which loads variables from a `.env` file into +`os.environ` before tests run. + +1. Add `.env` to `.gitignore`. +2. Drop your key into `.env` at the project root: + + ```bash title=".env" + SIFT_API_KEY=sk-...your-key... + ``` + +3. In CI, set `SIFT_API_KEY` directly via your provider's secret manager + instead of committing a `.env` file. + +`pytest-dotenv` picks the file up automatically; no `pytest_configure` glue is +needed. + +!!! warning "FedRAMP / shared environments" + Pass `--sift-log-file=false` (or set the ini key to `"false"`) to skip the + temp file + worker pipeline. Create/update calls then run inline against the + API instead of being deferred through a subprocess. + +## Wire the plugin into `conftest.py` + +A single `pytest_plugins` declaration in your top-level `conftest.py` is all +that's required. The plugin ships a default `sift_client` fixture that reads +`SIFT_API_KEY`, `SIFT_GRPC_URI`, and `SIFT_REST_URI` from the environment. + +```python title="conftest.py" +from dotenv import load_dotenv + +load_dotenv() + +pytest_plugins = ["sift_client.pytest_plugin"] +``` + +That's the whole setup. Every test in the session will now create a step on a +single shared `TestReport`. + +### Customizing the `SiftClient` + +To construct the client differently (custom TLS, timeouts, alternate +credentials, etc.), override the `sift_client` fixture in your conftest. The +plugin's default falls away in favor of your definition. 
+ +```python title="conftest.py" +import os + +import pytest +from dotenv import load_dotenv + +from sift_client import SiftClient, SiftConnectionConfig + +load_dotenv() + +pytest_plugins = ["sift_client.pytest_plugin"] + + +@pytest.fixture(scope="session") +def sift_client() -> SiftClient: + return SiftClient( + connection_config=SiftConnectionConfig( + api_key=os.getenv("SIFT_API_KEY"), + grpc_url=os.getenv("SIFT_GRPC_URI"), + rest_url=os.getenv("SIFT_REST_URI"), + use_ssl=False, + ) + ) +``` + +## Plugin provided fixtures + +| Name | Kind | Scope | Purpose | +|---|---|---|---| +| `report_context` | fixture (autouse) | session | The `ReportContext` backing the run's `TestReport`. Use it to attach metadata or open ad-hoc steps. | +| `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, `fail_if_measurements_failed`, and `current_step`. | +| `_hierarchy_parents` | internal fixture (autouse) | function | Opens a parent step for each `pytest.Package`, `pytest.Module`, and `pytest.Class` ancestor of the current test. Each layer is gated independently; see [ini options](#ini-options). | +| `_parametrize_parents` | internal fixture (autouse) | function | Opens a parent step for each `@pytest.mark.parametrize` axis (and fixture parametrization), nested inside the hierarchy parents. | +| `client_has_connection` | fixture | session | Calls `sift_client.ping.ping()`; consulted by `report_context` at session start in online mode (the default). Override to skip the ping or use a different reachability signal. | + +## CLI options + +| Flag | Default | Effect | +|---|---|---| +| `--sift-offline` | off (online) | Skip the session-start ping and don't contact Sift. All create/update calls go to the JSONL log file for later replay via `import-test-result-log`. Missing `SIFT_*` env vars are tolerated; placeholders are filled. | +| `--sift-disabled` | off | Skip Sift entirely. 
Nothing contacts the API and no log file is written; `step.measure(...)` still evaluates bounds and returns a real pass/fail boolean. Also honored via `SIFT_DISABLED=1`. Supersedes every other flag (disabled wins over offline). | +| `--sift-log-file=` | temp file | Where the JSONL log of create/update calls goes. With a log file set, the plugin spawns an `import-test-result-log --incremental` worker that polls the file and replays entries against Sift while the run is in flight. Pass `false` to disable the file entirely; create/update calls then go straight to the API synchronously during tests. Incompatible with `--sift-offline` since offline mode needs the log file as its sole sink. | +| `--no-sift-git-metadata` | git metadata on | Skip capturing git repo/branch/commit on the report's metadata. | + +These can be passed permanently via `addopts`: + +```ini title="pytest.ini" +[pytest] +addopts = --sift-offline +``` + +## Ini options + +Set the matching ini key directly (recommended for stable per-project +configuration). Each CLI flag has a corresponding key under +`[tool.pytest.ini_options]` in `pyproject.toml` or `[pytest]` in `pytest.ini`. +CLI flags, when passed, override the ini values. + +| Ini key | Type | Equivalent CLI flag | +|---|---|---| +| `sift_log_file` | string (`true` / `false` / `none` / path) | `--sift-log-file=` | +| `sift_git_metadata` | bool (default `true`) | `--no-sift-git-metadata` (sets to `false`) | +| `sift_offline` | bool (default `false`) | `--sift-offline` | +| `sift_disabled` | bool (default `false`) | `--sift-disabled` (also honors `SIFT_DISABLED` env var) | +| `sift_autouse` | bool (default `true`) | _(no CLI flag; controls the marker gate below)_ | +| `sift_package_step` | bool (default `true`) | _(ini-only)_. Opens a parent step for each Python package (directory with `__init__.py`) in the test path. | +| `sift_module_step` | bool (default `true`) | _(ini-only)_. Opens a parent step for each test module (file). 
| +| `sift_class_step` | bool (default `true`) | _(ini-only)_. Opens a parent step for each test class, including nested classes. | +| `sift_parametrize_nesting` | bool (default `true`) | _(ini-only)_. Clusters parametrized tests under shared parents (`test_x`, `axis=value`) instead of flat leaves (`test_x[value]`). | + +```toml title="pyproject.toml" +[tool.pytest.ini_options] +sift_offline = true +sift_git_metadata = false +sift_grpc_uri = "your-org.sift.example:443" +sift_rest_uri = "https://your-org.sift.example" +``` + +```ini title="pytest.ini" +[pytest] +sift_offline = true +sift_git_metadata = false +sift_grpc_uri = your-org.sift.example:443 +sift_rest_uri = https://your-org.sift.example +``` + +## Controlling which tests produce reports + +By default every test in the session produces a Sift step. Two markers and one +ini key let you narrow that to a specific set of tests, which is useful when a +repo holds tests that you don't want included in the Sift test report. + +| Setting | Effect | +|---------------------------------------------------------|----------------------------------------------------------------------------------------------| +| `sift_autouse = false` in `pyproject.toml` | Flip the project-wide default off. Tests no longer produce steps unless explicitly opted in. | +| `@pytest.mark.sift_include` on a test, class, or module | Force reporting on for that scope, regardless of the project default. | +| `@pytest.mark.sift_exclude` on a test, class, or module | Force reporting off for that scope, regardless of the project default. | + +The closest marker determines the setting. `sift_exclude` beats `sift_include` when both apply. +`pytestmark` at the class or module level inherits to every test in scope.
+ +### Bulk-applying a marker to a directory + +To opt an entire directory in (or out) without editing each file, hook +`pytest_collection_modifyitems` in the directory's `conftest.py`: + +```python title="tests/example/conftest.py" +from pathlib import Path + +import pytest + +_HERE = Path(__file__).parent + + +def pytest_collection_modifyitems(config, items): + for item in items: + try: + item.path.relative_to(_HERE) + except ValueError: + continue + item.add_marker(pytest.mark.sift_include) +``` + +This applies `sift_include` to every test collected under `tests/example/`. +Combine with `sift_autouse = false` in `pyproject.toml` for opting in to +specific directories. + +`pytest_collection_modifyitems` receives every item in the session, not just +this directory's, so the `relative_to` filter is what scopes the marker. diff --git a/python/docs/guides/pytest_plugin/index.md b/python/docs/guides/pytest_plugin/index.md new file mode 100644 index 000000000..9344885b3 --- /dev/null +++ b/python/docs/guides/pytest_plugin/index.md @@ -0,0 +1,122 @@ +# Pytest Plugin + +The Sift Python client ships a pytest plugin that turns a pytest run into a +`TestReport` in Sift. Each test function becomes a `TestStep`, measurements are presented +as rows under that step, and failures propagate up through nested substeps to +the report itself. + +## Quick start + +Install the client and pytest: + +```bash +pip install sift-stack-py pytest python-dotenv +``` + +Set your connection details in a `.env` next to your tests: + +```bash title=".env" +SIFT_API_KEY="..." +SIFT_GRPC_URI="..." +SIFT_REST_URI="..." +``` + +Find these on the Sift Manage page, where you can also generate an API key. + +Register the plugin with a single `pytest_plugins` declaration in your top-level +`conftest.py`: + +```python title="conftest.py" +from dotenv import load_dotenv + +load_dotenv() + +pytest_plugins = ["sift_client.pytest_plugin"] +``` + +Write a test. 
The `step` fixture is `autouse`, so any test becomes a step on the +report. Take it as an argument when you want to record a measurement: + +```python title="test_battery.py" +def test_battery_voltage(step): + step.measure( + name="battery_voltage", + value=4.97, + bounds={"min": 4.8, "max": 5.2}, + unit="V", + ) + step.fail_if_measurements_failed() +``` + +Run it: + +```bash +pytest +``` + +A `TestReport` shows up in Sift once the session finishes. + +!!! tip "Fail at the end, not per measurement" + `step.measure(...)` returns a pass/fail boolean and never raises, so a + failing measurement marks the step failed without aborting the test. Take + every measurement first, then call `step.fail_if_measurements_failed()` once + at the end, so every measurement still lands in the report even when one + fails. It fails the test via `pytest.fail` (no assertion noise in + `error_info`), and unlike asserting on an individual `step.measure(...)` call + it does not short-circuit on the first failure and skip every measurement + after it. + +## Sensible defaults + +With nothing but the `conftest.py` above, you get: + +- **Full step tree.** Every Python package, test module, test class, and + parametrize axis above a test becomes a parent step, so the report mirrors + your test layout. +- **Online mode.** The plugin pings Sift at session start and streams + create/update calls to your tenant during the run. +- **Git metadata.** Repo, branch, and commit are captured on the report + automatically. + +Everything is on by default and individually overridable. See +[Configuration & Defaults](configuration.md) for the full audit of every knob, +marker, flag, and fixture. + +## Running modes + +The plugin runs in one of three modes, picked at invocation. 
+ +| Mode | How to select | Contacts Sift | When to use | +|---|---|---|---------------------------------------------------------------| +| **Online** | default (no flag) | Yes, during the run | Default choice | +| **Offline** | `--sift-offline` | No; records to a log file for later replay | Environments without Sift access. | +| **Disabled** | `--sift-disabled` | No | Local dev. Bounds still evaluate and return a real pass/fail. | + +Online mode pings Sift once at session start and aborts if Sift is unreachable or the credentials are invalid, +so a misconfigured job fails immediately instead of silently producing no report. +During the run, every create and update is appended to a JSONL log file. +A background worker uploads new entries to Sift incrementally. +If the connection drops mid-test, the test keeps running and the log keeps writing locally. +The remaining entries can be uploaded afterward by running `import-test-result-log`, the command the plugin prints on exit. + +See [Running Modes](running_modes.md) for the log-file and replay pipeline, +overriding the connection check, and replaying a saved log. + +## Report structure + +The report tree mirrors your test layout: packages, modules, classes, and +parametrize axes nest automatically, and you can open arbitrary substeps inside +a test. See [Report Structure](report_structure.md) for the layout-to-tree +mapping, measurement variants, and report metadata. + +## Pass/fail outcomes + +Every pytest outcome (pass, assertion failure, exception, skip, xfail, hard +exit) maps to a `TestStatus`, and failures roll up to the parent steps and the +report. See [Pass/Fail Behavior](pass_fail_behavior.md). + +## Try the runnable demo + +The [Pytest Plugin Quickstart](../../examples/pytest_plugin_quickstart.md) walks +through a self-contained demo project that exercises every layer of the step +tree, with instructions to run it with or without a Sift tenant.
diff --git a/python/docs/guides/pytest_plugin/pass_fail_behavior.md b/python/docs/guides/pytest_plugin/pass_fail_behavior.md index 5c0f178c2..2ce3d0697 100644 --- a/python/docs/guides/pytest_plugin/pass_fail_behavior.md +++ b/python/docs/guides/pytest_plugin/pass_fail_behavior.md @@ -29,7 +29,8 @@ The statuses below come from `sift_client.sift_types.test_report.TestStatus`. An assertion failure records the concise assertion message (the exception line(s), no traceback frames) on `step.error_info.error_message` while still mapping to `FAILED`. A non-assertion exception gets its formatted traceback -recorded on `step.error_info.error_message`. +(the last 10 frames plus the first frame) recorded on +`step.error_info.error_message`. ## Hard exits @@ -76,6 +77,22 @@ itself) as `ABORTED`. `SKIPPED` does not propagate as a failure. A skipped substep or test does not block its parent from resolving to `PASSED`. +Inside a test function, you can mark just one substep as skipped without +aborting the whole test: + +```python +from sift_client.sift_types.test_report import TestStatus + + +def test_runtime_skip(step): + with step.substep(name="optional_calibration") as cal: + if not precondition_met(): + cal.current_step.update({"status": TestStatus.SKIPPED}) +``` + +A manually-resolved status is honored by the step's exit handler. No further +bookkeeping required. + ## Expected failures (xfail / xpass) xfail marks declare that a test is expected to fail. The plugin follows diff --git a/python/docs/guides/pytest_plugin/report_structure.md b/python/docs/guides/pytest_plugin/report_structure.md new file mode 100644 index 000000000..811fd7cf0 --- /dev/null +++ b/python/docs/guides/pytest_plugin/report_structure.md @@ -0,0 +1,421 @@ +# Report Structure + +The report tree mirrors your test layout. Every Python package, test module, +test class, and parametrize axis above a test becomes a parent step, and you can +open arbitrary substeps inside a test. 
This page covers the layout-to-tree +mapping, the measurement variants you record into it, and the metadata the +plugin captures for you. + +## Recording measurements + +With the conftest in place, the simplest test needs nothing extra. The `step` +fixture is `autouse=True` and pytest test failures and skips are mapped to step +statuses automatically. + +```python title="test_basic.py" +def test_no_fixtures_still_creates_a_step(): + """Autouse `step` records this function as a step on the session report.""" + assert 1 + 1 == 2 + + +def test_measure_a_single_value(step): + """Take `step` explicitly when you want to record a measurement.""" + voltage = 4.97 + step.measure( + name="battery_voltage", + value=voltage, + bounds={"min": 4.8, "max": 5.2}, + unit="V", + ) + # An out-of-bounds measurement already marks the step FAILED. Call this at + # the end to also fail pytest, without an assertion message in error_info. + step.fail_if_measurements_failed() + + +def test_measure_strings_and_booleans(step): + """`bounds` accepts a string or `True`/`False` for non-numeric values.""" + step.measure(name="firmware_version", value="1.4.2", bounds="1.4.2") + step.measure(name="self_test_passed", value=True, bounds=True) + + +def test_docstring_becomes_step_description(step): + """This docstring is the step's description in Sift. + + The plugin pulls `request.node.obj.__doc__` when it creates the step. + Helper functions called from within the test do not get this treatment; + pass `description="..."` explicitly on `substep(...)` instead. + """ + assert step.current_step.description is not None +``` + +!!! tip "Measurements never raise" + `step.measure(...)` returns `True` if the value is in bounds and `False` + otherwise. A `False` result marks the enclosing step as failed but does not + raise. Chain measurements freely and inspect the boolean if you need custom + flow control. 
For how outcomes map to `TestStatus` and propagate upward, see + [Pass/Fail Behavior](pass_fail_behavior.md). + +## Nested steps + +Use `step.substep(name=...)` to open a child step. Substeps nest arbitrarily +deep, and a failure at any depth propagates up to fail the parent and the +report. + +```python title="test_nested_steps.py" +import time + + +def test_phased_check(step): + """Phase a single test into setup/exercise/verify substeps.""" + with step.substep(name="setup", description="Power on and wait for boot") as setup: + setup.measure(name="boot_time_s", value=2.1, bounds={"max": 5.0}, unit="s") + + with step.substep(name="exercise", description="Drive the test sequence"): + time.sleep(0.01) + + with step.substep(name="verify", description="Read final state") as verify: + verify.measure(name="final_state", value="IDLE", bounds="IDLE") + + +def test_deeply_nested(step): + """A failure at the bottom fails everyone above it.""" + with step.substep(name="level_1") as l1: + with l1.substep(name="level_2") as l2: + with l2.substep(name="level_3") as l3: + l3.measure(name="leaf_value", value=42, bounds={"min": 0, "max": 100}) +``` + +Each step gets a hierarchical `step_path` (`1`, `1.1`, `1.1.2`, `2`, …) assigned +by `ReportContext`. Sibling substeps within the same parent auto-increment; +opening a new top-level step starts a new branch. + +### Mirroring the test layout + +The plugin opens a parent step for each Python package (`__init__.py` +directory), test file, and test class above every test, plus a parent step for +each `@pytest.mark.parametrize` axis. Every layer is on by default and +individually opt-out via ini flags (`sift_package_step`, `sift_module_step`, +`sift_class_step`, `sift_parametrize_nesting`). Class/module/package docstrings +become the matching step's description. + +### Linking a Run to the report + +`report_context` is the session-scoped fixture; mutating it in one test affects +the whole report. 
+ +```python +def test_link_run_to_report(report_context, sift_client): + run = sift_client.runs.create(...) # however you create your run + report_context.report.update({"run_id": run.id_}) +``` + +The same `update({...})` pattern works for any field on `TestReportUpdate`, +including `serial_number`, `part_number`, `system_operator`, and `metadata`. + +## How pytest layout maps to a Sift report + +The plugin builds the report tree by hooking pytest's collection: every test +node it sees becomes a step. What you control is which constructs create nodes +and where you nest substeps inside them. Common layouts and the resulting report +trees: + +### Flat module of test functions + +The default. Each function is one step directly under the report. + +```python title="test_battery.py" +def test_voltage(step): ... +def test_current(step): ... +def test_temperature(step): ... +``` + +```text title="Sift report" +TestReport +├── test_voltage +├── test_current +└── test_temperature +``` + +### Modules nested under a package + +Two test files under the same Python package (directory with `__init__.py`) +share that package step as their parent. + +```python title="suites/__init__.py" +``` + +```python title="suites/test_battery.py" +def test_voltage(step): ... +def test_current(step): ... +``` + +```python title="suites/test_thermal.py" +def test_idle_temp(step): ... +def test_load_temp(step): ... +``` + +```text title="Sift report" +TestReport +└── suites + ├── test_battery.py + │ ├── test_voltage + │ └── test_current + └── test_thermal.py + ├── test_idle_temp + └── test_load_temp +``` + +### Test classes (and nested classes) + +`class TestFoo:` and `class TestOuter: class TestInner:` produce class and +nested class steps automatically, with no manual fixture needed. + +```python title="test_charging.py" +class TestCharging: + """Charging subsystem.""" + + def test_starts_at_zero(self, step): ... + def test_reaches_full(self, step): ... 
+ def test_thermal_throttle(self, step): ... +``` + +```text title="Sift report" +TestReport +└── test_charging.py + └── TestCharging + ├── test_starts_at_zero + ├── test_reaches_full + └── test_thermal_throttle +``` + +The class's docstring becomes the step description. + +### Parametrized tests + +Parametrized tests cluster under a parent step named after the test function, +with one inner parent per parametrize axis (outer-to-inner in decorator-on-page +order). Stacked parametrize produces nested step levels. + +```python +@pytest.mark.parametrize("voltage", [3.3, 5.0, 12.0]) +def test_rail(step, voltage): + step.measure(name="rail_v", value=voltage, bounds={"min": 0.0}) +``` + +```text title="Sift report" +TestReport +└── test_module.py + └── test_rail + ├── voltage=3.3 + ├── voltage=5.0 + └── voltage=12.0 +``` + +Stacked parametrize: + +```python +@pytest.mark.parametrize("voltage", ["high", "low"]) +@pytest.mark.parametrize("component", ["motor", "valve"]) +def test_iso(step, voltage, component): ... +``` + +```text title="Sift report" +TestReport +└── test_module.py + └── test_iso + ├── voltage='high' + │ ├── component='motor' + │ └── component='valve' + └── voltage='low' + ├── component='motor' + └── component='valve' +``` + +Set `sift_parametrize_nesting = false` in `pytest.ini` to fall back to flat leaf +names (`test_rail[3.3]`). + +### Helper functions + +Helpers called from a test do not auto-create a step. The plugin only sees +pytest-collected nodes. 
To represent helper work in the report, open a substep +at the call site and pass it into the helper: + +```python +def measure_rail(step, name, value, bounds): + return step.measure(name=name, value=value, bounds=bounds, unit="V") + + +def test_power_rails(step): + with step.substep(name="3.3V rail") as rail_3v3: + measure_rail(rail_3v3, "rail_v", 3.31, {"min": 3.2, "max": 3.4}) + + with step.substep(name="5V rail") as rail_5v: + measure_rail(rail_5v, "rail_v", 5.02, {"min": 4.9, "max": 5.1}) +``` + +```text title="Sift report" +TestReport +└── test_power_rails + ├── 3.3V rail + │ └── rail_v (measurement) + └── 5V rail + └── rail_v (measurement) +``` + +!!! tip "Docstring-as-description is top-level only" + The plugin reads the test function's docstring and uses it as the step + description. Docstrings on helper functions are not picked up. Pass + `description="..."` explicitly on `substep(...)` if you want one. + +### Fixtures that contribute steps + +A fixture can open its own substep around setup/teardown by using `step` (for +function-scope) or `report_context.new_step(...)` (for any scope). The substep +ends when the fixture's `yield` returns, which makes the report tree mirror the +lifecycle. + +```python +@pytest.fixture +def warmed_up_dut(step): + with step.substep(name="warmup", description="Bring DUT to operating temp"): + # ... do warmup work ... + yield "dut-handle" + + +def test_steady_state(step, warmed_up_dut): + step.measure(name="temp_c", value=37.2, bounds={"min": 35.0, "max": 40.0}) +``` + +```text title="Sift report" +TestReport +└── test_steady_state + ├── warmup (from fixture) + └── temp_c (measurement) +``` + +## Measurement variants + +`step.measure(...)` records exactly one measurement. For datasets coming off a +sensor or calculated channel, use one of the bulk variants. + +### `measure_avg`: one row, the mean + +`measure_avg` accepts a Python list, a NumPy array, or a pandas `Series`, takes +the mean, and evaluates it against bounds. 
+ +```python +import numpy as np +import pandas as pd + + +def test_avg_with_list(step): + samples = [4.97, 5.01, 5.03, 4.99, 5.02] + step.measure_avg( + name="bus_voltage_avg", + values=samples, + bounds={"min": 4.9, "max": 5.1}, + unit="V", + ) + + +def test_avg_with_numpy(step): + samples = np.linspace(99.5, 100.5, num=50) + step.measure_avg( + name="cpu_temp_avg", + values=samples, + bounds={"min": 95.0, "max": 105.0}, + unit="C", + ) + + +def test_avg_with_pandas(step): + series = pd.Series([0.998, 1.001, 0.999, 1.002, 1.000]) + step.measure_avg( + name="reference_clock_ratio", + values=series, + bounds={"min": 0.99, "max": 1.01}, + ) +``` + +### `measure_all`: only out-of-bounds rows + +Records measurements only for samples that fail bounds, so an all-pass dataset +of N samples doesn't add N rows to the report. Returns `True` when every sample +is in bounds. + +```python +def test_only_outliers_recorded(step): + samples = [10.1, 10.2, 10.3, 99.9, 10.0, 10.1] # 99.9 is the outlier + all_in_bounds = step.measure_all( + name="pressure_psi", + values=samples, + bounds={"min": 9.0, "max": 11.0}, + unit="psi", + ) + # Returns False because 99.9 is out of bounds. The step is already + # marked failed; call this only if you also want pytest to fail. + step.fail_if_measurements_failed() +``` + +!!! note "`measure_all` requires at least one bound" + Passing `bounds={}` raises `ValueError("No bounds provided")`. At least one + of `min` or `max` must be set. + +### `report_outcome`: externally computed pass/fail + +When the decision is computed elsewhere, drop it onto the report as a named +substep with an optional reason. Returns the result you passed in, so you can +use it inline. + +```python +def test_external_checks(step): + step.report_outcome( + name="config_loaded", + result=True, + reason="loaded /etc/dut/config.yaml", + ) + + # Failures show up as a failed substep without raising. 
+ rare_warning_seen = False + step.report_outcome( + name="no_rare_warning", + result=not rare_warning_seen, + reason="grep'd dmesg for the known-flaky warning", + ) +``` + +### Bounds reference + +| Pass to `bounds=` | Value type | Effect | +|---|---|---| +| `{"min": x, "max": y}` (either key optional) | `int` / `float` | Numeric window. One-sided is fine. | +| `NumericBounds(min=x, max=y)` | `int` / `float` | Same as the dict form, explicit. | +| `"expected-string"` | `str` (or `bool`) | Exact equality. For `bool` values, compares lowercased string (`"true"`/`"false"`). | +| `True` or `False` | `bool` (or `str`) | Exact equality. For `str` values, compares lowercased strings. | +| `None` | any | Records the value but does not evaluate it; measurement is recorded as `passed=True`. | + +The `unit` argument is a free-form string label (e.g. `"V"`, `"C"`, `"psi"`). + +## Report metadata captured automatically + +Every report the plugin creates includes: + +- `name` and `test_case`: derived from the first positional argument to `pytest`. When it resolves to an existing path the plugin uses the basename for `name` and the full path string for `test_case`; otherwise both fall back to `pytest <args>`. `name` always has a UTC ISO timestamp appended. See examples below. +- `test_system_name`: `socket.gethostname()`. +- `system_operator`: `getpass.getuser()`. +- `start_time` / `end_time`: set on session enter/exit. +- `status`: starts at `IN_PROGRESS`, finalized to `PASSED` or `FAILED` on session exit (failure if any step failed or an exception escaped the session). +- `metadata.git_repo`, `metadata.git_branch`, `metadata.git_commit`: captured via `git remote get-url origin` / `git rev-parse --abbrev-ref HEAD` / `git describe --always --dirty --exclude '*'`. Suppressed by `--no-sift-git-metadata` or when not in a git repo.
+ +Example invocations: + +| Pytest invocation | Report `name` | Report `test_case` | +|---|---|---| +| `pytest tests/test_battery.py` | `test_battery.py 2026-05-04T12:00:00.123456+00:00` | `tests/test_battery.py` | +| `pytest tests/` | `tests 2026-05-04T12:00:00.123456+00:00` | `tests` | +| `pytest -k voltage` | `pytest -k voltage 2026-05-04T12:00:00.123456+00:00` | `pytest -k voltage` | + +To override defaults (e.g. set a serial number, system operator, or extra +metadata), call `report_context.report.update({...})` from any test or fixture. +See [Linking a Run](#linking-a-run-to-the-report) for the same pattern applied +to `run_id`. diff --git a/python/docs/guides/pytest_plugin/running_modes.md b/python/docs/guides/pytest_plugin/running_modes.md new file mode 100644 index 000000000..e69688cf1 --- /dev/null +++ b/python/docs/guides/pytest_plugin/running_modes.md @@ -0,0 +1,138 @@ +# Running Modes + +The plugin runs in one of three modes, picked at invocation. This page covers +how each mode behaves, the log-file/replay pipeline, and how to replay a saved +log against Sift. 
+ +## Running the suite + +```bash +# Full run against your Sift tenant +pytest + +# Pin the log file so you can replay it later if the import worker dies +pytest --sift-log-file=./sift-results.jsonl +``` + +## The three modes + +| Mode | Flag | Network | Log file | `step.measure(...)` | When to use | +|---|---|---|---|---|---| +| Online (default) | _(none)_ | yes (pings at session start, aborts if it fails) | optional write-through backup | real measurement against Sift | CI with Sift credentials, local dev hitting your tenant | +| Offline | `--sift-offline` | none | required (the sole sink) | real measurement queued to log | field tests, air-gapped labs, CI without network | +| Disabled | `--sift-disabled` | none | none | bounds eval; returns a real bool | local dev or CI that doesn't have (or want) Sift | + +Pass both flags and disabled wins: it skips Sift entirely and supersedes every +other setting. + +## Online mode (default) + +`report_context` resolves `client_has_connection` at session start. The default +implementation calls `sift_client.ping.ping()`. A failed ping aborts the whole +session with `pytest.UsageError` and points at `--sift-offline` and +`--sift-disabled` as escape hatches. + +This is loud on purpose. A CI run that silently no-ops on a flaky network won't +get noticed until somebody goes looking for the report, which is usually weeks +later, which is usually too late. + +With the default `--sift-log-file` setting on, create/update calls are written +to a JSONL log file during the run and an `import-test-result-log --incremental` +worker replays them against Sift in the background. If the worker crashes +mid-session (connection failure, API error) or is still draining its backlog at +session end, the failure is logged at session end with an `import-test-result-log` +command for manual recovery. Test outcomes are unaffected and the local log +file is preserved.
Pass `--sift-log-file=false` to make every create/update +synchronous against the API instead. + +### Overriding the connection check + +Override `client_has_connection` when ping isn't the right signal, for example a +token cache that's only warm when authenticated: + +```python title="conftest.py" +from pathlib import Path + +import pytest + + +@pytest.fixture(scope="session") +def client_has_connection(sift_client) -> bool: + return Path("~/.sift-token-cache").expanduser().is_file() +``` + +The override is ignored under `--sift-offline` and `--sift-disabled`. + +## Offline mode (`--sift-offline`) + +Same fixtures, same `step.measure(...)` semantics as online. The difference is +where the writes go: every create/update lands in a JSONL log file instead of +hitting the Sift API. The session-start ping is skipped, missing `SIFT_*` env +vars are tolerated (placeholders are filled), and the replay worker +(`import-test-result-log --incremental`) does not get spawned at session end. + +```bash +pytest --sift-offline --sift-log-file=./run.jsonl +``` + +Once you have connectivity, replay it: + +```bash +import-test-result-log ./run.jsonl +``` + +That replay creates the report, steps, and measurements against Sift. See +[Replaying a saved log file](#replaying-a-saved-log-file) for cleanup and the +incremental flag. + +`--sift-log-file=false` is rejected when offline is set. The log file is the only +sink in offline mode, so without it the results are gone. + +!!! warning "Pin the log path" + Without `--sift-log-file=<path>`, offline mode writes to a + `tempfile.NamedTemporaryFile` and only surfaces the path via a `logger.info` + line. Pin a known path when you intend to replay later. + +## Disabled mode (`--sift-disabled`) + +The plugin stays loaded with the same fixtures and markers as the other modes. +Nothing contacts Sift, no log file is written, and no `SIFT_*` env vars are +required.
`step.measure(...)`, `step.measure_avg(...)`, `step.measure_all(...)`, +`step.substep(...)`, and `report_context.report.update({...})` all behave +normally: bounds evaluate and you get a real pass/fail boolean back. + +Entities returned in disabled mode report `is_simulated == True` (on +`TestReport`, `TestStep`, `TestMeasurement`, and `ReportContext`) so consumers +and tests can branch on provenance. Offline-mode entities also report +`is_simulated == True`. + +How to turn it on, in the order most projects pick: + +```bash +# In an .envrc, devcontainer, or CI job config +export SIFT_DISABLED=1 + +# Per-invocation kill-switch +pytest --sift-disabled + +# Per-project default (uncommon; online is usually the right default) +# pyproject.toml: +# [tool.pytest.ini_options] +# sift_disabled = true +``` + +Good fit for local dev without Sift credentials. Also for library consumers who +don't have a Sift tenant. Also useful in CI for runs that shouldn't add noise to +the report stream, like a PR job re-running the same suite five times in a row. + +## Replaying a saved log file + +When the worker doesn't finish cleanly the plugin will print a hint mentioning +`import-test-result-log`. To import: + +```bash +import-test-result-log <path> +``` + +That replays the saved JSONL log as a single batch (no `--incremental`) and +deletes the file when it lives under the system temp dir. diff --git a/python/mkdocs.yml b/python/mkdocs.yml index af174aa4f..5a9c73e82 100644 --- a/python/mkdocs.yml +++ b/python/mkdocs.yml @@ -51,6 +51,10 @@ extra: provider: mike alias: true +# Kept out of the nav but still built so the old URL redirects to the guide.
+not_in_nav: | + /examples/pytest_plugin.md + nav: - Home: index.md - Sift Client API @@ -59,11 +63,14 @@ nav: - examples/index.md - Basic Usage: examples/basic.ipynb - Data Ingestion: examples/ingestion.ipynb - # Will migrate to Guides in the future - - Pytest Plugin: examples/pytest_plugin.md - Pytest Plugin Quickstart: examples/pytest_plugin_quickstart.md - Guides: + - guides/index.md - Pytest Plugin: + - Overview: guides/pytest_plugin/index.md + - Configuration & Defaults: guides/pytest_plugin/configuration.md + - Running Modes: guides/pytest_plugin/running_modes.md + - Report Structure: guides/pytest_plugin/report_structure.md - Pass/Fail Behavior: guides/pytest_plugin/pass_fail_behavior.md # - Guides: # - Logging From 287f41a612298cf5136071582812c0438faf984b Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 26 May 2026 13:37:39 -0700 Subject: [PATCH 08/19] Python(docs): add v0.17.0 changelog entry for pytest plugin Co-Authored-By: Claude Opus 4.7 (1M context) --- python/CHANGELOG.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md index cc9fc06a8..8aa9d816a 100644 --- a/python/CHANGELOG.md +++ b/python/CHANGELOG.md @@ -3,6 +3,30 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). +## [v0.17.0] - Unreleased + +### What's New +#### Pytest Plugin +The client now ships a pytest plugin that turns a pytest run into a `TestReport` in Sift. Register it with a single `pytest_plugins = ["sift_client.pytest_plugin"]` line in your top-level `conftest.py`. Each test function becomes a `TestStep`, measurements appear as rows under that step, and failures roll up through nested substeps to the report. Enable it for a test by taking the autouse `step` fixture as an argument and calling `step.measure(...)` to record values against bounds. 
+ +Highlights: +- **Hierarchical report tree.** Packages, modules, classes, and parametrize axes above a test each become a parent step, so the report mirrors your test layout. Arbitrary substeps can be opened inside a test. +- **Three running modes.** Online (default) pings Sift at session start and streams create/update calls during the run; offline records to a JSONL log for later replay; disabled evaluates bounds locally without contacting Sift. Select with `--sift-offline` or `--sift-disabled`. +- **Graceful connection handling.** Online mode aborts at session start if Sift is unreachable or credentials are invalid, so a misconfigured job fails fast. If the connection drops mid-run, tests keep running and the log keeps writing locally; remaining entries upload afterward via the import command the plugin prints on exit. +- **Pass/fail mapping.** Every pytest outcome (pass, assertion failure, exception, skip, xfail, hard exit) maps to a `TestStatus` and propagates to parent steps and the report. `step.measure(...)` returns a pass/fail boolean without raising, so all measurements land in the report even when one fails; `step.fail_if_measurements_failed()` fails the test at the end without adding assertion noise to `error_info`. +- **Assertion messages as error info.** Assertion failure messages are reported as the step's error info. +- **Git metadata.** Repo, branch, and commit are captured on the report automatically. + +See the [Pytest Plugin guide](https://github.com/sift-stack/sift/blob/main/python/docs/guides/pytest_plugin/index.md) and the runnable quickstart example for full configuration. 
+ +### Full Changelog +- [Pytest plugin improvements](https://github.com/sift-stack/sift/pull/567) +- [Graceful handling of missing connection](https://github.com/sift-stack/sift/pull/569) +- [Hierarchical pytest report tree](https://github.com/sift-stack/sift/pull/570) +- [Pass/fail behavior improvements](https://github.com/sift-stack/sift/pull/568) +- [Report assertion message as error info](https://github.com/sift-stack/sift/pull/587) +- [Pytest docs reorganization](https://github.com/sift-stack/sift/pull/589) + ## [v0.16.2] - May 21, 2026 ### Bugfixes From 47556306f87eee9b3fe7ecb4fd99b806176977df Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 26 May 2026 13:50:03 -0700 Subject: [PATCH 09/19] revert rapidyaml version change --- python/pyproject.toml | 6 +----- python/uv.lock | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 0bb07e84a..a2cd6a410 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -26,11 +26,7 @@ keywords = ["sift", "sift-stack", "siftstack", "sift_py"] dependencies = [ "grpcio~=1.13", "PyYAML~=6.0", - # TODO: rapidyaml 0.13.0 ships C++ source that fails to compile against - # the GCC version on current GitHub Actions runners (csubstr operator= - # and SFINAE errors in the bundled c4core). Cap below 0.13 until either - # rapidyaml ships fixed sdists or we move to binary wheels. - "rapidyaml>=0.11,<0.13", + "rapidyaml~=0.11", "pandas>=2.0,<3.1", "protobuf>=5.0", "pydantic~=2.10", diff --git a/python/uv.lock b/python/uv.lock index 038a7ce09..9ed71e17b 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -4315,7 +4315,7 @@ wheels = [ [[package]] name = "sift-stack-py" -version = "0.16.2" +version = "0.17.0.dev0" source = { editable = "." 
} dependencies = [ { name = "alive-progress", version = "3.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, From fe9c0a38187477e43508a39fbb171a0803b005cd Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 26 May 2026 14:19:02 -0700 Subject: [PATCH 10/19] Python(chore): use inprocess to improve test performance (#590) --- .../_tests/pytest_plugin/conftest.py | 18 ++++--- .../_tests/pytest_plugin/test_hierarchy.py | 48 +++++++++---------- 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/python/lib/sift_client/_tests/pytest_plugin/conftest.py b/python/lib/sift_client/_tests/pytest_plugin/conftest.py index 783a12bf4..7afee768d 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/conftest.py +++ b/python/lib/sift_client/_tests/pytest_plugin/conftest.py @@ -9,13 +9,17 @@ block inside ``pytest_configure``, useful for inspecting internal state without running tests against a real backend -Every test in this suite invokes the inner session via -``pytester.runpytest_subprocess(...)`` rather than ``pytester.runpytest(...)``. -``runpytest`` runs the inner pytest in-process, which re-imports the Sift -plugin on each test; the plugin transitively imports numpy, whose C -extensions refuse to initialize twice in one process and raise -``cannot load module more than once per process``. Spawning a subprocess -gives each inner session a fresh interpreter and sidesteps that guard. +The offline-log tests (``test_hierarchy.py``, ``test_pass_fail.py``) drive the +inner session in-process via ``pytester.runpytest_inprocess(...)``. This is +fast because the outer session already preloads the plugin (``pyproject.toml`` +sets ``addopts = "... -p sift_client.pytest_plugin ..."``), so the numpy C +extensions the plugin pulls in are imported once for the whole outer process +and reused by every inner run — no per-test interpreter spawn, and no +``cannot load module more than once per process`` re-init guard to trip. 
+ +Tests that need true process isolation (fresh env vars, credential and +connection resolution, ini parsing) still use ``pytester.runpytest_subprocess(...)`` +so the inner session starts from a clean interpreter. """ from __future__ import annotations diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py index 1efd4e817..9e0dd52e0 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py @@ -76,7 +76,7 @@ def test_b(self): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -97,7 +97,7 @@ def test_a(self): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=1) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -125,7 +125,7 @@ def test_a(self, v): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -150,7 +150,7 @@ def test_y(self): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -175,7 +175,7 @@ def test_free(): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -205,7 +205,7 @@ def test_b(self): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -230,7 +230,7 @@ def 
test_b(self): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -252,7 +252,7 @@ def test_a(self): ''' ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=1) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -284,7 +284,7 @@ def test_y(self, w): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -396,7 +396,7 @@ def test_c(self): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2, failed=1) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -434,7 +434,7 @@ def test_b(self): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2, failed=1) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -476,7 +476,7 @@ def test_b(self): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -499,7 +499,7 @@ def test_a(self): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=1) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -524,7 +524,7 @@ def test_a(v): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -565,7 +565,7 @@ def test_y(self): """ ), ) - result = pytester.runpytest_subprocess("-v") + result = 
pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -593,7 +593,7 @@ def test_one(): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=1) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -637,7 +637,7 @@ def test_two(): ) # ``importlib`` import mode is required so two packages with the same # name on disk don't collide during sys.path-based import. - result = pytester.runpytest_subprocess("-v", "--import-mode=importlib") + result = pytester.runpytest_inprocess("-v", "--import-mode=importlib") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -666,7 +666,7 @@ def test_one(): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=1) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -697,7 +697,7 @@ def test_a(self, v): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -730,7 +730,7 @@ def test_rail(v): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -759,7 +759,7 @@ def test_iso(voltage, component): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=4) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -798,7 +798,7 @@ def test_widget(widget): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=2) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -831,7 +831,7 @@ def 
test_two(w): """ ), ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=4) steps = capture.load_steps(log_file) by_name = _by_name(steps) @@ -855,7 +855,7 @@ def test_chain(a, b): """ ) ) - result = pytester.runpytest_subprocess("-v") + result = pytester.runpytest_inprocess("-v") result.assert_outcomes(passed=1) steps = capture.load_steps(log_file) leaf = next(s for s in steps if s["name"].startswith("b=")) From ae0babe532de7826b6dde491a2253b7a4bbfac24 Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Thu, 28 May 2026 14:54:05 -0700 Subject: [PATCH 11/19] Python(feat): pytest summary output (#594) --- python/CHANGELOG.md | 1 + .../guides/pytest_plugin/configuration.md | 3 + .../guides/pytest_plugin/running_modes.md | 82 +++++ .../_internal/grpc_transport/transport.py | 25 +- python/lib/sift_client/_internal/rest.py | 4 +- python/lib/sift_client/_internal/urls.py | 55 +++ .../_tests/pytest_plugin/conftest.py | 2 +- .../pytest_plugin/test_terminal_output.py | 195 ++++++++++ python/lib/sift_client/_tests/test_urls.py | 74 ++++ python/lib/sift_client/client.py | 27 ++ python/lib/sift_client/pytest_plugin.py | 340 ++++++++++++++++++ .../lib/sift_client/sift_types/test_report.py | 32 ++ .../sift_client/transport/base_connection.py | 6 + .../util/test_results/context_manager.py | 60 +++- 14 files changed, 879 insertions(+), 27 deletions(-) create mode 100644 python/lib/sift_client/_internal/urls.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_terminal_output.py create mode 100644 python/lib/sift_client/_tests/test_urls.py diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md index 8aa9d816a..1b8c43a93 100644 --- a/python/CHANGELOG.md +++ b/python/CHANGELOG.md @@ -16,6 +16,7 @@ Highlights: - **Pass/fail mapping.** Every pytest outcome (pass, assertion failure, exception, skip, xfail, hard exit) maps to a `TestStatus` and propagates to parent steps and the report. 
`step.measure(...)` returns a pass/fail boolean without raising, so all measurements land in the report even when one fails; `step.fail_if_measurements_failed()` fails the test at the end without adding assertion noise to `error_info`. - **Assertion messages as error info.** Assertion failure messages are reported as the step's error info. - **Git metadata.** Repo, branch, and commit are captured on the report automatically. +- **Terminal output.** The plugin prints a session header with the SDK version and active mode, and an end-of-run `Sift report` panel showing the test case, outcome, step and measurement breakdowns (color-coded), test system/operator, plus a link to the report (online), the saved log and upload command (offline), or a disabled note. Both suppress under `-q`. `SiftClient.app_url` exposes the web-app origin; set `sift_report_url_base` for on-prem or custom deployments. `--sift-open-report` opens the report in a browser at session end. See the [Pytest Plugin guide](https://github.com/sift-stack/sift/blob/main/python/docs/guides/pytest_plugin/index.md) and the runnable quickstart example for full configuration. diff --git a/python/docs/guides/pytest_plugin/configuration.md b/python/docs/guides/pytest_plugin/configuration.md index 6ed78f931..3b3151111 100644 --- a/python/docs/guides/pytest_plugin/configuration.md +++ b/python/docs/guides/pytest_plugin/configuration.md @@ -132,6 +132,8 @@ def sift_client() -> SiftClient: | `--sift-disabled` | off | Skip Sift entirely. Nothing contacts the API and no log file is written; `step.measure(...)` still evaluates bounds and returns a real pass/fail boolean. Also honored via `SIFT_DISABLED=1`. Supersedes every other flag (disabled wins over offline). | | `--sift-log-file=` | temp file | Where the JSONL log of create/update calls goes. With a log file set, the plugin spawns an `import-test-result-log --incremental` worker that polls the file and replays entries against Sift while the run is in flight. 
Pass `false` to disable the file entirely; create/update calls then go straight to the API synchronously during tests. Incompatible with `--sift-offline` since offline mode needs the log file as its sole sink. | | `--no-sift-git-metadata` | git metadata on | Skip capturing git repo/branch/commit on the report's metadata. | +| `--sift-report-url-base=` | derived from REST URI | Web-app origin used to build the clickable report link in the terminal footer (e.g. `https://app.siftstack.com`). Set this for on-prem or custom deployments whose API host can't be mapped to a frontend automatically. Also honored via the `SIFT_APP_URL` environment variable. When unset, the link is derived from the REST URI for known Sift hosts. | +| `--sift-open-report` | off | Open the resulting report in a browser at session end. Online mode only; a no-op when the report URL can't be resolved. Intended for local development. | These can be passed permanently via `addopts`: @@ -158,6 +160,7 @@ CLI flags, when passed, override the ini values. | `sift_module_step` | bool (default `true`) | _(ini-only)_. Opens a parent step for each test module (file). | | `sift_class_step` | bool (default `true`) | _(ini-only)_. Opens a parent step for each test class, including nested classes. | | `sift_parametrize_nesting` | bool (default `true`) | _(ini-only)_. Clusters parametrized tests under shared parents (`test_x`, `axis=value`) instead of flat leaves (`test_x[value]`). 
| +| `sift_open_report` | bool (default `false`) | `--sift-open-report` | ```toml title="pyproject.toml" [tool.pytest.ini_options] diff --git a/python/docs/guides/pytest_plugin/running_modes.md b/python/docs/guides/pytest_plugin/running_modes.md index e69688cf1..9289428e4 100644 --- a/python/docs/guides/pytest_plugin/running_modes.md +++ b/python/docs/guides/pytest_plugin/running_modes.md @@ -25,6 +25,88 @@ pytest --sift-log-file=./sift-results.jsonl Pass both flags and disabled wins: it skips Sift entirely and supersedes every other setting. +## Terminal output + +Each run prints a header with the SDK version and active mode, and an end-of-run +`Sift report` panel summarizing the outcome. Both are suppressed under `-q`. The +panel is color-coded when the terminal supports it (green pass, red +failure/error, yellow skip, cyan link) and plain text otherwise (`--color=no`, +captured output, CI logs). + +The section title carries the report name (truncated if long). The `Steps` row +tallies every step in the report by final status, so it counts substeps and the +package/module/class/parametrize grouping steps too — its totals are expected to +exceed pytest's own test count. The `Measurements` row tallies recorded +measurements (`step.measure(...)`) and is omitted when there are none. The +`Test case` and `System` rows echo the report's test case, test system, and +operator. + +**Online** shows the report metadata, step and measurement breakdowns, and a +clickable link. The web host is derived from the REST URI for known Sift hosts; +for on-prem or custom deployments set `--sift-report-url-base` +(ini: `sift_report_url_base`, env: `SIFT_APP_URL`). Add `--sift-open-report` to +open the report in a browser at session end. + +```text +============================= test session starts ============================== +platform linux -- Python 3.11.8, pytest-8.3.2, pluggy-1.5.0 +Sift: sift-stack-py 0.17.0 — online mode +collected 12 items + +tests/test_battery.py ........ 
[ 66%] +tests/test_thermal.py .... [100%] + +================ Sift report · pytest tests/ 2026-05-27T22:44:23Z ============== + Test case pytest tests/ + Status PASSED online · sift-stack-py 0.17.0 + Steps 14 passed + Measurements 42 passed + System ci-runner-7 · cibot + Log file /tmp/sift-a1b2c3.jsonl + Report https://app.siftstack.com/test-results/0193f1a2-7c44-7e5b-9b1a-2f6c0d9e84aa +============================== 12 passed in 3.45s ============================== +``` + +If the background uploader doesn't finish, the panel still links the report and +flags that it may be incomplete: + +```text +================ Sift report · pytest tests/ 2026-05-27T22:44:23Z ============== + Test case pytest tests/ + Status FAILED online · sift-stack-py 0.17.0 + Steps 11 passed · 2 failed · 1 error + Measurements 40 passed · 3 failed + System ci-runner-7 · cibot + Log file /tmp/sift-a1b2c3.jsonl + Report https://app.siftstack.com/test-results/0193f1a2-7c44-7e5b-9b1a-2f6c0d9e84aa + may be incomplete — finish with: import-test-result-log /tmp/sift-a1b2c3.jsonl +``` + +When the web host can't be resolved and no override is set, the `Report` row +shows the report id instead of a link. + +**Offline** shows the metadata and breakdowns, then the upload command under a +small rule (the log path is part of the command): + +```text +================ Sift report · pytest tests/ 2026-05-27T22:44:23Z ============== + Test case pytest tests/ + Status PASSED offline · not uploaded + Steps 14 passed + Measurements 42 passed + System ci-runner-7 · cibot + Log file ./run.jsonl +------------------------------ to upload to Sift ------------------------------- + >> import-test-result-log ./run.jsonl +``` + +**Disabled** notes that no report was created: + +```text +===================================== Sift ===================================== +Sift disabled — no test report created. +``` + ## Online mode (default) `report_context` resolves `client_has_connection` at session start. 
The default diff --git a/python/lib/sift_client/_internal/grpc_transport/transport.py b/python/lib/sift_client/_internal/grpc_transport/transport.py index 7e0bc5425..e088befa0 100644 --- a/python/lib/sift_client/_internal/grpc_transport/transport.py +++ b/python/lib/sift_client/_internal/grpc_transport/transport.py @@ -8,7 +8,6 @@ from importlib.metadata import PackageNotFoundError, version from typing import TYPE_CHECKING, Any, TypedDict, cast -from urllib.parse import ParseResult, urlparse import grpc import grpc.aio as grpc_aio @@ -21,6 +20,7 @@ Metadata, MetadataInterceptor, ) +from sift_client._internal.urls import parse_host if TYPE_CHECKING: from sift_client._internal.grpc_transport._async_interceptors.base import ClientAsyncInterceptor @@ -78,7 +78,7 @@ def use_sift_channel( credentials = get_ssl_credentials(cert_via_openssl) options = _compute_channel_options(config) - api_uri = _clean_uri(config["uri"], use_ssl) + api_uri = parse_host(config["uri"]) channel = grpc.secure_channel(api_uri, credentials, options) interceptors = _compute_sift_interceptors(config, metadata) return grpc.intercept_channel(channel, *interceptors) @@ -98,7 +98,7 @@ def use_sift_async_channel( return _use_insecure_sift_async_channel(config, metadata) return grpc_aio.secure_channel( - target=_clean_uri(config["uri"], use_ssl), + target=parse_host(config["uri"]), credentials=get_ssl_credentials(cert_via_openssl), options=_compute_channel_options(config), interceptors=_compute_sift_async_interceptors(config, metadata), @@ -112,7 +112,7 @@ def _use_insecure_sift_channel( FOR DEVELOPMENT PURPOSES ONLY """ options = _compute_channel_options(config) - api_uri = _clean_uri(config["uri"], False) + api_uri = parse_host(config["uri"]) channel = grpc.insecure_channel(api_uri, options) interceptors = _compute_sift_interceptors(config, metadata) return grpc.intercept_channel(channel, *interceptors) @@ -125,7 +125,7 @@ def _use_insecure_sift_async_channel( FOR DEVELOPMENT PURPOSES ONLY """ return 
grpc_aio.insecure_channel( - target=_clean_uri(config["uri"], False), + target=parse_host(config["uri"]), options=_compute_channel_options(config), interceptors=_compute_sift_async_interceptors(config, metadata), ) @@ -205,21 +205,6 @@ def _metadata_async_interceptor( return MetadataAsyncInterceptor(md) -def _clean_uri(uri: str, use_ssl: bool) -> str: - """ - This will automatically transform the URI to an acceptable form regardless of whether or not - users included the scheme in the URL or included trailing slashes. - """ - - if "http://" in uri or "https://" in uri: - parsed: ParseResult = urlparse(uri) - return parsed.netloc - - full_uri = f"https://{uri}" if use_ssl else f"http://{uri}" - parsed_res: ParseResult = urlparse(full_uri) - return parsed_res.netloc - - def _compute_user_agent() -> str: try: return f"sift_stack_py/{version('sift_stack_py')}" diff --git a/python/lib/sift_client/_internal/rest.py b/python/lib/sift_client/_internal/rest.py index ee0239b79..6a9d1c9d1 100644 --- a/python/lib/sift_client/_internal/rest.py +++ b/python/lib/sift_client/_internal/rest.py @@ -6,7 +6,7 @@ from typing_extensions import NotRequired from urllib3.util import Retry -from sift_client._internal.grpc_transport.transport import _clean_uri +from sift_client._internal.urls import parse_host _DEFAULT_REST_RETRY = Retry(total=3, status_forcelist=[500, 502, 503, 504], backoff_factor=1) @@ -33,7 +33,7 @@ class SiftRestConfig(TypedDict): def compute_uri(restconf: SiftRestConfig) -> str: uri = restconf["uri"] use_ssl = restconf.get("use_ssl", True) - clean_uri = _clean_uri(uri, use_ssl) + clean_uri = parse_host(uri) if use_ssl: return f"https://{clean_uri}" diff --git a/python/lib/sift_client/_internal/urls.py b/python/lib/sift_client/_internal/urls.py new file mode 100644 index 000000000..99dd1816f --- /dev/null +++ b/python/lib/sift_client/_internal/urls.py @@ -0,0 +1,55 @@ +"""Helpers for turning Sift API endpoints into web-app (frontend) URLs. 
+ +The Sift frontend can be hosted on several domains and the backend exposes no +field for its own URL, so the frontend origin is derived client-side from the +API host. This table mirrors the canonical mapping used by the Grafana +datasource (sift-stack/sift-grafana-datasource, +``src/components/sharelink/getFrontendHostnameDefaults.ts``). Hosts outside the +table (on-prem and custom deployments) require an explicit override. +""" + +from __future__ import annotations + +from urllib.parse import urlparse + +# API host (host[:port], no scheme) -> frontend origin (with scheme). +_API_HOST_TO_FRONTEND_ORIGIN: dict[str, str] = { + "api.siftstack.com": "https://app.siftstack.com", + "gov.api.siftstack.com": "https://gov.siftstack.com", +} + + +def parse_origin(url: str) -> str: + """Normalize a URL or bare host into a ``scheme://host[:port]`` origin. + + Bare hosts (no scheme) are assumed to be ``https``. + """ + candidate = url if "://" in url else f"https://{url}" + parsed = urlparse(candidate) + return f"{parsed.scheme}://{parsed.netloc}".rstrip("/") + + +def parse_host(url: str) -> str: + """Extract ``host[:port]`` from a URL or bare host string.""" + candidate = url if "://" in url else f"https://{url}" + return urlparse(candidate).netloc + + +def frontend_origin_for_api(api_base_url: str, override: str | None = None) -> str | None: + """Return the Sift web-app origin for a given API base URL. + + Args: + api_base_url: The REST API base URL (e.g. ``https://api.siftstack.com``). + override: An explicit frontend origin (host or full URL) to use instead + of the derived value. Set this for on-prem or custom deployments + whose API host isn't in the built-in mapping. + + Returns: + The frontend origin (e.g. ``https://app.siftstack.com``), or ``None`` + when no override is given and the API host isn't recognized. 
+ """ + if override: + return parse_origin(override) + if not api_base_url: + return None + return _API_HOST_TO_FRONTEND_ORIGIN.get(parse_host(api_base_url)) diff --git a/python/lib/sift_client/_tests/pytest_plugin/conftest.py b/python/lib/sift_client/_tests/pytest_plugin/conftest.py index 7afee768d..ba775e04b 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/conftest.py +++ b/python/lib/sift_client/_tests/pytest_plugin/conftest.py @@ -29,7 +29,7 @@ import pytest -_SIFT_ENV_VARS = ("SIFT_API_KEY", "SIFT_GRPC_URI", "SIFT_REST_URI", "SIFT_DISABLED") +_SIFT_ENV_VARS = ("SIFT_API_KEY", "SIFT_GRPC_URI", "SIFT_REST_URI", "SIFT_DISABLED", "SIFT_APP_URL") @pytest.fixture diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_terminal_output.py b/python/lib/sift_client/_tests/pytest_plugin/test_terminal_output.py new file mode 100644 index 000000000..76550cc22 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_terminal_output.py @@ -0,0 +1,195 @@ +"""Tests for the plugin's terminal output (session header + report footer). + +Driven through inner pytester sessions. Online output is exercised by the +``SiftClient.app_url`` unit tests (``_tests/test_urls.py``) since a live link +needs a real backend; here we cover the deterministic disabled/offline footers +and the ``-q`` suppression both share. 
+""" + +from __future__ import annotations + +from collections import Counter +from types import SimpleNamespace +from typing import TYPE_CHECKING, Callable + +from sift_client._internal.low_level_wrappers._test_results_log import LogTracking +from sift_client.pytest_plugin import ( + _measurement_segments, + _resolve_real_report_id, + _step_count_segments, +) +from sift_client.sift_types.test_report import TestStatus + +if TYPE_CHECKING: + from pathlib import Path + + import pytest + + +class TestStepCountSegments: + def test_lists_nonzero_statuses_in_order_with_color(self) -> None: + counts = Counter({TestStatus.PASSED: 4, TestStatus.FAILED: 2, TestStatus.SKIPPED: 1}) + assert _step_count_segments(counts) == [ + ("4 passed", {"green": True}), + ("2 failed", {"red": True}), + ("1 skipped", {"yellow": True}), + ] + + def test_error_and_aborted_are_red(self) -> None: + counts = Counter({TestStatus.ERROR: 1, TestStatus.ABORTED: 1}) + assert _step_count_segments(counts) == [ + ("1 error", {"red": True}), + ("1 aborted", {"red": True}), + ] + + def test_empty_is_empty(self) -> None: + assert _step_count_segments(Counter()) == [] + + +class TestMeasurementSegments: + def test_passed_green_failed_red(self) -> None: + assert _measurement_segments(Counter({True: 2, False: 1})) == [ + ("2 passed", {"green": True}), + ("1 failed", {"red": True}), + ] + + def test_empty_is_empty(self) -> None: + assert _measurement_segments(Counter()) == [] + + +class TestResolveRealReportId: + """``_resolve_real_report_id`` maps the footer to the real server report id.""" + + def test_synchronous_online_uses_report_id_directly(self) -> None: + # No log file, non-simulated report (``--sift-log-file=false`` path). 
+ context = SimpleNamespace( + report=SimpleNamespace(id_="real-123", is_simulated=False), + log_file=None, + ) + assert _resolve_real_report_id(context) == "real-123" + + def test_incremental_resolves_via_sidecar(self, tmp_path: Path) -> None: + log_file = tmp_path / "run.jsonl" + log_file.write_text("") + LogTracking(id_map={"sim-1": "real-1"}).save(log_file) + context = SimpleNamespace( + report=SimpleNamespace(id_="sim-1", is_simulated=True), + log_file=log_file, + ) + assert _resolve_real_report_id(context) == "real-1" + + def test_empty_report_id_returns_none(self) -> None: + # An unset/empty id must not produce a ``/test-results/`` link. + context = SimpleNamespace( + report=SimpleNamespace(id_="", is_simulated=False), + log_file=None, + ) + assert _resolve_real_report_id(context) is None + + def test_incremental_unmapped_returns_none(self, tmp_path: Path) -> None: + # Worker died before mapping the report: no sidecar entry. + log_file = tmp_path / "run.jsonl" + log_file.write_text("") + context = SimpleNamespace( + report=SimpleNamespace(id_="sim-1", is_simulated=True), + log_file=log_file, + ) + assert _resolve_real_report_id(context) is None + + +class TestHeader: + def test_header_shows_version_and_mode( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """The session header reports the SDK version and the active mode.""" + write_plugin_conftest() + pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)") + result = pytester.runpytest_subprocess("--sift-disabled") + result.assert_outcomes(passed=1) + result.stdout.fnmatch_lines(["*sift-stack-py*disabled mode*"]) + + def test_header_suppressed_under_quiet( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """``-q`` suppresses the header, matching pytest's own platform header.""" + write_plugin_conftest() + pytester.makepyfile("def 
test_runs(step): step.measure(name='v', value=1.0)") + result = pytester.runpytest_subprocess("--sift-disabled", "-q") + result.assert_outcomes(passed=1) + result.stdout.no_fnmatch_line("*sift-stack-py*") + + +class TestDisabledFooter: + def test_footer_notes_no_report( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + write_plugin_conftest() + pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)") + result = pytester.runpytest_subprocess("--sift-disabled") + result.assert_outcomes(passed=1) + result.stdout.fnmatch_lines(["*Sift disabled*no test report created*"]) + + def test_footer_suppressed_under_quiet( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + write_plugin_conftest() + pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)") + result = pytester.runpytest_subprocess("--sift-disabled", "-q") + result.assert_outcomes(passed=1) + result.stdout.no_fnmatch_line("*Sift disabled*") + + +class TestOfflineFooter: + def test_footer_shows_log_path_and_replay_command( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Offline footer points at the saved log file and the replay command.""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)") + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + result.stdout.fnmatch_lines( + [ + "*Test case*", + "*Status*offline*not uploaded*", + "*Steps*passed*", + "*Measurements*1 passed*", + "*System*", + f"*Log file*{log_path}", + "*to upload to Sift*", + f"*import-test-result-log {log_path}", + ] + ) + + def test_sift_open_report_flag_is_accepted_offline( + self, + pytester: pytest.Pytester, + 
tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """``--sift-open-report`` is a no-op offline (no resolvable URL) and never errors.""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)") + result = pytester.runpytest_subprocess( + "--sift-offline", f"--sift-log-file={log_path}", "--sift-open-report" + ) + result.assert_outcomes(passed=1) diff --git a/python/lib/sift_client/_tests/test_urls.py b/python/lib/sift_client/_tests/test_urls.py new file mode 100644 index 000000000..be9febd52 --- /dev/null +++ b/python/lib/sift_client/_tests/test_urls.py @@ -0,0 +1,74 @@ +"""Tests for web-app URL derivation (``_internal/urls.py`` and ``SiftClient.app_url``).""" + +from __future__ import annotations + +import pytest + +from sift_client import SiftClient, SiftConnectionConfig +from sift_client._internal.urls import frontend_origin_for_api + + +class TestFrontendOriginForApi: + @pytest.mark.parametrize( + ("api_base_url", "expected"), + [ + ("https://api.siftstack.com", "https://app.siftstack.com"), + ("https://gov.api.siftstack.com", "https://gov.siftstack.com"), + # Bare host (no scheme) resolves the same as the full URL. + ("api.siftstack.com", "https://app.siftstack.com"), + ], + ) + def test_known_hosts(self, api_base_url: str, expected: str) -> None: + assert frontend_origin_for_api(api_base_url) == expected + + def test_unknown_host_returns_none(self) -> None: + assert frontend_origin_for_api("https://api.acme.example.com") is None + + def test_empty_returns_none(self) -> None: + assert frontend_origin_for_api("") is None + + def test_override_wins_over_derivation(self) -> None: + # Override applies even for a known host. 
+ assert ( + frontend_origin_for_api("https://api.siftstack.com", override="https://app.acme.test") + == "https://app.acme.test" + ) + + def test_override_normalizes_bare_host(self) -> None: + assert ( + frontend_origin_for_api("https://api.acme.example.com", override="sift.acme.test") + == "https://sift.acme.test" + ) + + +class TestSiftClientAppUrl: + def _client(self, rest_url: str, app_url: str | None = None) -> SiftClient: + return SiftClient( + connection_config=SiftConnectionConfig( + api_key="k", + grpc_url="grpc-api.siftstack.com:443", + rest_url=rest_url, + ), + app_url=app_url, + ) + + def test_derives_from_known_rest_host(self) -> None: + assert self._client("https://api.siftstack.com").app_url == "https://app.siftstack.com" + + def test_unknown_host_without_override_is_none(self) -> None: + assert self._client("https://api.acme.example.com").app_url is None + + def test_override_used_for_unknown_host(self) -> None: + client = self._client("https://api.acme.example.com", app_url="https://sift.acme.test") + assert client.app_url == "https://sift.acme.test" + + def test_override_from_connection_config(self) -> None: + client = SiftClient( + connection_config=SiftConnectionConfig( + api_key="k", + grpc_url="grpc-api.siftstack.com:443", + rest_url="https://api.acme.example.com", + app_url="https://sift.acme.test", + ) + ) + assert client.app_url == "https://sift.acme.test" diff --git a/python/lib/sift_client/client.py b/python/lib/sift_client/client.py index ff574adba..d77aff6c0 100644 --- a/python/lib/sift_client/client.py +++ b/python/lib/sift_client/client.py @@ -1,5 +1,6 @@ from __future__ import annotations +from sift_client._internal.urls import frontend_origin_for_api from sift_client.resources import ( AssetsAPI, AssetsAPIAsync, @@ -124,6 +125,7 @@ def __init__( grpc_url: str | None = None, rest_url: str | None = None, connection_config: SiftConnectionConfig | None = None, + app_url: str | None = None, ): """Initialize the SiftClient with specific 
connection parameters or a connection_config. @@ -132,6 +134,10 @@ def __init__( grpc_url: The Sift gRPC API URL. rest_url: The Sift REST API URL. connection_config: A SiftConnectionConfig object to configure the connection behavior of the SiftClient. + app_url: The Sift web-app origin (e.g. ``https://app.siftstack.com``). + Set this for on-prem or custom deployments whose API host can't be + mapped to a frontend automatically; see the ``app_url`` property. + A value here takes precedence over ``connection_config.app_url``. """ if not (api_key and grpc_url and rest_url) and not connection_config: raise ValueError( @@ -152,6 +158,12 @@ def __init__( WithGrpcClient.__init__(self, grpc_client=grpc_client) WithRestClient.__init__(self, rest_client=rest_client) + # Explicit web-app origin override; falls back to the connection config's + # value, then to host-based derivation in the ``app_url`` property. + self._app_url: str | None = app_url or ( + connection_config.app_url if connection_config else None + ) + # When set, test-results writes return synthesized responses without # contacting Sift. Read by `TestResultsAPIAsync._simulate`. Used by the # pytest plugin's ``--sift-disabled`` mode. @@ -198,3 +210,18 @@ def grpc_client(self) -> GrpcClient: def rest_client(self) -> RestClient: """The REST client used by the SiftClient for making REST API calls.""" return self._rest_client + + @property + def app_url(self) -> str | None: + """The Sift web-app origin for this client, or None if it can't be determined. + + Uses the explicit override passed at construction when set, otherwise + derives the origin from the REST host for known Sift deployments (e.g. + ``https://api.siftstack.com`` -> ``https://app.siftstack.com``). Returns + None for unrecognized hosts with no override. + + # TODO: Add a ``WithAppPage`` mixin on BaseType so resources (TestReport, + # Run, ...) 
can expose their own web-app link from ``_client.app_url`` plus + # a per-type path, instead of callers assembling paths by hand. + """ + return frontend_origin_for_api(self.rest_client.base_url, override=self._app_url) diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index 09aca5e33..cf85b3abb 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -15,6 +15,7 @@ from sift_client.sift_types.test_report import ErrorInfo, TestStatus from sift_client.util.test_results import ReportContext from sift_client.util.test_results.context_manager import ( + _quiet_fork_stderr, format_assertion_message, format_truncated_traceback, ) @@ -42,6 +43,12 @@ class SiftPytestStepDrainError(RuntimeError): REPORT_CONTEXT: Any = None +# Set at session end with the resolved (real) report id/URL when online and +# uploaded. Read from a project's conftest in a later hook (e.g. +# ``pytest_unconfigure``) to post the link, write a file, etc. +SIFT_REPORT_ID_STASH_KEY = pytest.StashKey[str]() +SIFT_REPORT_URL_STASH_KEY = pytest.StashKey[str]() + _STASH_MISSING = object() _PARAMETRIZE_PATH_KEY = pytest.StashKey[Tuple[str, ...]]() @@ -297,6 +304,33 @@ class _Option: "this ini value.", ) +_REPORT_URL_BASE = _Option( + cli_flag="--sift-report-url-base", + ini_name="sift_report_url_base", + cli_help="Sift web-app origin used to build the clickable report link in the " + "terminal footer (e.g. https://app.siftstack.com). Set this for on-prem or " + "custom deployments whose API host can't be mapped to a frontend " + "automatically. Also honored via the SIFT_APP_URL env var. When unset, the " + "link is derived from the REST URI for known Sift hosts.", + ini_help="Default for --sift-report-url-base. The Sift web-app origin used to " + "build the report link in the terminal footer. Also honored via the " + "SIFT_APP_URL env var. 
When unset, the link is derived from the REST URI for " + "known Sift hosts.", +) + +_OPEN = _Option( + cli_flag="--sift-open-report", + ini_name="sift_open_report", + action="store_true", + cli_help="Open the resulting Sift test report in a browser at session end. " + "Online mode only; no-op when the report URL can't be resolved. Intended for " + "local development.", + ini_help="When true, open the report in a browser at session end (online only). " + "Defaults to false.", + ini_type="bool", + ini_default=False, +) + _AUTOUSE = _Option( ini_name="sift_autouse", ini_help="Default for the Sift autouse fixtures (report_context, step, " @@ -350,6 +384,8 @@ class _Option: _DISABLED, _GRPC_URI, _REST_URI, + _REPORT_URL_BASE, + _OPEN, _AUTOUSE, _PACKAGE_STEP, _MODULE_STEP, @@ -445,6 +481,305 @@ def _is_disabled(pytestconfig: pytest.Config | None) -> bool: return os.getenv("SIFT_DISABLED", "").lower() in ("1", "true", "yes") +def _sdk_version() -> str: + """Return the installed ``sift_stack_py`` version, or ``"unknown"``.""" + from importlib.metadata import PackageNotFoundError, version + + try: + return version("sift_stack_py") + except PackageNotFoundError: + return "unknown" + + +def _mode_label(config: pytest.Config) -> str: + """Resolve the active mode for the terminal header: disabled > offline > online.""" + if _is_disabled(config): + return "disabled" + if _is_offline(config): + return "offline" + return "online" + + +def pytest_report_header(config: pytest.Config) -> str | None: + """Emit a session-start header with the SDK version and active mode. + + Suppressed under ``-q`` (negative verbosity), matching how pytest hides its + own platform/plugin header. + """ + if config.get_verbosity() < 0: + return None + return f"Sift: sift-stack-py {_sdk_version()} — {_mode_label(config)} mode" + + +def _resolve_real_report_id(context: Any) -> str | None: + """Resolve the real server-side report id for the online footer link. 
+ + In synchronous online mode (``--sift-log-file=false``) the report is created + directly against the API, so ``report.id_`` is already the real id. In the + default incremental mode the report is created through the simulate path + (a client-side UUID) and the background worker maps it to the real id on + replay, recording it in the ``.tracking`` sidecar's ``id_map``. By the + time this footer runs the session-scoped report context has torn down and + the worker has drained, so the sidecar is final. + + Returns ``None`` when the worker never mapped the report (e.g. it died before + replaying the create), meaning no real report exists to link. + """ + report = context.report + if not report.id_: + # No id was ever assigned (unset/empty); nothing to link. + return None + sim_id = str(report.id_) + if not getattr(report, "is_simulated", False): + return sim_id + log_file = getattr(context, "log_file", None) + if log_file is None: + return None + from sift_client._internal.low_level_wrappers._test_results_log import LogTracking + + return LogTracking.load(log_file).id_map.get(sim_id) + + +_LABEL_WIDTH = 13 + + +def _sift_kv(terminalreporter: Any, label: str, value: str, **value_markup: bool) -> None: + """Write an indented ``label value`` row, bolding the label. + + ``value_markup`` (e.g. ``green=True``, ``cyan=True``) styles only the value. + Color is dropped automatically when the terminal has no markup (not a TTY or + ``--color=no``), so captured/CI output stays plain text. + """ + terminalreporter.write(" ") + terminalreporter.write(f"{label:<{_LABEL_WIDTH}}", bold=True) + terminalreporter.write_line(value, **value_markup) + + +# Step-count breakdown order and labels for the footer's "Steps" row. +_STEP_COUNT_ORDER: tuple[tuple[TestStatus, str], ...] 
= ( + (TestStatus.PASSED, "passed"), + (TestStatus.FAILED, "failed"), + (TestStatus.ERROR, "error"), + (TestStatus.ABORTED, "aborted"), + (TestStatus.SKIPPED, "skipped"), + (TestStatus.IN_PROGRESS, "in progress"), +) + + +# Per-status color for the footer's step breakdown: green pass, red +# failure/error/abort, yellow skip; in-progress (and anything else) stays plain. +_STEP_STATUS_MARKUP: dict[TestStatus, dict[str, bool]] = { + TestStatus.PASSED: {"green": True}, + TestStatus.FAILED: {"red": True}, + TestStatus.ERROR: {"red": True}, + TestStatus.ABORTED: {"red": True}, + TestStatus.SKIPPED: {"yellow": True}, +} + + +def _step_count_segments(counts: Any) -> list[tuple[str, dict[str, bool]]]: + """Build ``(text, markup)`` segments for a step tally, non-zero only.""" + return [ + (f"{counts.get(status, 0)} {label}", _STEP_STATUS_MARKUP.get(status, {})) + for status, label in _STEP_COUNT_ORDER + if counts.get(status, 0) + ] + + +def _measurement_segments(counts: Any) -> list[tuple[str, dict[str, bool]]]: + """Build ``(text, markup)`` segments for a measurement tally, non-zero only.""" + segments: list[tuple[str, dict[str, bool]]] = [] + if counts.get(True, 0): + segments.append((f"{counts[True]} passed", {"green": True})) + if counts.get(False, 0): + segments.append((f"{counts[False]} failed", {"red": True})) + return segments + + +def _write_count_row( + terminalreporter: Any, label: str, segments: list[tuple[str, dict[str, bool]]] +) -> None: + """Write a ``label a · b · c`` row, applying each segment's color markup.""" + terminalreporter.write(" ") + terminalreporter.write(f"{label:<{_LABEL_WIDTH}}", bold=True) + for index, (text, markup) in enumerate(segments): + if index: + terminalreporter.write(" · ") + terminalreporter.write(text, **markup) + terminalreporter.write_line("") + + +def _report_panel_title(report: Any, terminalreporter: Any) -> str: + """``Sift report · `` for the section rule, truncated to the terminal width. 
+ + The report name embeds a timestamp (and, for invocation-based runs, the + pytest args), so a long name is truncated with an ellipsis to keep the + separator line from wrapping. + """ + base = "Sift report" + name = getattr(report, "name", None) + if not name: + return base + title = f"{base} · {name}" + fullwidth = getattr(getattr(terminalreporter, "_tw", None), "fullwidth", 80) + # Reserve room for the separator characters and spaces write_sep adds. + limit = max(len(base), fullwidth - 8) + if len(title) > limit: + title = title[: limit - 1] + "…" + return title + + +def _maybe_open_report(url: str) -> None: + """Best-effort open the report URL in a browser (for ``--sift-open-report``). + + Skipped on CI or non-interactive sessions so a committed ``sift_open_report`` + setting can't spawn a browser on a headless agent; the flag is meant for + local development. + """ + import sys + import webbrowser + + if os.environ.get("CI") or not sys.stdout.isatty(): + return + try: + # webbrowser.open forks/execs the platform opener while the gRPC client's + # background threads are live; redirect fd 2 across the fork to swallow + # gRPC's prefork notice (same treatment as the plugin's other fork sites). + with _quiet_fork_stderr(): + webbrowser.open(url) + except Exception: + # Headless / no browser available: opening is a convenience, never fatal. + pass + + +def pytest_terminal_summary(terminalreporter: Any, exitstatus: int, config: pytest.Config) -> None: + """Emit a session-end Sift report summary, adapting per mode. + + The printed panel is suppressed under ``-q``, but programmatic side effects + (stashing the report ref for ``conftest.py``, ``--sift-open-report``) still run so + other plugins and CI steps can consume the result. The panel shows the + outcome (green/red), step and measurement tallies, and a per-mode action: a + report link (online), the upload command (offline), or a disabled note. 
+ """ + quiet = config.get_verbosity() < 0 + + if _is_disabled(config): + if not quiet: + terminalreporter.write_sep("=", "Sift", cyan=True, bold=True) + terminalreporter.write_line("Sift disabled — no test report created.") + return + + context = REPORT_CONTEXT + if context is None: + # No gated test ran, so no report context was created. Nothing to show. + return + + log_file = getattr(context, "log_file", None) + offline = _is_offline(config) + + # Resolve the report link first so stashing and --sift-open-report run even under + # -q (programmatic consumers don't care about verbosity). Truthiness, not + # ``is not None``: a resolved-but-empty id (degenerate sidecar mapping, unset + # proto field) must fall through to the "not uploaded" path, not produce a + # ``/test-results/`` link. + report_id = None if offline else _resolve_real_report_id(context) + report_url = ( + f"{context.client.app_url}/test-results/{report_id}" + if report_id and context.client.app_url + else None + ) + if report_id: + config.stash[SIFT_REPORT_ID_STASH_KEY] = report_id + if report_url is not None: + config.stash[SIFT_REPORT_URL_STASH_KEY] = report_url + if _option_or_ini(config, _OPEN): + _maybe_open_report(report_url) + + if quiet: + return + + failed = bool(getattr(context, "any_failures", False)) + status_word, status_markup = ( + ("FAILED", {"red": True, "bold": True}) + if failed + else ("PASSED", {"green": True, "bold": True}) + ) + # Offline results live only in the local log until replayed, so the status + # row calls that out instead of repeating the version (already in the header). + status_context = ( + f"{_mode_label(config)} · not uploaded" + if offline + else f"{_mode_label(config)} · sift-stack-py {_sdk_version()}" + ) + + report = context.report + + terminalreporter.write_sep( + "=", _report_panel_title(report, terminalreporter), cyan=True, bold=True + ) + + # Identity row: the test case (test path or pytest invocation). 
+ if report.test_case: + _sift_kv(terminalreporter, "Test case", str(report.test_case)) + + # Status row: colored outcome, then compact mode context. + terminalreporter.write(" ") + terminalreporter.write(f"{'Status':<{_LABEL_WIDTH}}", bold=True) + terminalreporter.write(status_word, **status_markup) + terminalreporter.write_line(f" {status_context}") + + # Step + measurement tallies (green pass, red failure, yellow skip). + _write_count_row( + terminalreporter, + "Steps", + _step_count_segments(context.step_status_counts) or [("no steps", {})], + ) + measurement_segments = _measurement_segments(context.measurement_counts) + if measurement_segments: + _write_count_row(terminalreporter, "Measurements", measurement_segments) + + # Provenance row: test system and operator. + system = " · ".join(part for part in (report.test_system_name, report.system_operator) if part) + if system: + _sift_kv(terminalreporter, "System", system) + + # Local log file (write-through backup online, sole sink offline). + if log_file is not None: + _sift_kv(terminalreporter, "Log file", str(log_file)) + + if offline: + if log_file is not None: + terminalreporter.write_sep("-", "to upload to Sift") + terminalreporter.write_line(f" >> import-test-result-log {log_file}", cyan=True) + return + + if not report_id: + # Incremental upload never mapped the report (the worker died before + # replaying the create), so there's no real report to link. 
+ _sift_kv( + terminalreporter, + "Report", + f"not uploaded — replay with: import-test-result-log {log_file}", + yellow=True, + ) + elif report_url is not None: + _sift_kv(terminalreporter, "Report", report_url, cyan=True) + else: + _sift_kv( + terminalreporter, + "Report", + f"id {report_id} (set sift_report_url_base for a clickable link)", + ) + + if report_id and getattr(context, "replay_incomplete", False) and log_file is not None: + _sift_kv( + terminalreporter, + "", + f"may be incomplete — finish with: import-test-result-log {log_file}", + yellow=True, + ) + + def _sift_enabled_for(node: pytest.Item | pytest.Collector, default: bool) -> bool: """Resolve the Sift gate for a node: sift_exclude > sift_include > default. @@ -806,6 +1141,10 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient: ) for env in missing: resolved[env] = _OFFLINE_DEFAULTS[env] + # Web-app origin for the report link: the sift_report_url_base CLI/ini option + # wins, then the SIFT_APP_URL env var, else host-based derivation in + # SiftClient.app_url. 
+ report_url_base = _option_or_ini(pytestconfig, _REPORT_URL_BASE) or os.getenv("SIFT_APP_URL") # `or ""` is unreachable in practice since the `missing` check above guarantees # non-None values return SiftClient( @@ -813,6 +1152,7 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient: api_key=resolved.get("SIFT_API_KEY") or "", grpc_url=resolved.get("SIFT_GRPC_URI") or "", rest_url=resolved.get("SIFT_REST_URI") or "", + app_url=report_url_base or None, ) ) diff --git a/python/lib/sift_client/sift_types/test_report.py b/python/lib/sift_client/sift_types/test_report.py index c4abfc548..dd786b02d 100644 --- a/python/lib/sift_client/sift_types/test_report.py +++ b/python/lib/sift_client/sift_types/test_report.py @@ -410,6 +410,38 @@ class TestMeasurement(BaseType[TestMeasurementProto, "TestMeasurement"], Simulat # Set by the low-level wrapper when this instance came from the simulate path _simulated: bool = False + def __str__(self) -> str: + """Human-readable form: ``[STATUS] name = value [unit] (bounds)``. + + Used for failure messages, logs, and the REPL. The string omits whichever + parts aren't set (no unit, no bounds), and falls back to ``?`` if no + value type is populated. The status prefix reflects ``self.passed``. + """ + status = "PASSED" if self.passed else "FAILED" + if self.numeric_value is not None: + value = f"{self.numeric_value}" + if self.unit: + value += f" {self.unit}" + elif self.string_value is not None: + value = repr(self.string_value) + elif self.boolean_value is not None: + value = str(self.boolean_value).lower() + else: + value = "?" 
+ bounds = "" + nb = self.numeric_bounds + if nb is not None: + parts: list[str] = [] + if nb.min is not None: + parts.append(f"min {nb.min}") + if nb.max is not None: + parts.append(f"max {nb.max}") + if parts: + bounds = f" ({', '.join(parts)})" + elif self.string_expected_value: + bounds = f" (expected {self.string_expected_value!r})" + return f"[{status}] {self.name} = {value}{bounds}" + @classmethod def _from_proto( cls, proto: TestMeasurementProto, sift_client: SiftClient | None = None diff --git a/python/lib/sift_client/transport/base_connection.py b/python/lib/sift_client/transport/base_connection.py index 02f0e096e..6586412fe 100644 --- a/python/lib/sift_client/transport/base_connection.py +++ b/python/lib/sift_client/transport/base_connection.py @@ -24,6 +24,7 @@ def __init__( api_key: str, use_ssl: bool = True, cert_via_openssl: bool = False, + app_url: str | None = None, ): """Initialize the connection configuration. @@ -33,12 +34,17 @@ def __init__( api_key: The API key for authentication. use_ssl: Whether to use SSL/TLS for secure connections. cert_via_openssl: Whether to use OpenSSL for certificate validation. + app_url: The Sift web-app origin (e.g. ``https://app.siftstack.com``). + Set this for on-prem or custom deployments whose API host can't be + mapped to a frontend automatically. When unset, the web-app URL is + derived from ``rest_url`` for known hosts. """ self.api_key = api_key self.grpc_url = grpc_url self.rest_url = rest_url self.use_ssl = use_ssl self.cert_via_openssl = cert_via_openssl + self.app_url = app_url def get_grpc_config(self): """Create and return a GrpcConfig with the current settings. 
diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py index 48a89b2d9..41066b247 100644 --- a/python/lib/sift_client/util/test_results/context_manager.py +++ b/python/lib/sift_client/util/test_results/context_manager.py @@ -8,6 +8,7 @@ import tempfile import traceback import warnings +from collections import Counter from contextlib import AbstractContextManager, contextmanager from datetime import datetime, timezone from pathlib import Path @@ -19,6 +20,7 @@ from sift_client.sift_types.test_report import ( ErrorInfo, NumericBounds, + TestMeasurement, TestMeasurementCreate, TestReport, TestReportCreate, @@ -140,6 +142,19 @@ class ReportContext(AbstractContextManager): step_number_at_depth: dict[int, int] open_step_results: dict[str, bool] any_failures: bool + # Every step created in this report (including hierarchy/parametrize + # parents), retained after close so end-of-run summaries can tally final + # statuses. ``update`` mutates step instances in place, so these references + # reflect late status changes (e.g. a teardown-phase failure). + created_steps: list[TestStep] + # Every measurement recorded in this report, retained for end-of-run + # summaries. Appended in ``NewStep.measure``. A measurement's ``passed`` is + # fixed at creation, so the retained references stay accurate. + created_measurements: list[TestMeasurement] + # Set True in ``__exit__`` when the background replay worker timed out or + # exited non-zero, so callers (e.g. the pytest plugin footer) can flag that + # the uploaded report may be missing entries. + replay_incomplete: bool = False _import_proc: subprocess.Popen | None = None # Seconds to wait for the import worker subprocess to finish uploading # the JSONL backlog at session end before killing it. 
Tests substitute @@ -184,6 +199,9 @@ def __init__( self.step_number_at_depth = {} self.open_step_results = {} self.any_failures = False + self.created_steps = [] + self.created_measurements = [] + self.replay_incomplete = False if log_file is True: tmp = tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False) @@ -279,6 +297,7 @@ def __exit__(self, exc_type, exc_value, traceback): except subprocess.TimeoutExpired: self._import_proc.kill() self._import_proc.wait() + self.replay_incomplete = True warnings.warn( f"Sift import worker did not exit in " f"{self._import_proc_timeout}s; killing it. " @@ -289,6 +308,7 @@ def __exit__(self, exc_type, exc_value, traceback): log_replay_instructions(self.log_file) return True # Ensures the session is marked as passed in pytest if self._import_proc.returncode != 0: + self.replay_incomplete = True stderr_text = ( stderr_bytes.decode("utf-8", errors="replace").strip() if stderr_bytes else "" ) @@ -311,6 +331,23 @@ def is_simulated(self) -> bool: """ return self.report.is_simulated + @property + def step_status_counts(self) -> Counter[TestStatus]: + """Tally of every created step by its current status. + + Includes hierarchy/parametrize parent steps. Read at the end of a run for + summaries; reflects late status changes since steps are mutated in place. + """ + return Counter(step.status for step in self.created_steps) + + @property + def measurement_counts(self) -> Counter[bool]: + """Tally of recorded measurements keyed by ``passed`` (True/False). + + Read at the end of a run for summaries. + """ + return Counter(m.passed for m in self.created_measurements) + def new_step( self, name: str, @@ -378,6 +415,8 @@ def create_step( ) self.step_stack.append(step) self.open_step_results[step.step_path] = True + # Retained for end-of-run tallies; never popped (unlike step_stack). 
+ self.created_steps.append(step) return step @@ -388,6 +427,10 @@ def record_step_outcome(self, outcome: bool, step: TestStep): self.open_step_results[step.step_path] = False self.any_failures = True + def record_measurement(self, measurement: TestMeasurement) -> None: + """Retain a recorded measurement for end-of-run summaries.""" + self.created_measurements.append(measurement) + def mark_step_failed_after_close(self, step: TestStep): """Mark a step's parent as failed after the step has already been popped from the stack. @@ -466,6 +509,9 @@ def __init__( # substep / ``report_outcome`` failures are intentionally not folded # in here (see ``measurements_passed`` vs ``passed``). self._failed_measurement_count = 0 + # Out-of-bounds measurements recorded on this step, retained so + # ``fail_if_measurements_failed`` can name them in the failure message. + self._failed_measurements: list[TestMeasurement] = [] def __enter__(self): """Enter the context manager to create a new step. @@ -487,9 +533,7 @@ def measurements_passed(self) -> bool: """ return self._failed_measurement_count == 0 - def fail_if_measurements_failed( - self, message: str = "one or more measurements out of bounds" - ) -> None: + def fail_if_measurements_failed(self, message: str = "measurements out of bounds") -> None: """Fail the pytest test if any measurement on this step was out of bounds. Use instead of ``assert step.measurements_passed``: it fails via @@ -497,12 +541,18 @@ def fail_if_measurements_failed( assertion message to ``error_info``. No-op when every measurement passed. Call once at the end of the test so every measurement is still recorded before the failure fires. + + The failure message names each out-of-bounds measurement with its + recorded value and bounds. ``message`` is used as the header line. 
""" if self.measurements_passed: return import pytest - pytest.fail(message, pytrace=False) + failed = self._failed_measurements + header = f"{message} ({len(failed)}):" if failed else message + body = [f" - {m}" for m in failed] + pytest.fail("\n".join([header, *body]), pytrace=False) def update_step_from_result( self, @@ -662,8 +712,10 @@ def measure( create, log_file=self.report_context.log_file ) self.report_context.record_step_outcome(measurement.passed, self.current_step) + self.report_context.record_measurement(measurement) if not measurement.passed: self._failed_measurement_count += 1 + self._failed_measurements.append(measurement) return measurement.passed From d5cc95201fb7ed5de1489a6090df349358f1760f Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Fri, 29 May 2026 14:08:44 -0700 Subject: [PATCH 12/19] version bump to 0.17.0.dev1 --- python/pyproject.toml | 2 +- python/uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index a2cd6a410..2846fedba 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "sift_stack_py" -version = "0.17.0.dev0" +version = "0.17.0.dev1" description = "Python client library for the Sift API" requires-python = ">=3.8" readme = { file = "README.md", content-type = "text/markdown" } diff --git a/python/uv.lock b/python/uv.lock index 9ed71e17b..b8c439b1a 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -4315,7 +4315,7 @@ wheels = [ [[package]] name = "sift-stack-py" -version = "0.17.0.dev0" +version = "0.17.0.dev1" source = { editable = "." 
} dependencies = [ { name = "alive-progress", version = "3.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, From e5f397c7ef029dbc26f20614fec4e26aad714fce Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 2 Jun 2026 12:04:30 -0700 Subject: [PATCH 13/19] Python(fix): pytest exit instead of raise on connection fail (#606) --- .../_tests/pytest_plugin/test_online.py | 19 +++++++++++++------ python/lib/sift_client/pytest_plugin.py | 13 +++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_online.py b/python/lib/sift_client/_tests/pytest_plugin/test_online.py index 876fffb0e..19a666d04 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_online.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_online.py @@ -1,10 +1,10 @@ """Tests for online mode (the default). Online mode requires connectivity to Sift. The plugin pings via -``client_has_connection`` at session start and aborts with -``pytest.UsageError`` on failure. Missing ``SIFT_API_KEY`` / -``SIFT_GRPC_URI`` / ``SIFT_REST_URI`` env vars are reported as a usage error -so the failure is actionable. +``client_has_connection`` at session start and aborts via ``pytest.exit`` on +failure, so the message prints once before any test runs. Missing +``SIFT_API_KEY`` / ``SIFT_GRPC_URI`` / ``SIFT_REST_URI`` env vars are reported +as a usage error so the failure is actionable. 
""" from __future__ import annotations @@ -23,7 +23,7 @@ def test_ping_failure_aborts( pytester: pytest.Pytester, clear_sift_env: None, ) -> None: - """Online mode with an unreachable ping aborts the session via UsageError.""" + """Online mode with an unreachable ping aborts the session before any test runs.""" pytester.makeconftest( """ import pytest @@ -46,12 +46,19 @@ def sift_client(): @pytest.mark.sift_include def test_should_not_run(): assert True + + @pytest.mark.sift_include + def test_should_not_run_either(): + assert True """ ) result = pytester.runpytest_subprocess() assert result.ret != 0 combined = "\n".join(result.outlines + result.errlines) - assert "Sift ping failed" in combined, combined + # ``pytest.exit`` stops on the first gated test's setup: the message + # appears once (not once per test) and nothing runs. + assert combined.count("Sift ping failed") == 1, combined + result.assert_outcomes() def test_missing_env_vars_named_in_error( self, diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index cf85b3abb..ed2d71fb6 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -1184,7 +1184,7 @@ def report_context( session end. * default (online): verify connectivity via ``client_has_connection`` before constructing the context. A failed ping aborts the session - with ``pytest.UsageError`` and points at ``--sift-offline`` and + with ``pytest.exit`` and points at ``--sift-offline`` and ``--sift-disabled`` as escape hatches. The log-file destination is controlled by @@ -1204,11 +1204,12 @@ def report_context( except Exception as exc: grpc_config = getattr(getattr(sift_client, "grpc_client", None), "_config", None) grpc_url = getattr(grpc_config, "uri", "") - raise pytest.UsageError( + pytest.exit( f"Sift ping failed against {grpc_url}: {exc}. " "Pass --sift-offline to run without contacting Sift, or " - "--sift-disabled to skip Sift entirely." 
- ) from exc + "--sift-disabled to skip Sift entirely.", + returncode=4, + ) yield from _report_context_impl(sift_client, request, pytestconfig=pytestconfig) @@ -1413,8 +1414,8 @@ def client_has_connection(pytestconfig: pytest.Config, request: pytest.FixtureRe """Verify the ``SiftClient`` can reach Sift via ``/ping``. Consulted at session start by ``report_context`` in online mode. A failed - ping raises through ``report_context`` and aborts the session with - ``pytest.UsageError``. Override this fixture in your conftest to use a + ping aborts the session via ``pytest.exit``. Override this fixture in your + conftest to use a different reachability signal (e.g. a cached auth token) for environments where pinging is the wrong check. Returns ``False`` in ``--sift-disabled`` mode without constructing a client. From eb8c32bfd4e9ba397c34d4e5d0c76fbf70d0ba7f Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 2 Jun 2026 12:15:06 -0700 Subject: [PATCH 14/19] Python(feat): flexible pytest naming and cleaned up options (#602) --- python/CHANGELOG.md | 4 +- .../docs/examples/pytest_plugin_quickstart.md | 24 +- .../guides/pytest_plugin/configuration.md | 262 ++++-- python/docs/guides/pytest_plugin/index.md | 16 +- .../guides/pytest_plugin/running_modes.md | 13 +- python/examples/pytest_plugin/README.md | 15 +- python/examples/pytest_plugin/conftest.py | 16 +- python/examples/pytest_plugin/pyproject.toml | 33 + python/examples/pytest_plugin/pytest.ini | 11 - .../tests/with_sift/test_with_sift_demo.py | 8 +- .../sift_client/_internal/pyproject_config.py | 84 ++ .../_tests/pytest_plugin/test_disabled.py | 14 - .../pytest_plugin/test_report_fields.py | 272 ++++++ .../_tests/pytest_plugin/test_report_name.py | 120 +++ .../pytest_plugin/test_settings_reference.py | 39 + .../pytest_plugin/test_typo_detector.py | 113 +++ python/lib/sift_client/pytest_plugin.py | 877 ++++++++++++++---- .../sift_types/_mixins/metadata.py | 19 + python/lib/sift_client/sift_types/asset.py | 2 + 
python/lib/sift_client/sift_types/report.py | 2 + python/lib/sift_client/sift_types/run.py | 2 + .../lib/sift_client/sift_types/test_report.py | 6 + .../util/test_results/context_manager.py | 16 +- python/pyproject.toml | 1 + python/uv.lock | 2 + 25 files changed, 1639 insertions(+), 332 deletions(-) create mode 100644 python/examples/pytest_plugin/pyproject.toml delete mode 100644 python/examples/pytest_plugin/pytest.ini create mode 100644 python/lib/sift_client/_internal/pyproject_config.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_report_fields.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_report_name.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_settings_reference.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_typo_detector.py create mode 100644 python/lib/sift_client/sift_types/_mixins/metadata.py diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md index 1b8c43a93..4905ae0d7 100644 --- a/python/CHANGELOG.md +++ b/python/CHANGELOG.md @@ -16,7 +16,8 @@ Highlights: - **Pass/fail mapping.** Every pytest outcome (pass, assertion failure, exception, skip, xfail, hard exit) maps to a `TestStatus` and propagates to parent steps and the report. `step.measure(...)` returns a pass/fail boolean without raising, so all measurements land in the report even when one fails; `step.fail_if_measurements_failed()` fails the test at the end without adding assertion noise to `error_info`. - **Assertion messages as error info.** Assertion failure messages are reported as the step's error info. - **Git metadata.** Repo, branch, and commit are captured on the report automatically. 
-- **Terminal output.** The plugin prints a session header with the SDK version and active mode, and an end-of-run `Sift report` panel showing the test case, outcome, step and measurement breakdowns (color-coded), test system/operator, plus a link to the report (online), the saved log and upload command (offline), or a disabled note. Both suppress under `-q`. `SiftClient.app_url` exposes the web-app origin; set `sift_report_url_base` for on-prem or custom deployments. `--sift-open-report` opens the report in a browser at session end. +- **Terminal output.** The plugin prints a session header with the SDK version and active mode, and an end-of-run `Sift report` panel showing the test case, outcome, step and measurement breakdowns (color-coded), test system/operator, plus a link to the report (online), the saved log and upload command (offline), or a disabled note. Both suppress under `-q`. `SiftClient.app_url` exposes the web-app origin; set `sift_app_url` for on-prem or custom deployments. `--sift-open-report` opens the report in a browser at session end. +- **Configurable report content via `[tool.sift.pytest.report]` and `SIFT_REPORT_*` env vars.** Static defaults (`name`, `test_case`, `test_system_name`, `system_operator`, `serial_number`, `part_number`, and `metadata`) live under `[tool.sift.pytest.report]` in `pyproject.toml`. `name` and `test_case` accept the `{target}`, `{command}`, `{args}`, `{rootdir}`, `{timestamp}`, `{count}`, `{git_repo}`, `{git_branch}`, `{git_commit}` placeholders. `[tool.sift.pytest.report.metadata]` is a TOML table whose typed values land on the report's metadata alongside git fields and the auto-recorded `pytest_command`. For dynamic per-run injection (CI, hardware-bench unit cycling), set `SIFT_REPORT_TEST_SYSTEM_NAME` / `_SYSTEM_OPERATOR` / `_SERIAL_NUMBER` / `_PART_NUMBER` env vars, which pytest-dotenv loads from `.env` for local dev. Env entries win over TOML. 
See the [Pytest Plugin guide](https://github.com/sift-stack/sift/blob/main/python/docs/guides/pytest_plugin/index.md) and the runnable quickstart example for full configuration. @@ -27,6 +28,7 @@ See the [Pytest Plugin guide](https://github.com/sift-stack/sift/blob/main/pytho - [Pass/fail behavior improvements](https://github.com/sift-stack/sift/pull/568) - [Report assertion message as error info](https://github.com/sift-stack/sift/pull/587) - [Pytest docs reorganization](https://github.com/sift-stack/sift/pull/589) +- [Configurable report name template and preserved pytest command](https://github.com/sift-stack/sift/pull/591) ## [v0.16.2] - May 21, 2026 diff --git a/python/docs/examples/pytest_plugin_quickstart.md b/python/docs/examples/pytest_plugin_quickstart.md index b30f282c6..30012f9b4 100644 --- a/python/docs/examples/pytest_plugin_quickstart.md +++ b/python/docs/examples/pytest_plugin_quickstart.md @@ -16,7 +16,7 @@ For a conceptual reference (fixtures, ini flags, status semantics), see the ``` examples/pytest_plugin/ ├── conftest.py # registers the plugin -├── pytest.ini # available ini knobs (all commented at defaults) +├── pyproject.toml # pytest knobs + report name/test_case/metadata ├── .env.example # credential template └── tests/ ├── pytest_only/ # subpackage step @@ -32,21 +32,25 @@ above each test becomes its own parent step in the report tree. ## `conftest.py` -A single `pytest_plugins` declaration loads the plugin; `load_dotenv()` is -optional and just lets the default `sift_client` fixture pick up -`SIFT_API_KEY` / `SIFT_GRPC_URI` / `SIFT_REST_URI` from a local `.env`. +A single `pytest_plugins` declaration loads the plugin. The default +`sift_client` fixture reads `SIFT_API_KEY` / `SIFT_GRPC_URI` / `SIFT_REST_URI` +from the environment — set them in your shell, your CI secret store, or a +local `.env` (`pip install pytest-dotenv` auto-loads it). 
```python title="conftest.py" --8<-- "examples/pytest_plugin/conftest.py" ``` -## `pytest.ini` +## `pyproject.toml` -Every knob is commented at its default value. Uncomment any line to opt out of -a layer of the step tree. +Pytest behavior knobs sit under `[tool.pytest.ini_options]`, each commented at +its default — uncomment any line to opt out of a layer of the step tree. The +report's display `name`, `test_case`, and free-form `metadata` are set under +`[tool.sift.pytest.report]`; `name` and `test_case` accept template +placeholders. -```ini title="pytest.ini" ---8<-- "examples/pytest_plugin/pytest.ini" +```toml title="pyproject.toml" +--8<-- "examples/pytest_plugin/pyproject.toml" ``` ## `.env.example` @@ -168,7 +172,7 @@ skip every measurement that follows. Expected pytest output is `16 passed, 3 failed, 1 skipped`. Flip any of the `sift_*_step` / `sift_parametrize_nesting` flags in -`pytest.ini` to `false` to collapse a layer. +`pyproject.toml` to `false` to collapse a layer. ## Next steps diff --git a/python/docs/guides/pytest_plugin/configuration.md b/python/docs/guides/pytest_plugin/configuration.md index 3b3151111..7c7114543 100644 --- a/python/docs/guides/pytest_plugin/configuration.md +++ b/python/docs/guides/pytest_plugin/configuration.md @@ -26,40 +26,45 @@ The `SIFT_GRPC_URI` and `SIFT_REST_URI` are the gRPC and REST endpoints for your Sift organization. You can find these on the Sift Manage page as well as generate an API key. -The default `sift_client` fixture reads its two URIs from environment first and -falls back to ini keys when the env vars are unset. `SIFT_API_KEY` is -intentionally env-only, so keep it out of source control and supply it through -`pytest-dotenv` (see [API key handling](#api-key-handling) below). The env var -wins when both are set, so secrets injected into a CI environment continue to -override values committed to `pyproject.toml`. 
There are no CLI flags for +The default `sift_client` fixture reads its two URIs from the environment +first, then from the `sift_grpc_uri` / `sift_rest_uri` ini keys. +`SIFT_API_KEY` is intentionally env-only, so keep it out of source control (see +[API key handling](#api-key-handling) below). There are no CLI flags for credentials. -| Ini key | Environment variable | Notes | +| Setting | Where | Notes | |---|---|---| -| _(none)_ | `SIFT_API_KEY` | Env-only. Use `.env` + `pytest-dotenv` locally; inject from your secret store in CI. | -| `sift_grpc_uri` | `SIFT_GRPC_URI` | Stable per-org gRPC endpoint; safe to commit. | -| `sift_rest_uri` | `SIFT_REST_URI` | Stable per-org REST endpoint; safe to commit. | +| `SIFT_API_KEY` | env var only | Inject from your secret store in CI; for local dev use a `.env` (see below). Never read from a committed file. | +| `SIFT_GRPC_URI` | env > `sift_grpc_uri` ini | Stable per-org gRPC endpoint; safe to commit. | +| `SIFT_REST_URI` | env > `sift_rest_uri` ini | Stable per-org REST endpoint; safe to commit. | ### API key handling -`SIFT_API_KEY` is deliberately read from the process environment only. The -recommended workflow uses the -[`pytest-dotenv`](https://pypi.org/project/pytest-dotenv/) plugin (already a -dependency of `sift-stack-py`), which loads variables from a `.env` file into -`os.environ` before tests run. +`SIFT_API_KEY` is read from the process environment only — the plugin never +reads it from a committed file. How you get it into the environment is up to +you: -1. Add `.env` to `.gitignore`. -2. Drop your key into `.env` at the project root: +- **CI:** set `SIFT_API_KEY` directly via your provider's secret manager. +- **Local dev:** keep the values in a `.env` (gitignored) and let + [`pytest-dotenv`](https://pypi.org/project/pytest-dotenv/) load them — it is + not bundled with `sift-stack-py`, so install it explicitly: + + ```bash + pip install pytest-dotenv + ``` ```bash title=".env" SIFT_API_KEY=sk-...your-key... 
+ SIFT_GRPC_URI=your-org.grpc.example.com + SIFT_REST_URI=https://your-org.rest.example.com ``` -3. In CI, set `SIFT_API_KEY` directly via your provider's secret manager - instead of committing a `.env` file. + Once installed, pytest-dotenv auto-loads `.env` from the rootdir before + tests run — no `conftest.py` glue and no `load_dotenv()` call. (Point it at + a different file with the `env_files` ini key if you prefer.) -`pytest-dotenv` picks the file up automatically; no `pytest_configure` glue is -needed. +Prefer real environment variables (shell exports, CI secrets) for anything you +can't keep in a local file. !!! warning "FedRAMP / shared environments" Pass `--sift-log-file=false` (or set the ini key to `"false"`) to skip the @@ -73,10 +78,6 @@ that's required. The plugin ships a default `sift_client` fixture that reads `SIFT_API_KEY`, `SIFT_GRPC_URI`, and `SIFT_REST_URI` from the environment. ```python title="conftest.py" -from dotenv import load_dotenv - -load_dotenv() - pytest_plugins = ["sift_client.pytest_plugin"] ``` @@ -93,12 +94,9 @@ plugin's default falls away in favor of your definition. import os import pytest -from dotenv import load_dotenv from sift_client import SiftClient, SiftConnectionConfig -load_dotenv() - pytest_plugins = ["sift_client.pytest_plugin"] @@ -120,47 +118,68 @@ def sift_client() -> SiftClient: |---|---|---|---| | `report_context` | fixture (autouse) | session | The `ReportContext` backing the run's `TestReport`. Use it to attach metadata or open ad-hoc steps. | | `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, `fail_if_measurements_failed`, and `current_step`. | -| `_hierarchy_parents` | internal fixture (autouse) | function | Opens a parent step for each `pytest.Package`, `pytest.Module`, and `pytest.Class` ancestor of the current test. Each layer is gated independently; see [ini options](#ini-options). 
| +| `_hierarchy_parents` | internal fixture (autouse) | function | Opens a parent step for each `pytest.Package`, `pytest.Module`, and `pytest.Class` ancestor of the current test. Each layer is gated independently; see [settings reference](#settings-reference). | | `_parametrize_parents` | internal fixture (autouse) | function | Opens a parent step for each `@pytest.mark.parametrize` axis (and fixture parametrization), nested inside the hierarchy parents. | | `client_has_connection` | fixture | session | Calls `sift_client.ping.ping()`; consulted by `report_context` at session start in online mode (the default). Override to skip the ping or use a different reachability signal. | -## CLI options +## Settings reference -| Flag | Default | Effect | -|---|---|---| -| `--sift-offline` | off (online) | Skip the session-start ping and don't contact Sift. All create/update calls go to the JSONL log file for later replay via `import-test-result-log`. Missing `SIFT_*` env vars are tolerated; placeholders are filled. | -| `--sift-disabled` | off | Skip Sift entirely. Nothing contacts the API and no log file is written; `step.measure(...)` still evaluates bounds and returns a real pass/fail boolean. Also honored via `SIFT_DISABLED=1`. Supersedes every other flag (disabled wins over offline). | -| `--sift-log-file=` | temp file | Where the JSONL log of create/update calls goes. With a log file set, the plugin spawns an `import-test-result-log --incremental` worker that polls the file and replays entries against Sift while the run is in flight. Pass `false` to disable the file entirely; create/update calls then go straight to the API synchronously during tests. Incompatible with `--sift-offline` since offline mode needs the log file as its sole sink. | -| `--no-sift-git-metadata` | git metadata on | Skip capturing git repo/branch/commit on the report's metadata. 
| -| `--sift-report-url-base=` | derived from REST URI | Web-app origin used to build the clickable report link in the terminal footer (e.g. `https://app.siftstack.com`). Set this for on-prem or custom deployments whose API host can't be mapped to a frontend automatically. Also honored via the `SIFT_APP_URL` environment variable. When unset, the link is derived from the REST URI for known Sift hosts. | -| `--sift-open-report` | off | Open the resulting report in a browser at session end. Online mode only; a no-op when the report URL can't be resolved. Intended for local development. | +Every setting the plugin reads, grouped by the three config kinds. Within a +group, a `—` means the setting can't be set from that surface. -These can be passed permanently via `addopts`: +Each kind has a home chosen for a specific workflow: -```ini title="pytest.ini" -[pytest] -addopts = --sift-offline -``` +- **Pytest behavior** lives in `[tool.pytest.ini_options]` (log/offline/disabled/git/`*_step`/autouse/parametrize). A CLI flag exists for the ones with a real ad-hoc override workflow. +- **Connection** comes from the environment first, falling back to the ini keys; the API key is env-only so secrets stay out of committed files. +- **Report content** takes static defaults from `[tool.sift.pytest.report]` and per-run dynamic values from `SIFT_REPORT_*` env vars (CI builds, hardware cycling, anything `.env`-driven; pytest-dotenv loads `.env` for local dev). + +**Precedence within a setting:** env > CLI flag > ini key > TOML > built-in +default. No setting exposes both env and CLI, so the chain isn't ambiguous in +practice. + +The plugin scans `SIFT_*` env vars and `[tool.sift.pytest.*]` keys at session +start; anything outside these tables fires a warning with a closest-match +suggestion, so typos like `SIFT_REPORT_SERIALNUM` surface immediately. -## Ini options + +### Pytest behavior -Set the matching ini key directly (recommended for stable per-project -configuration). 
Each CLI flag has a corresponding key under -`[tool.pytest.ini_options]` in `pyproject.toml` or `[pytest]` in `pytest.ini`. -CLI flags, when passed, override the ini values. +| Setting | CLI flag | Ini (`[tool.pytest.ini_options]`) | +|---|---|---| +| Path to the JSONL log of create/update calls (path \| true \| false \| none). | `--sift-log-file` | `sift_log_file` | +| Capture git repo/branch/commit on the report. | `--no-sift-git-metadata` | `sift_git_metadata` | +| Skip the session-start ping; route create/update through the JSONL log. | `--sift-offline` | `sift_offline` | +| Disable Sift entirely (no API calls, no log file). Supersedes --sift-offline. | `--sift-disabled` | `sift_disabled` | +| Open the resulting report in a browser at session end (online only; no-op when the report URL can't be resolved). | `--sift-open-report` | `sift_open_report` | +| Default for the Sift autouse fixtures (report_context, step, hierarchy/parametrize parents). | — | `sift_autouse` | +| Open a parent step for each Python package in the test path. | — | `sift_package_step` | +| Open a parent step for each test module. | — | `sift_module_step` | +| Open per-class parent steps, including nested classes. | — | `sift_class_step` | +| Cluster parametrized tests under shared parent steps (e.g. test_a -> v=1, v=2). | — | `sift_parametrize_nesting` | + +### Connection + +| Setting | Ini (`[tool.pytest.ini_options]`) | Env var | +|---|---|---| +| Sift API key (secret, env-only). | — | `SIFT_API_KEY` | +| Sift gRPC endpoint URI. | `sift_grpc_uri` | `SIFT_GRPC_URI` | +| Sift REST endpoint URI. | `sift_rest_uri` | `SIFT_REST_URI` | +| Sift web-app origin for the report link in the terminal footer (e.g. https://app.siftstack.com). When unset, the link is derived from the REST URI for known Sift hosts. 
| `sift_app_url` | `SIFT_APP_URL` | + +### Report content -| Ini key | Type | Equivalent CLI flag | +| Setting | TOML (`[tool.sift...]`) | Env var | |---|---|---| -| `sift_log_file` | string (`true` / `false` / `none` / path) | `--sift-log-file=` | -| `sift_git_metadata` | bool (default `true`) | `--no-sift-git-metadata` (sets to `false`) | -| `sift_offline` | bool (default `false`) | `--sift-offline` | -| `sift_disabled` | bool (default `false`) | `--sift-disabled` (also honors `SIFT_DISABLED` env var) | -| `sift_autouse` | bool (default `true`) | _(no CLI flag; controls the marker gate below)_ | -| `sift_package_step` | bool (default `true`) | _(ini-only)_. Opens a parent step for each Python package (directory with `__init__.py`) in the test path. | -| `sift_module_step` | bool (default `true`) | _(ini-only)_. Opens a parent step for each test module (file). | -| `sift_class_step` | bool (default `true`) | _(ini-only)_. Opens a parent step for each test class, including nested classes. | -| `sift_parametrize_nesting` | bool (default `true`) | _(ini-only)_. Clusters parametrized tests under shared parents (`test_x`, `axis=value`) instead of flat leaves (`test_x[value]`). | -| `sift_open_report` | bool (default `false`) | `--sift-open-report` | +| Template for the report display name. Placeholders: {target}, {command}, {args}, {rootdir}, {timestamp}, {count}, {git_repo}, {git_branch}, {git_commit}. | `[tool.sift.pytest.report] name` | — | +| Template for the report's test_case field (same placeholders as report_name). | `[tool.sift.pytest.report] test_case` | — | +| Name of the test system / rig. Defaults to the host's name. | `[tool.sift.pytest.report] test_system_name` | `SIFT_REPORT_TEST_SYSTEM_NAME` | +| Operator running the test. Defaults to the OS user. | `[tool.sift.pytest.report] system_operator` | `SIFT_REPORT_SYSTEM_OPERATOR` | +| Serial number of the unit under test. 
| `[tool.sift.pytest.report] serial_number` | `SIFT_REPORT_SERIAL_NUMBER` | +| Part number of the unit under test. | `[tool.sift.pytest.report] part_number` | `SIFT_REPORT_PART_NUMBER` | +| Free-form report metadata, as a TOML table of scalar values. For dynamic per-run keys, attach them in conftest via the report_context fixture. | `[tool.sift.pytest.report.metadata]` (table) | — | + + +### Quick-start examples ```toml title="pyproject.toml" [tool.pytest.ini_options] @@ -168,9 +187,22 @@ sift_offline = true sift_git_metadata = false sift_grpc_uri = "your-org.sift.example:443" sift_rest_uri = "https://your-org.sift.example" + +[tool.sift.pytest.report] +name = "{rootdir} ({count} tests) {timestamp}" +test_system_name = "rig-7" + +[tool.sift.pytest.report.metadata] +build_id = "v1.2.3" ``` -```ini title="pytest.ini" +```bash title="CI env (set by your runner)" +SIFT_API_KEY=... # from a secret manager +SIFT_REPORT_SYSTEM_OPERATOR=ci-bot +SIFT_REPORT_SERIAL_NUMBER=$UNIT_SN # cycles per matrix job +``` + +```ini title="pytest.ini (alternative — pytest-execution flags only)" [pytest] sift_offline = true sift_git_metadata = false @@ -178,6 +210,116 @@ sift_grpc_uri = your-org.sift.example:443 sift_rest_uri = https://your-org.sift.example ``` +CLI flags can be made permanent via `addopts`: + +```ini title="pytest.ini" +[pytest] +addopts = --sift-offline +``` + +## Report content in depth + +The [settings reference](#settings-reference) above maps each report-content +field to its `[tool.sift.pytest.report]` key and `SIFT_REPORT_*` env var. This +section covers the two template fields and the metadata table in more detail. 
+ +```toml title="pyproject.toml — static project defaults" +[tool.sift.pytest.report] +name = "{rootdir} {git_branch} ({count} tests) {timestamp}" +test_case = "{rootdir}-{git_branch}" +test_system_name = "rig-7" +system_operator = "ci-bot" +serial_number = "SN-001" +part_number = "PN-9000" +``` + +```bash title="Per-run overrides — CI or hardware-bench shell" +SIFT_REPORT_SERIAL_NUMBER=$UNIT_SN \ +SIFT_REPORT_SYSTEM_OPERATOR=$CI_ACTOR \ +pytest tests/ +``` + +### `name` vs `test_case` + +The two fields look similar but serve opposite purposes: + +- **`name`** is the report's **per-run display label** — what you see in the + Test Results list. It should be unique per run, which is why its default ends + in `{timestamp}`. +- **`test_case`** is the **cross-run grouping key** — reports that share a + `test_case` are treated as runs of the *same* case, so Sift can track its + pass/fail history over time. It should be stable across runs, which is why + its default has **no** timestamp. + +By default both derive from the same `{target}` (what ran), and the timestamp +is the only difference: `name` = `{target} {timestamp}` (distinct each run), +`test_case` = `{target}` (identical across runs of the same target, so they +group together). Set either explicitly to override — a static `test_case` like +`"{rootdir}"` is common when you want every run of a project to group under one +case regardless of which subset ran. + +### Templates for `name` and `test_case` + +`name` and `test_case` accept the same f-string-style placeholders: + +| Placeholder | Value | +|---|---| +| `{target}` | What ran, derived from the collected tests (not the command line) and anchored to the project name: `project/tests/test_x.py::test_y` for a single test (the `[param]` suffix is stripped), `project/tests/test_x.py` for a single file, `project/tests/motor` for several files' common directory, or just `project` for a whole-suite run. | +| `{command}` | The full pytest invocation, e.g. 
`pytest tests/ -k smoke`. | +| `{args}` | The invocation arguments without the leading `pytest`. | +| `{rootdir}` | The pytest rootdir name (typically the project directory). | +| `{timestamp}` | The report start time in ISO 8601 (UTC). | +| `{count}` | The number of collected tests in the run. | +| `{git_repo}` | The `origin` remote URL, or empty when not in a git repo. | +| `{git_branch}` | The current branch, or empty when not in a git repo. | +| `{git_commit}` | The current commit (`git describe --always --dirty`), or empty when not in a git repo. | + +**Defaults when unset.** Because `{target}` is derived from the collected +tests, the defaults reflect what actually ran and don't change with flag order +or `-k` / `-m` filters: + +(`<project>` below is the rootdir directory name.) + +| Invocation | default `name` | default `test_case` | +|---|---|---| +| `pytest tests/test_motor.py::test_spin[12V]` | `<project>/tests/test_motor.py::test_spin 2026-...` | `<project>/tests/test_motor.py::test_spin` | +| `pytest -v tests/test_motor.py` | `<project>/tests/test_motor.py 2026-...` | `<project>/tests/test_motor.py` | +| `pytest -k motor` (hits `tests/motor/`) | `<project>/tests/motor 2026-...` | `<project>/tests/motor` | +| `pytest` (whole suite) | `<project> 2026-...` | `<project>` | + +The git placeholders are resolved independently of `--no-sift-git-metadata` +(which only controls whether git values are stored on the report metadata) and +render empty outside a git checkout. An unknown placeholder is reported as a +warning and the value falls back to the default rather than failing the run. + +Regardless of the name, the full pytest command is always preserved on the +report's metadata under the `pytest_command` key, so the exact invocation stays +queryable and viewable in the report detail. + +### Report metadata + +`[tool.sift.pytest.report.metadata]` is a TOML table whose typed values land +on the report's metadata alongside the git fields and the auto-recorded +`pytest_command`.
Use it for build IDs, fixture identifiers, shift labels, +and any key/value data not otherwise modeled. + +```toml title="pyproject.toml — static metadata defaults" +[tool.sift.pytest.report.metadata] +build_id = "v1.2.3" +fixture = "PSU-A" +shift = "night" +lane = 2 # ints, floats, and bools come through with their TOML type +verbose = true +``` + +For per-run dynamic entries (CI build IDs, cycling serial numbers), attach them +in your `conftest.py` through the `report_context` fixture rather than the TOML +table. + +Nested tables, lists, and `null` values in +`[tool.sift.pytest.report.metadata]` are skipped with a warning since the +report's metadata is a flat `dict[str, str | float | bool]`. + ## Controlling which tests produce reports By default every test in the session produces a Sift step. Two markers and one diff --git a/python/docs/guides/pytest_plugin/index.md b/python/docs/guides/pytest_plugin/index.md index 9344885b3..a649204a4 100644 --- a/python/docs/guides/pytest_plugin/index.md +++ b/python/docs/guides/pytest_plugin/index.md @@ -10,27 +10,27 @@ the report itself. Install the client and pytest: ```bash -pip install sift-stack-py pytest python-dotenv +pip install sift-stack-py pytest ``` -Set your connection details in a `.env` next to your tests: +The default `sift_client` fixture reads its connection details from the +environment: -```bash title=".env" +```bash SIFT_API_KEY="..." SIFT_GRPC_URI="..." SIFT_REST_URI="..." ``` -Find these on the Sift Manage page, where you can also generate an API key. +Find these on the Sift Manage page, where you can also generate an API key. Set +them in your shell or CI secret store. For local dev, `pip install +pytest-dotenv` and drop the same values in a `.env` next to your tests — it +loads them automatically, no code required. 
Register the plugin with a single `pytest_plugins` declaration in your top-level `conftest.py`: ```python title="conftest.py" -from dotenv import load_dotenv - -load_dotenv() - pytest_plugins = ["sift_client.pytest_plugin"] ``` diff --git a/python/docs/guides/pytest_plugin/running_modes.md b/python/docs/guides/pytest_plugin/running_modes.md index 9289428e4..6c5ab05be 100644 --- a/python/docs/guides/pytest_plugin/running_modes.md +++ b/python/docs/guides/pytest_plugin/running_modes.md @@ -43,8 +43,8 @@ operator. **Online** shows the report metadata, step and measurement breakdowns, and a clickable link. The web host is derived from the REST URI for known Sift hosts; -for on-prem or custom deployments set `--sift-report-url-base` -(ini: `sift_report_url_base`, env: `SIFT_APP_URL`). Add `--sift-open-report` to +for on-prem or custom deployments set `sift_app_url` +(ini) or the `SIFT_APP_URL` env var. Add `--sift-open-report` to open the report in a browser at session end. ```text @@ -191,16 +191,15 @@ and tests can branch on provenance. Offline-mode entities also report How to turn it on, in the order most projects pick: ```bash -# In an .envrc, devcontainer, or CI job config -export SIFT_DISABLED=1 - # Per-invocation kill-switch pytest --sift-disabled +``` +```toml # Per-project default (uncommon; online is usually the right default) # pyproject.toml: -# [tool.pytest.ini_options] -# sift_disabled = true +[tool.pytest.ini_options] +sift_disabled = true ``` Good fit for local dev without Sift credentials. Also for library consumers who diff --git a/python/examples/pytest_plugin/README.md b/python/examples/pytest_plugin/README.md index 6eeaf9a34..0a94b7f97 100644 --- a/python/examples/pytest_plugin/README.md +++ b/python/examples/pytest_plugin/README.md @@ -8,7 +8,7 @@ numeric / string / bool bounds, gate markers, and the ini opt-outs. 
``` examples/pytest_plugin/ ├── conftest.py # registers the plugin -├── pytest.ini # available ini knobs (all commented at defaults) +├── pyproject.toml # pytest knobs + report name/test_case/metadata ├── .env.example # credential template (copy to .env for local runs) └── tests/ ├── pytest_only/ # subpackage step: `pytest_only` opens a parent step @@ -24,13 +24,14 @@ Every layer of organization shows up in the report tree: Python packages (directories with `__init__.py`), modules (test files), classes (including nested classes), and parametrize axes each open a parent step. Flip `sift_package_step`, `sift_module_step`, `sift_class_step`, or -`sift_parametrize_nesting` to `false` in `pytest.ini` to disable this behavior. +`sift_parametrize_nesting` to `false` in `pyproject.toml` to disable this behavior. ## Run it **Against a real Sift org**: ```bash +pip install pytest-dotenv # auto-loads .env; or export the vars yourself cp .env.example .env # Fill in SIFT_API_KEY / SIFT_GRPC_URI / SIFT_REST_URI pytest -v @@ -48,8 +49,8 @@ import-test-result-log /tmp/sift-demo.jsonl ## What the report tree looks like -With the plugin's defaults (everything in `pytest.ini` left commented), running -this demo produces a tree like: +With the plugin's defaults (the `[tool.pytest.ini_options]` knobs left +commented), running this demo produces a tree like: ``` TestReport (FAILED, since failures propagate up from leaves) @@ -107,14 +108,14 @@ skip every measurement that follows. Expected pytest output is `16 passed, 3 failed, 1 skipped`. Toggle any of the `sift_*_step` / `sift_parametrize_nesting` flags in -`pytest.ini` to `false` to collapse a layer. +`pyproject.toml` to `false` to collapse a layer. 
## What each file demonstrates | File | Feature | |---|---| -| `conftest.py` | Plugin registration via `pytest_plugins`; optional `load_dotenv()` | -| `pytest.ini` | The four nesting flags + git metadata flag at their defaults | +| `conftest.py` | Plugin registration via `pytest_plugins` (a single line) | +| `pyproject.toml` | Pytest nesting/git-metadata knobs at their defaults; report `name`, `test_case`, and `metadata` under `[tool.sift.pytest.report]` | | `tests/pytest_only/test_pytest_only_demo.py` | Plain pytest tests with no Sift APIs. The plugin captures pass/fail automatically; covers functions, fixtures, parametrize, classes, plus one each of `AssertionError` (FAILED), `pytest.skip` (SKIPPED), and a raised `ValueError` (ERROR) | | `tests/with_sift/test_with_sift_demo.py` | `step.measure` (numeric/string/bool bounds, units, description, metadata, `channel_names`), `step.measure_avg` and `step.measure_all` for series, an out-of-bounds measurement (pytest PASSED, Sift step FAILED), the recommended `step.fail_if_measurements_failed()` end-of-test call that fails pytest while still recording every measurement, nested `step.substep` (with step-level `metadata=...`), `@pytest.mark.sift_exclude`, class step + class docstring → description, nested classes, stacked `@pytest.mark.parametrize`, `step.report_outcome`, and session-level metadata via `report_context.report.update({...})` | | `tests/{pytest_only,with_sift}/__init__.py` | Each Python package (directory with `__init__.py`) becomes a parent step in the report tree | diff --git a/python/examples/pytest_plugin/conftest.py b/python/examples/pytest_plugin/conftest.py index 88253bd73..b019aef1d 100644 --- a/python/examples/pytest_plugin/conftest.py +++ b/python/examples/pytest_plugin/conftest.py @@ -1,15 +1,13 @@ """Project-level conftest for the pytest plugin demo. 
-A single ``pytest_plugins`` declaration is enough to load the plugin — its +A single ``pytest_plugins`` declaration is all that's needed — the plugin's fixtures, hooks, and CLI options register through standard pytest machinery -from there. ``load_dotenv()`` is optional; it just lets the default -``sift_client`` fixture pick up ``SIFT_API_KEY`` / ``SIFT_GRPC_URI`` / -``SIFT_REST_URI`` from a local ``.env`` when running against a real Sift org. -These can also be set as environment variables using your preferred method. -""" - -from dotenv import load_dotenv +from there. -load_dotenv() +The default ``sift_client`` fixture reads ``SIFT_API_KEY`` / ``SIFT_GRPC_URI`` +/ ``SIFT_REST_URI`` from the environment. Set them however you prefer: your CI +secret store, your shell, or a local ``.env`` loaded by ``pytest-dotenv`` +(``pip install pytest-dotenv`` and it auto-loads ``.env`` — no code here). +""" pytest_plugins = ["sift_client.pytest_plugin"] diff --git a/python/examples/pytest_plugin/pyproject.toml b/python/examples/pytest_plugin/pyproject.toml new file mode 100644 index 000000000..71280d16a --- /dev/null +++ b/python/examples/pytest_plugin/pyproject.toml @@ -0,0 +1,33 @@ +# Single config file for the demo. Pytest behavior lives under +# [tool.pytest.ini_options]; Sift report content lives under +# [tool.sift.pytest.report]. + +[tool.pytest.ini_options] +# Defaults give you the full step tree: every package, module, class, and +# parametrize axis becomes a parent step. These are the available knobs and +# their defaults — uncomment to opt out of a layer. +# +# sift_autouse = true # autouse fixtures (default: true) +# sift_package_step = true # Python package (dir with __init__.py) parent step (default: true) +# sift_module_step = true # module (test file) parent step (default: true) +# sift_class_step = true # class parent step incl. 
nested (default: true) +# sift_parametrize_nesting = true # parametrize parent steps (default: true) +# sift_git_metadata = true # git repo/branch/commit included on the report (default: true) + +[tool.sift.pytest.report] +# Display name for the report. Placeholders: {target} {command} {args} +# {rootdir} {timestamp} {count} {git_repo} {git_branch} {git_commit}. +# Omit to use the default "{target} {timestamp}". {target} reflects what ran, +# from the collected tests, anchored to the project name: e.g. +# project/tests/test_x.py::test_y (single test, [param] stripped), +# project/tests/motor (several files' common dir), or project (whole suite). +name = "pytest-plugin demo ({count} tests) {timestamp}" +# Grouping key across runs (same placeholders available). Omit to default to +# {target} (what ran). +test_case = "pytest-plugin-demo {git_branch}" + +[tool.sift.pytest.report.metadata] +# Free-form key/value metadata stamped on every report. Values keep their TOML +# type (string, int, float, bool). +ci_revision = 2 +test_source = 'pytest-plugin-demo' \ No newline at end of file diff --git a/python/examples/pytest_plugin/pytest.ini b/python/examples/pytest_plugin/pytest.ini deleted file mode 100644 index 90a1a824b..000000000 --- a/python/examples/pytest_plugin/pytest.ini +++ /dev/null @@ -1,11 +0,0 @@ -[pytest] -# Defaults give you the full step tree: every package, module, class, and -# parametrize axis becomes a parent step. These are the available ini options -# and their defaults. -# -# sift_autouse = true # autouse fixtures (default: true) -# sift_package_step = true # Python package (dir with __init__.py) parent step (default: true) -# sift_module_step = true # module (test file) parent step (default: true) -# sift_class_step = true # class parent step incl. 
nested (default: true) -# sift_parametrize_nesting = true # parametrize parent steps (default: true) -# sift_git_metadata = true # git repo/branch/commit included on the report (default: true) diff --git a/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py b/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py index ee3eef513..7cbe8f8ce 100644 --- a/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py +++ b/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py @@ -121,11 +121,17 @@ def test_report_level_metadata(step, report_context) -> None: The same ``update({...})`` pattern works for any field on ``TestReportUpdate`` (``run_id``, ``serial_number``, ``part_number``, ``system_operator``, ``metadata``, ...). Useful for linking a session - to a Sift Run or tagging the report with build / operator info. + to a Sift Run or tagging the report with build / operator info at runtime. + + Updating ``metadata`` *replaces* the whole map server-side, so spread the + report's current metadata first to add keys without dropping the entries + configured under ``[tool.sift.pytest.report.metadata]`` (or the git + metadata and auto-recorded ``pytest_command``). """ report_context.report.update( { "metadata": { + **report_context.report.metadata, "build_id": "v1.2.3", "operator": "ci", } diff --git a/python/lib/sift_client/_internal/pyproject_config.py b/python/lib/sift_client/_internal/pyproject_config.py new file mode 100644 index 000000000..6a8bd177b --- /dev/null +++ b/python/lib/sift_client/_internal/pyproject_config.py @@ -0,0 +1,84 @@ +"""Loader for the ``[tool.sift]`` table in a project's ``pyproject.toml``. + +The pytest plugin consumes this loader to resolve report-content config (under +``[tool.sift.pytest.report]``) and SDK-level fallbacks (URIs under +``[tool.sift]``). 
A malformed or missing ``pyproject.toml`` returns ``{}`` so a +bad config file never aborts the session — the plugin falls back to its +built-in defaults and surfaces a single warning. +""" + +from __future__ import annotations + +import warnings +from pathlib import Path +from typing import TYPE_CHECKING, Any + +# ``tomllib`` landed in 3.11; ``tomli`` is the same parser packaged for older +# interpreters and is declared as a conditional install dep on 3.8-3.10. +try: + import tomllib # type: ignore[import-not-found,unused-ignore] +except ImportError: # pragma: no cover - exercised on 3.8-3.10 only + import tomli as tomllib # type: ignore[no-redef,import-not-found,unused-ignore] + +if TYPE_CHECKING: + import pytest + + +# Bound the upward walk so a misconfigured environment can't trigger an +# unbounded filesystem traversal looking for a project root that isn't there. +_MAX_PARENT_WALK = 3 + + +def _find_pyproject(config: pytest.Config) -> Path | None: + """Locate the active project's ``pyproject.toml``. + + Order: + 1. ``config.inipath`` when it is itself a ``pyproject.toml`` (the common + case: project uses ``[tool.pytest.ini_options]`` so pytest loaded the + ini settings directly from pyproject). + 2. ``<rootpath>/pyproject.toml``. + 3. A bounded walk upward from ``rootpath`` for monorepo layouts where + pytest's rootdir is a subdirectory and the project pyproject lives + higher up. + """ + inipath = config.inipath + if inipath is not None and inipath.name == "pyproject.toml" and inipath.is_file(): + return inipath + cur = Path(config.rootpath).resolve() + candidate = cur / "pyproject.toml" + if candidate.is_file(): + return candidate + for _ in range(_MAX_PARENT_WALK): + cur = cur.parent + candidate = cur / "pyproject.toml" + if candidate.is_file(): + return candidate + return None + + +def load_tool_sift(config: pytest.Config) -> dict[str, Any]: + """Return the parsed ``[tool.sift]`` table from the project's pyproject.toml.
+ + Returns ``{}`` when no pyproject is discoverable, when the file omits the + ``[tool.sift]`` table, or when parsing fails. A parse / IO failure emits a + single :class:`SiftPytestPluginWarning` so the session continues with + defaults rather than aborting on a malformed file. + """ + pyproject = _find_pyproject(config) + if pyproject is None: + return {} + try: + with pyproject.open("rb") as fh: + data = tomllib.load(fh) + except (OSError, tomllib.TOMLDecodeError) as exc: + # Deferred import: ``pytest_plugin`` imports this loader, so a + # top-level import here would close the cycle at module load time. + from sift_client.pytest_plugin import SiftPytestPluginWarning + + warnings.warn( + f"Failed to read {pyproject} for [tool.sift]: {type(exc).__name__}: {exc}", + SiftPytestPluginWarning, + stacklevel=2, + ) + return {} + return (data.get("tool") or {}).get("sift") or {} diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py index 90a5fcb56..263ac03ac 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py @@ -67,20 +67,6 @@ def test_disabled_does_not_require_credentials( result = pytester.runpytest_subprocess("--sift-disabled") result.assert_outcomes(passed=1) - def test_disabled_via_env_var( - self, - pytester: pytest.Pytester, - clear_sift_env: None, - write_plugin_conftest: Callable[[], None], - monkeypatch: pytest.MonkeyPatch, - ) -> None: - """``SIFT_DISABLED=1`` triggers disabled mode without the CLI flag.""" - write_plugin_conftest() - pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)") - monkeypatch.setenv("SIFT_DISABLED", "1") - result = pytester.runpytest_subprocess() - result.assert_outcomes(passed=1) - def test_disabled_supersedes_offline( self, pytester: pytest.Pytester, diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_report_fields.py 
b/python/lib/sift_client/_tests/pytest_plugin/test_report_fields.py new file mode 100644 index 000000000..a4c723b47 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_report_fields.py @@ -0,0 +1,272 @@ +"""Tests for [tool.sift.pytest.report] and the report-content env-var overrides. + +Report-content fields are configured under ``[tool.sift.pytest.report]`` in +pyproject.toml and overridden per-run via ``SIFT_REPORT_*`` env vars. These +tests drive offline-mode inner sessions and inspect the JSONL +``CreateTestReport`` line, which serializes every report field with its proto +type intact. +""" + +from __future__ import annotations + +import json +from typing import TYPE_CHECKING, Callable + +from google.protobuf import json_format +from sift.metadata.v1.metadata_pb2 import MetadataValue + +from sift_client.util.metadata import metadata_proto_to_dict + +if TYPE_CHECKING: + from pathlib import Path + + import pytest + + +def _create_report_dict(log_text: str) -> dict: + """Parse the JSON payload from the ``[CreateTestReport:...]`` log line.""" + for line in log_text.splitlines(): + if line.startswith("[CreateTestReport:"): + return json.loads(line[line.index("{") :]) + raise AssertionError(f"no CreateTestReport line in log:\n{log_text}") + + +def _metadata_pairs(report: dict) -> dict[str, str | float | bool]: + """Unwrap the report's JSON metadata map into a ``{key: value}`` dict. + + Each entry is the JSON form of a ``MetadataValue`` proto, so parse it back + into the proto and reuse the canonical ``metadata_proto_to_dict`` converter + rather than hand-walking the value slots. 
+ """ + protos = [json_format.ParseDict(entry, MetadataValue()) for entry in report.get("metadata", [])] + return metadata_proto_to_dict(protos) + + +class TestReportFields: + def test_toml_resolves_every_field( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Every report-content field resolves from ``[tool.sift.pytest.report]``.""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.sift.pytest.report] + test_case = "case-from-toml" + test_system_name = "rig-7" + system_operator = "ci-bot" + serial_number = "SN-001" + part_number = "PN-9000" + """ + ) + pytester.makepyfile("def test_one(step): pass") + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + report = _create_report_dict(log_path.read_text()) + assert report["testCase"] == "case-from-toml" + assert report["testSystemName"] == "rig-7" + assert report["systemOperator"] == "ci-bot" + assert report["serialNumber"] == "SN-001" + assert report["partNumber"] == "PN-9000" + + def test_test_case_template_renders( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """``test_case`` accepts the same template placeholders as ``name``.""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.sift.pytest.report] + test_case = "case-{rootdir}-{count}" + """ + ) + pytester.makepyfile("def test_one(step): pass") + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + report = _create_report_dict(log_path.read_text()) + assert report["testCase"].startswith("case-"), report["testCase"] + assert report["testCase"].endswith("-1"), report["testCase"] + + def test_default_target_single_test_is_function( 
+ self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """One test collected -> default test_case is the project-anchored function nodeid. + + Derivation is from the collected items, so it doesn't depend on flag + order or which path form was typed; the value is anchored to the + rootdir (project) name. + """ + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyfile(test_demo="def test_one(step): pass") + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + report = _create_report_dict(log_path.read_text()) + assert report["testCase"] == f"{pytester.path.name}/test_demo.py::test_one", report[ + "testCase" + ] + + def test_default_target_single_test_strips_param( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """A parametrized single test drops the ``[param]`` suffix from the key.""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyfile( + test_demo=( + "import pytest\n@pytest.mark.parametrize('v', [12])\ndef test_p(step, v): pass\n" + ) + ) + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + report = _create_report_dict(log_path.read_text()) + assert report["testCase"] == f"{pytester.path.name}/test_demo.py::test_p", report[ + "testCase" + ] + + def test_default_target_single_file( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Multiple tests in one file -> the default target is that file (anchored).""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyfile(test_demo="def test_a(step): pass\ndef test_b(step): pass") + result = 
pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=2) + report = _create_report_dict(log_path.read_text()) + assert report["testCase"] == f"{pytester.path.name}/test_demo.py", report["testCase"] + + def test_default_target_multiple_files_common_dir( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Tests across several files -> the default target is their common directory (anchored).""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + suite = pytester.mkdir("suite") + (suite / "test_a.py").write_text("def test_a(step): pass\n") + (suite / "test_b.py").write_text("def test_b(step): pass\n") + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=2) + report = _create_report_dict(log_path.read_text()) + assert report["testCase"] == f"{pytester.path.name}/suite", report["testCase"] + + def test_default_target_whole_tree_is_project( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Tests spanning the rootdir -> the default target is the bare project name.""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + # Two files directly under rootdir -> common path is rootdir itself. 
+ pytester.makepyfile(test_a="def test_a(step): pass", test_b="def test_b(step): pass") + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=2) + report = _create_report_dict(log_path.read_text()) + assert report["testCase"] == pytester.path.name, report["testCase"] + + def test_env_overrides_toml( + self, + pytester: pytest.Pytester, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """An env var wins over a value set in ``[tool.sift.pytest.report]``.""" + log_path = tmp_path / "run.jsonl" + monkeypatch.setenv("SIFT_REPORT_SYSTEM_OPERATOR", "env-wins") + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.sift.pytest.report] + system_operator = "ci-bot" + """ + ) + pytester.makepyfile("def test_one(step): pass") + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + report = _create_report_dict(log_path.read_text()) + assert report["systemOperator"] == "env-wins" + + def test_metadata_table_typed_values( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """``[tool.sift.pytest.report.metadata]`` keeps TOML types end-to-end.""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.sift.pytest.report.metadata] + build_id = "v1.2.3" + lane = 2 + verbose = true + """ + ) + pytester.makepyfile("def test_one(step): pass") + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + pairs = _metadata_pairs(_create_report_dict(log_path.read_text())) + assert pairs.get("build_id") == "v1.2.3" + # Ints and floats share the proto's numeric slot. 
+ assert pairs.get("lane") == 2 + assert pairs.get("verbose") is True + # Auto-recorded keys still present alongside the typed entries. + assert "pytest_command" in pairs + + def test_loader_warns_on_bad_toml( + self, + tmp_path: Path, + recwarn: pytest.WarningsRecorder, + ) -> None: + """A malformed pyproject.toml emits a warning and the loader returns ``{}``. + + pytest itself aborts the session when its own ``pyproject.toml`` is + unparseable, so the loader's graceful warning path only matters when + the file is reachable via the loader's own discovery (e.g. an upward + walk in a monorepo). Exercise the loader directly here. + """ + from types import SimpleNamespace + + from sift_client._internal.pyproject_config import load_tool_sift + + bad = tmp_path / "pyproject.toml" + bad.write_text('[tool.sift]\ngrpc_uri = "unterminated\n') + fake_config = SimpleNamespace(inipath=bad, rootpath=tmp_path) + + result = load_tool_sift(fake_config) # type: ignore[arg-type] + + assert result == {} + messages = [str(w.message) for w in recwarn.list] + assert any("[tool.sift]" in m and "Failed to read" in m for m in messages), messages diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_report_name.py b/python/lib/sift_client/_tests/pytest_plugin/test_report_name.py new file mode 100644 index 000000000..5808c5a78 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_report_name.py @@ -0,0 +1,120 @@ +"""Tests for report display-name templating. + +The report ``name`` is rendered from a template set under +``[tool.sift.pytest.report] name`` and defaults to ``"{target} {timestamp}"``. +The full pytest invocation is preserved on the report's metadata under +``pytest_command``. These tests drive offline-mode inner sessions and inspect +the JSONL ``CreateTestReport`` line for the rendered values. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable + +if TYPE_CHECKING: + from pathlib import Path + + import pytest + + +def _create_report_line(content: str) -> str: + """Return the ``[CreateTestReport:...]`` JSONL line from a log file.""" + for line in content.splitlines(): + if line.startswith("[CreateTestReport:"): + return line + raise AssertionError(f"no CreateTestReport line in log:\n{content}") + + +class TestReportName: + def test_toml_template( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """``[tool.sift.pytest.report] name`` renders placeholders into the report name.""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.sift.pytest.report] + name = "TomlReport-{count}" + """ + ) + pytester.makepyfile("def test_one(step): pass") + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + line = _create_report_line(log_path.read_text()) + assert '"name":"TomlReport-1"' in line, line + + def test_full_command_preserved_in_metadata( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """The full pytest invocation is stored on the report metadata.""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyfile("def test_one(step): pass") + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + line = _create_report_line(log_path.read_text()) + assert '"pytest_command"' in line, line + # The recorded command reflects the actual invocation. 
+ assert "--sift-offline" in line, line + + def test_git_placeholders_render_empty_outside_repo( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Git placeholders are recognized and render empty when not in a repo. + + The inner pytester session runs in a temp dir that is not a git + checkout, so ``{git_branch}`` resolves to an empty string rather than + triggering the unknown-placeholder fallback. + """ + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.sift.pytest.report] + name = "R-{git_branch}-{count}" + """ + ) + pytester.makepyfile("def test_one(step): pass") + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + combined = "\n".join(result.outlines + result.errlines) + assert "Invalid sift_report_name template" not in combined, combined + line = _create_report_line(log_path.read_text()) + assert '"name":"R--1"' in line, line + + def test_invalid_template_falls_back_and_warns( + self, + pytester: pytest.Pytester, + tmp_path: Path, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """An unknown placeholder warns and falls back without aborting the session.""" + log_path = tmp_path / "run.jsonl" + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.sift.pytest.report] + name = "{nope}" + """ + ) + pytester.makepyfile("def test_one(step): pass") + result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}") + result.assert_outcomes(passed=1) + combined = "\n".join(result.outlines + result.errlines) + assert "Invalid sift_report_name template" in combined, combined + # The report is still created despite the bad template. 
+ _create_report_line(log_path.read_text()) diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_settings_reference.py b/python/lib/sift_client/_tests/pytest_plugin/test_settings_reference.py new file mode 100644 index 000000000..ba6fbf5a5 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_settings_reference.py @@ -0,0 +1,39 @@ +"""Guard rail that pins the docs settings table to the ``_OPTIONS`` registry. + +If you add or change a setting in ``lib/sift_client/pytest_plugin.py`` without +regenerating the Markdown table in ``docs/guides/pytest_plugin/configuration.md``, +this test fails with the up-to-date block to paste in. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + import pytest + + +# python/lib/sift_client/_tests/pytest_plugin/test_settings_reference.py -> python/ +_REPO_PYTHON_DIR = Path(__file__).resolve().parents[4] +_DOCS_PATH = _REPO_PYTHON_DIR / "docs/guides/pytest_plugin/configuration.md" + + +def test_settings_reference_docs_in_sync(pytestconfig: pytest.Config) -> None: + """The Markdown table under '## Settings reference' matches the registry verbatim.""" + if not _DOCS_PATH.exists(): + import pytest + + pytest.skip(f"{_DOCS_PATH} not present in this checkout") + from sift_client.pytest_plugin import _render_settings_reference + + rendered = _render_settings_reference() + content = _DOCS_PATH.read_text() + if rendered not in content: + import pytest + + pytest.fail( + "Settings reference is out of sync with the _OPTIONS registry. 
Replace the " + "table under '## Settings reference' in " + "docs/guides/pytest_plugin/configuration.md with:\n\n" + rendered + ) diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_typo_detector.py b/python/lib/sift_client/_tests/pytest_plugin/test_typo_detector.py new file mode 100644 index 000000000..ed7a92dc4 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_typo_detector.py @@ -0,0 +1,113 @@ +"""Tests for the unknown-setting warnings fired in ``pytest_configure``. + +The plugin scans ``SIFT_*`` env vars and ``[tool.sift.pytest.*]`` keys at +session start and emits a ``SiftPytestPluginWarning`` for anything not +declared in the central ``_OPTIONS`` registry. A typo (`SIFT_REPORT_SERIALNUM` +instead of `SIFT_REPORT_SERIAL_NUMBER`) would otherwise silently no-op. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Callable + +if TYPE_CHECKING: + import pytest + + +class TestTypoDetector: + def test_unknown_env_var_warns( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + monkeypatch: pytest.MonkeyPatch, + write_plugin_conftest: Callable[[], None], + ) -> None: + """An unknown ``SIFT_*`` env var emits a warning with a closest-match hint.""" + monkeypatch.setenv("SIFT_REPORT_SERIALNUM", "SN-1") # missing underscore + write_plugin_conftest() + pytester.makepyfile("def test_runs(): pass") + result = pytester.runpytest_subprocess("--sift-disabled") + combined = "\n".join(result.outlines + result.errlines) + assert "Unknown SIFT_* env var `SIFT_REPORT_SERIALNUM`" in combined, combined + assert "did you mean `SIFT_REPORT_SERIAL_NUMBER`" in combined, combined + + def test_known_env_var_silent( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + monkeypatch: pytest.MonkeyPatch, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Declared env vars don't warn.""" + monkeypatch.setenv("SIFT_REPORT_SERIAL_NUMBER", "SN-1") + write_plugin_conftest() + pytester.makepyfile("def test_runs(): 
pass") + result = pytester.runpytest_subprocess("--sift-disabled") + combined = "\n".join(result.outlines + result.errlines) + assert "Unknown SIFT_*" not in combined, combined + + def test_unknown_toml_key_warns( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """An unknown ``[tool.sift.pytest.report]`` key warns with a suggestion.""" + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.sift.pytest.report] + serial_numbr = "SN-1" + """ + ) + pytester.makepyfile("def test_runs(): pass") + result = pytester.runpytest_subprocess("--sift-disabled") + combined = "\n".join(result.outlines + result.errlines) + assert "Unknown sift config key" in combined, combined + assert "pytest.report.serial_numbr" in combined, combined + assert "did you mean" in combined, combined + assert "serial_number" in combined, combined + + def test_unknown_toml_outside_pytest_scope_silent( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """``[tool.sift.X]`` outside ``tool.sift.pytest`` is not the plugin's concern. + + Other Sift tools may use ``tool.sift.`` (the build-time + ``[tool.sift.extras]`` in this repo's own pyproject is one example); + the detector intentionally only walks ``tool.sift.pytest``. 
+ """ + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.sift.something_else] + anything = "goes" + """ + ) + pytester.makepyfile("def test_runs(): pass") + result = pytester.runpytest_subprocess("--sift-disabled") + combined = "\n".join(result.outlines + result.errlines) + assert "Unknown sift config key" not in combined, combined + + def test_metadata_subtree_keys_are_user_defined( + self, + pytester: pytest.Pytester, + clear_sift_env: None, + write_plugin_conftest: Callable[[], None], + ) -> None: + """Keys under ``[tool.sift.pytest.report.metadata]`` don't trigger warnings.""" + write_plugin_conftest() + pytester.makepyprojecttoml( + """ + [tool.sift.pytest.report.metadata] + anything_at_all = "value" + another_thing = 42 + """ + ) + pytester.makepyfile("def test_runs(): pass") + result = pytester.runpytest_subprocess("--sift-disabled") + combined = "\n".join(result.outlines + result.errlines) + assert "Unknown sift config key" not in combined, combined diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index ed2d71fb6..4341bf122 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -11,10 +11,12 @@ import pytest from sift_client import SiftClient, SiftConnectionConfig +from sift_client._internal.pyproject_config import load_tool_sift from sift_client.errors import SiftWarning from sift_client.sift_types.test_report import ErrorInfo, TestStatus from sift_client.util.test_results import ReportContext from sift_client.util.test_results.context_manager import ( + _git_metadata, _quiet_fork_stderr, format_assertion_message, format_truncated_traceback, @@ -191,9 +193,9 @@ def _build_hierarchy_chain( broadly so a misbehaving collector doesn't abort the whole collection phase — that frame's docstring just becomes ``None``. 
""" - include_package = bool(_option_or_ini(config, _PACKAGE_STEP)) - include_module = bool(_option_or_ini(config, _MODULE_STEP)) - include_class = bool(_option_or_ini(config, _CLASS_STEP)) + include_package = bool(_PACKAGE_STEP.resolve(config)) + include_module = bool(_MODULE_STEP.resolve(config)) + include_class = bool(_CLASS_STEP.resolve(config)) chain: list[tuple[str, str, str | None, bool]] = [] # ``node.parent`` is typed as the internal ``_pytest.nodes.Node`` which @@ -220,207 +222,423 @@ def _build_hierarchy_chain( return tuple(reversed(chain)) +# Settings-reference categories. Each maps to a docs subsection and, in the +# renderer, to the column subset that category actually uses. +_CAT_BEHAVIOR = "Pytest behavior" +_CAT_CONNECTION = "Connection" +_CAT_REPORT = "Report content" +_CATEGORIES = (_CAT_BEHAVIOR, _CAT_CONNECTION, _CAT_REPORT) + +_TOOL_SIFT_KEY = pytest.StashKey[dict]() + + +def _tool_sift(config: pytest.Config | None) -> dict[str, Any]: + """Session-cached ``[tool.sift]`` table. + + Every option that reads TOML, plus the typo detector, would otherwise + re-parse pyproject.toml on the session-start path — and re-emit the + malformed-file warning each time. Parse once per session via the config + stash; ``load_tool_sift`` stays the uncached parser for direct callers. + """ + if config is None: + return {} + cached = config.stash.get(_TOOL_SIFT_KEY, None) + if cached is None: + cached = load_tool_sift(config) + config.stash[_TOOL_SIFT_KEY] = cached + return cached + + @dataclass(frozen=True) class _Option: - """A single Sift plugin setting, registered as a CLI flag and/or an ini key. + """One setting and the logic to resolve it from wherever it can be set. + + A setting may be read from an env var, a CLI flag, a pytest ini key, or a + ``[tool.sift...]`` TOML path. :meth:`resolve` walks the declared surfaces in + env > cli > ini > toml order. 
``metadata`` is the one exception: a free-form + TOML table (``merge=True``) resolved by :meth:`resolve_merged`. - ``ini_name`` is used as both the ini key and the CLI ``dest``, so a value - set either way lands on the same config slot. ``cli_flag=None`` makes the - option ini-only (e.g. the URI fallbacks). + One registry of these drives ``pytest_addoption``, the resolvers, the docs + settings-reference table, and the typo detector, so a setting is added or + changed in one place. + + Surface fields (declare only the ones a setting uses): + + - ``cli`` / ``cli_action``: CLI flag (e.g. ``"--sift-offline"``) and + argparse action; ``cli_dest`` is derived from the flag. + - ``ini`` / ``ini_type`` / ``ini_default``: pytest ini key under + ``[tool.pytest.ini_options]`` and its pytest type + default. + - ``toml``: tuple path under ``[tool.sift...]``, e.g. + ``("pytest", "report", "name")`` -> ``tool.sift.pytest.report.name``. + - ``env``: full env var name, e.g. ``"SIFT_API_KEY"``. + + ``category`` groups the option in the docs settings reference (one of + ``_CATEGORIES``). """ - ini_name: str - ini_help: str - cli_flag: str | None = None - cli_help: str | None = None - action: str | None = None + name: str + help: str + category: str + cli: str | None = None + cli_action: str | None = None + ini: str | None = None ini_type: str | None = None ini_default: Any = None + toml: tuple[str, ...] | None = None + env: str | None = None + merge: bool = False + + @property + def cli_dest(self) -> str: + """Argparse ``dest`` for the option. + + When the option has both a CLI flag and an ini key, the dest matches + the ini name so ``config.getoption(ini_name)`` returns the CLI value + (and falls through to ``config.getini(ini_name)`` when the flag wasn't + passed). Without an ini key, the dest derives from the flag name. 
+ """ + if self.ini: + return self.ini + if self.cli is None: + return self.name + return self.cli.lstrip("-").replace("-", "_") + + def __post_init__(self) -> None: + if self.cli_action and not self.cli: + raise ValueError(f"_Option({self.name!r}): cli_action requires cli") + if self.ini_type and not self.ini: + raise ValueError(f"_Option({self.name!r}): ini_type requires ini") + if self.merge and not self.toml: + raise ValueError(f"_Option({self.name!r}): merge=True needs toml") + if not any([self.cli, self.ini, self.toml, self.env]): + raise ValueError(f"_Option({self.name!r}): declares no surfaces") + if self.category not in _CATEGORIES: + raise ValueError(f"_Option({self.name!r}): category must be one of {_CATEGORIES}") + + def resolve(self, config: pytest.Config | None) -> Any: + """First set value from declared surfaces; ``None`` when unset everywhere. + + Walk order is env > cli > ini > toml. No current option declares both + env and cli, so the chain isn't ambiguous in practice. + ``getini`` returns the typed default for unset bool/list keys, so this + only returns ini values for booleans (always meaningful), non-empty + strings, and non-empty lists. 
+ """ + if self.env: + env_value = os.getenv(self.env) + if env_value not in (None, ""): + return env_value + if config is None: + return None + if self.cli: + cli_value = config.getoption(self.cli_dest, default=None) + if cli_value is not None: + return cli_value + if self.ini: + try: + ini_value = config.getini(self.ini) + except (KeyError, ValueError): + ini_value = None + if isinstance(ini_value, bool): + return ini_value + if isinstance(ini_value, str) and ini_value: + return ini_value + if isinstance(ini_value, list) and ini_value: + return ini_value + if self.toml: + toml_value = _walk_toml(_tool_sift(config), self.toml) + if toml_value not in (None, ""): + return toml_value + return None - + def resolve_merged(self, config: pytest.Config | None) -> dict[str, str | float | bool]: + """For ``merge=True`` dict-shape settings: the free-form TOML table. + + TOML values that don't fit ``dict[str, str | float | bool]`` (nested + tables, lists, ``None``) are dropped with a warning so a malformed + entry can't crash report creation. + """ + result: dict[str, str | float | bool] = {} + if config is not None and self.toml: + base = _walk_toml(_tool_sift(config), self.toml) + if isinstance(base, dict): + for key, value in base.items(): + if not isinstance(key, str): + continue + if isinstance(value, (bool, str, int, float)): + # ``bool`` first since ``isinstance(True, int)`` is True. 
+ result[key] = value # type: ignore[assignment] + continue + warnings.warn( + f"[tool.sift.{'.'.join(self.toml)}] entry {key!r} ignored: " + f"unsupported type {type(value).__name__}.", + SiftPytestPluginWarning, + stacklevel=2, + ) + return result + + +def _walk_toml(data: dict[str, Any], path: tuple[str, ...]) -> Any: + """Walk a parsed TOML tree along ``path``; return None on any missing key.""" + cur: Any = data + for key in path: + if not isinstance(cur, dict): + return None + cur = cur.get(key) + if cur is None: + return None + return cur + + +# --------------------------------------------------------------------------- +# Settings registry. +# +# Add new options here. The registry drives `pytest_addoption`, resolution, +# the docs settings-reference table, and the unknown-key typo detector, so a +# setting is declared once instead of wired up in several places. +# +# Where each setting lives follows a few principles: +# - Secrets (the API key) come from environment variables only, never a +# committed file. +# - Pytest behavior lives in [tool.pytest.ini_options] so it integrates with +# `pytest --help` / `--co` / `--trace-config`. +# - Sift report content lives in [tool.sift.pytest.report.*]. +# - Non-secret endpoints take an env var plus one static home (ini or toml, +# not both). +# - A CLI flag is added only when there is a real per-run override workflow; +# stable project config stays in ini/toml. +# - Dynamic per-run values are injected via environment variables (pytest-dotenv +# loads .env for local dev; CI sets the same names from its secret store). +# --------------------------------------------------------------------------- + +# Pytest behavior. The CLI flag survives because the per-run override is real. _LOG_FILE = _Option( - cli_flag="--sift-log-file", - ini_name="sift_log_file", - cli_help="Path to write the Sift test result log file. 
" - "Use 'true' (default) to auto-create a temp file, " - "False, 'false', or 'none' to disable logging, " - "or a file path to write to a specific location.", - ini_help="Default value for --sift-log-file. Same values accepted as " - "the CLI flag (path, 'true', 'false', 'none').", + name="log_file", + category=_CAT_BEHAVIOR, + help="Path to the JSONL log of create/update calls (path | true | false | none).", + cli="--sift-log-file", + ini="sift_log_file", ) - _GIT_METADATA = _Option( - cli_flag="--no-sift-git-metadata", - ini_name="sift_git_metadata", - action="store_false", - cli_help="Exclude git metadata from the Sift test results. " - "Git metadata (repo, branch, commit) is included by default.", - ini_help="Include git repo/branch/commit in the report (true/false). " - "Defaults to true. The --no-sift-git-metadata CLI flag overrides " - "this when passed.", + name="git_metadata", + category=_CAT_BEHAVIOR, + help="Capture git repo/branch/commit on the report.", + cli="--no-sift-git-metadata", + cli_action="store_false", + ini="sift_git_metadata", ini_type="bool", ini_default=True, ) - _OFFLINE = _Option( - cli_flag="--sift-offline", - ini_name="sift_offline", - action="store_true", - cli_help="Run without contacting Sift. All create/update calls are written " - "to a JSONL log file for later replay via `import-test-result-log`. " - "No session-start ping is attempted.", - ini_help="When true, run in offline mode (same effect as --sift-offline). Defaults to false.", + name="offline", + category=_CAT_BEHAVIOR, + help="Skip the session-start ping; route create/update through the JSONL log.", + cli="--sift-offline", + cli_action="store_true", + ini="sift_offline", ini_type="bool", ini_default=False, ) - _DISABLED = _Option( - cli_flag="--sift-disabled", - ini_name="sift_disabled", - action="store_true", - cli_help="Disable Sift integration entirely. Nothing contacts the API " - "and no log file is written. 
`step.measure(...)` still returns real " - "pass/fail booleans. Returned entities expose `is_simulated == True`. " - "Also honored via the `SIFT_DISABLED` env var. Supersedes every other " - "flag.", - ini_help="When true, run in disabled mode (same effect as --sift-disabled). " - "Also honored via the SIFT_DISABLED env var. Supersedes every other " - "setting. Defaults to false.", + name="disabled", + category=_CAT_BEHAVIOR, + help="Disable Sift entirely (no API calls, no log file). Supersedes --sift-offline.", + cli="--sift-disabled", + cli_action="store_true", + ini="sift_disabled", ini_type="bool", ini_default=False, ) -_GRPC_URI = _Option( - ini_name="sift_grpc_uri", - ini_help="Sift gRPC endpoint URI. The default `sift_client` fixture " - "prefers the SIFT_GRPC_URI environment variable and falls back to " - "this ini value.", -) - -_REST_URI = _Option( - ini_name="sift_rest_uri", - ini_help="Sift REST endpoint URI. The default `sift_client` fixture " - "prefers the SIFT_REST_URI environment variable and falls back to " - "this ini value.", -) - -_REPORT_URL_BASE = _Option( - cli_flag="--sift-report-url-base", - ini_name="sift_report_url_base", - cli_help="Sift web-app origin used to build the clickable report link in the " - "terminal footer (e.g. https://app.siftstack.com). Set this for on-prem or " - "custom deployments whose API host can't be mapped to a frontend " - "automatically. Also honored via the SIFT_APP_URL env var. When unset, the " - "link is derived from the REST URI for known Sift hosts.", - ini_help="Default for --sift-report-url-base. The Sift web-app origin used to " - "build the report link in the terminal footer. Also honored via the " - "SIFT_APP_URL env var. When unset, the link is derived from the REST URI for " - "known Sift hosts.", -) - _OPEN = _Option( - cli_flag="--sift-open-report", - ini_name="sift_open_report", - action="store_true", - cli_help="Open the resulting Sift test report in a browser at session end. 
" - "Online mode only; no-op when the report URL can't be resolved. Intended for " - "local development.", - ini_help="When true, open the report in a browser at session end (online only). " - "Defaults to false.", + name="open_report", + category=_CAT_BEHAVIOR, + help="Open the resulting report in a browser at session end (online only; " + "no-op when the report URL can't be resolved).", + cli="--sift-open-report", + cli_action="store_true", + ini="sift_open_report", ini_type="bool", ini_default=False, ) +# Pytest behavior: set-once project defaults (no CLI flag — no per-run override). _AUTOUSE = _Option( - ini_name="sift_autouse", - ini_help="Default for the Sift autouse fixtures (report_context, step, " - "_hierarchy_parents, _parametrize_parents). When true (default), tests " - "are included unless marked with @pytest.mark.sift_exclude. When false, " - "tests are skipped unless marked with @pytest.mark.sift_include. " - "Bulk-apply markers in a directory's conftest via " - "`pytest_collection_modifyitems`.", + name="autouse", + category=_CAT_BEHAVIOR, + help="Default for the Sift autouse fixtures (report_context, step, hierarchy/parametrize parents).", + ini="sift_autouse", ini_type="bool", ini_default=True, ) - _PACKAGE_STEP = _Option( - ini_name="sift_package_step", - ini_help="When true (default), open a parent step for each Python package " - "(directory with an ``__init__.py``) in the test path. Set to false to " - "flatten package grouping.", + name="package_step", + category=_CAT_BEHAVIOR, + help="Open a parent step for each Python package in the test path.", + ini="sift_package_step", ini_type="bool", ini_default=True, ) - _MODULE_STEP = _Option( - ini_name="sift_module_step", - ini_help="When true (default), open a per-module parent step. 
Set to false " - "to skip module-level grouping in the report tree.", + name="module_step", + category=_CAT_BEHAVIOR, + help="Open a parent step for each test module.", + ini="sift_module_step", ini_type="bool", ini_default=True, ) - _CLASS_STEP = _Option( - ini_name="sift_class_step", - ini_help="When true (default), open per-class parent steps (including nested " - "classes). Set to false to keep class methods at module level.", + name="class_step", + category=_CAT_BEHAVIOR, + help="Open per-class parent steps, including nested classes.", + ini="sift_class_step", ini_type="bool", ini_default=True, ) - _PARAMETRIZE_NESTING = _Option( - ini_name="sift_parametrize_nesting", - ini_help="When true (default), parametrized tests nest under shared parent " - "steps (e.g. test_a -> v=1, v=2). Set to false to keep the flat per-test " - "leaf naming (test_a[1], test_a[2]).", + name="parametrize_nesting", + category=_CAT_BEHAVIOR, + help="Cluster parametrized tests under shared parent steps (e.g. test_a -> v=1, v=2).", + ini="sift_parametrize_nesting", ini_type="bool", ini_default=True, ) +# Credentials. The API key is env-only; the URIs accept env + ini. +_API_KEY = _Option( + name="api_key", + category=_CAT_CONNECTION, + help="Sift API key (secret, env-only).", + env="SIFT_API_KEY", +) +_GRPC_URI = _Option( + name="grpc_uri", + category=_CAT_CONNECTION, + help="Sift gRPC endpoint URI.", + env="SIFT_GRPC_URI", + ini="sift_grpc_uri", +) +_REST_URI = _Option( + name="rest_uri", + category=_CAT_CONNECTION, + help="Sift REST endpoint URI.", + env="SIFT_REST_URI", + ini="sift_rest_uri", +) +_APP_URL = _Option( + name="app_url", + category=_CAT_CONNECTION, + help="Sift web-app origin for the report link in the terminal footer (e.g. " + "https://app.siftstack.com). When unset, the link is derived from the REST URI " + "for known Sift hosts.", + env="SIFT_APP_URL", + ini="sift_app_url", +) + +# Report content. 
Project defaults in [tool.sift.pytest.report]; CI injects +# per-run values via SIFT_REPORT_* env vars (pytest-dotenv handles .env files +# for local dev). +_REPORT_NAME = _Option( + name="report_name", + category=_CAT_REPORT, + help="Template for the report display name. Placeholders: {target}, {command}, {args}, " + "{rootdir}, {timestamp}, {count}, {git_repo}, {git_branch}, {git_commit}.", + toml=("pytest", "report", "name"), +) +_TEST_CASE = _Option( + name="test_case", + category=_CAT_REPORT, + help="Template for the report's test_case field (same placeholders as report_name).", + toml=("pytest", "report", "test_case"), +) +_TEST_SYSTEM_NAME = _Option( + name="test_system_name", + category=_CAT_REPORT, + help="Name of the test system / rig. Defaults to the host's name.", + env="SIFT_REPORT_TEST_SYSTEM_NAME", + toml=("pytest", "report", "test_system_name"), +) +_SYSTEM_OPERATOR = _Option( + name="system_operator", + category=_CAT_REPORT, + help="Operator running the test. Defaults to the OS user.", + env="SIFT_REPORT_SYSTEM_OPERATOR", + toml=("pytest", "report", "system_operator"), +) +_SERIAL_NUMBER = _Option( + name="serial_number", + category=_CAT_REPORT, + help="Serial number of the unit under test.", + env="SIFT_REPORT_SERIAL_NUMBER", + toml=("pytest", "report", "serial_number"), +) +_PART_NUMBER = _Option( + name="part_number", + category=_CAT_REPORT, + help="Part number of the unit under test.", + env="SIFT_REPORT_PART_NUMBER", + toml=("pytest", "report", "part_number"), +) +_METADATA = _Option( + name="metadata", + category=_CAT_REPORT, + help="Free-form report metadata, as a TOML table of scalar values. For " + "dynamic per-run keys, attach them in conftest via the report_context fixture.", + toml=("pytest", "report", "metadata"), + merge=True, +) + _OPTIONS: tuple[_Option, ...] 
= ( _LOG_FILE, _GIT_METADATA, _OFFLINE, _DISABLED, - _GRPC_URI, - _REST_URI, - _REPORT_URL_BASE, _OPEN, _AUTOUSE, _PACKAGE_STEP, _MODULE_STEP, _CLASS_STEP, _PARAMETRIZE_NESTING, + _API_KEY, + _GRPC_URI, + _REST_URI, + _APP_URL, + _REPORT_NAME, + _TEST_CASE, + _TEST_SYSTEM_NAME, + _SYSTEM_OPERATOR, + _SERIAL_NUMBER, + _PART_NUMBER, + _METADATA, ) def pytest_addoption(parser: pytest.Parser) -> None: - """Register Sift-specific command-line options and ini keys. + """Register every CLI flag and pytest ini key declared in ``_OPTIONS``. - Each option can be set on the command line or under ``[tool.pytest.ini_options]`` - in ``pyproject.toml`` (or ``[pytest]`` in ``pytest.ini``). CLI values take - precedence over ini values, which take precedence over the built-in default. + One loop drives both surfaces — adding a setting is one entry in the + registry, not three edits across this function and a docs table. """ group = parser.getgroup("sift", description="Sift test results") for opt in _OPTIONS: - if opt.cli_flag is not None: + if opt.cli is not None: cli_kwargs: dict[str, Any] = { - "dest": opt.ini_name, + "dest": opt.cli_dest, "default": None, - "help": opt.cli_help, + "help": opt.help, } - if opt.action is not None: - cli_kwargs["action"] = opt.action - group.addoption(opt.cli_flag, **cli_kwargs) - - ini_kwargs: dict[str, Any] = {"help": opt.ini_help, "default": opt.ini_default} - if opt.ini_type is not None: - ini_kwargs["type"] = opt.ini_type - parser.addini(opt.ini_name, **ini_kwargs) + if opt.cli_action is not None: + cli_kwargs["action"] = opt.cli_action + group.addoption(opt.cli, **cli_kwargs) + if opt.ini is not None: + ini_kwargs: dict[str, Any] = {"help": opt.help, "default": opt.ini_default} + if opt.ini_type is not None: + ini_kwargs["type"] = opt.ini_type + parser.addini(opt.ini, **ini_kwargs) def pytest_configure(config: pytest.Config) -> None: - """Register the Sift gate markers so they show up in `pytest --markers`.""" + """Register the Sift gate 
markers and warn on unknown ``SIFT_*`` settings.""" config.addinivalue_line( "markers", "sift_include: force the Sift autouse fixtures to activate for this test " @@ -431,6 +649,158 @@ def pytest_configure(config: pytest.Config) -> None: "sift_exclude: force the Sift autouse fixtures to skip this test " "regardless of the `sift_autouse` ini default.", ) + # Surface typos in env vars and [tool.sift...] keys at session start so a + # silent no-op (env var that doesn't match anything, table key the loader + # ignores) becomes visible. The registry is the source of truth for what's + # known. + _warn_on_unknown_env_vars() + _warn_on_unknown_toml_keys(config) + + +def _render_settings_reference() -> str: + """Render the Markdown settings reference from ``_OPTIONS``. + + One ``### `` subsection per category, each table showing only the + columns that category uses (so no dead all-``—`` columns). The plugin docs + at ``docs/guides/pytest_plugin/configuration.md`` embed this output verbatim + so the registry and the docs can't drift; + ``test_settings_reference_docs_in_sync`` is the guard rail. Regenerate with:: + + uv run python -c "from sift_client.pytest_plugin import _render_settings_reference; print(_render_settings_reference())" + """ + + def _cli_cell(opt: _Option) -> str: + return f"`{opt.cli}`" if opt.cli else "—" + + def _ini_cell(opt: _Option) -> str: + return f"`{opt.ini}`" if opt.ini else "—" + + def _toml_cell(opt: _Option) -> str: + if not opt.toml: + return "—" + if opt.merge: + return f"`[tool.sift.{'.'.join(opt.toml)}]` (table)" + section = ".".join(opt.toml[:-1]) + return f"`[tool.sift.{section}] {opt.toml[-1]}`" + + def _env_cell(opt: _Option) -> str: + if opt.env: + return f"`{opt.env}`" + return "—" + + # Per-category column layout: only the surfaces that category actually uses. + # Each column is (header, cell-renderer). 
+ columns_by_category = { + _CAT_BEHAVIOR: [ + ("CLI flag", _cli_cell), + ("Ini (`[tool.pytest.ini_options]`)", _ini_cell), + ], + _CAT_CONNECTION: [ + ("Ini (`[tool.pytest.ini_options]`)", _ini_cell), + ("Env var", _env_cell), + ], + _CAT_REPORT: [ + ("TOML (`[tool.sift...]`)", _toml_cell), + ("Env var", _env_cell), + ], + } + + def _escape(cell: str) -> str: + # Literal pipes inside a Markdown table cell need backslash escaping or + # they'd be parsed as column separators. + return cell.replace("|", "\\|") + + blocks: list[str] = [] + for category in _CATEGORIES: + opts = [o for o in _OPTIONS if o.category == category] + if not opts: + continue + columns = columns_by_category[category] + headers = ["Setting", *(h for h, _ in columns)] + lines = [ + f"### {category}", + "", + "| " + " | ".join(headers) + " |", + "|" + "|".join(["---"] * len(headers)) + "|", + ] + for opt in opts: + cells = [opt.help, *(render(opt) for _, render in columns)] + lines.append("| " + " | ".join(_escape(c) for c in cells) + " |") + blocks.append("\n".join(lines)) + return "\n\n".join(blocks) + + +def _warn_on_unknown_env_vars() -> None: + """Emit a warning for any ``SIFT_*`` env var not declared in the registry. + + The registry declares each env var by its full name (``opt.env``); a + ``SIFT_*`` var that matches none of them is almost always a typo. + """ + import difflib + + known_full = {opt.env for opt in _OPTIONS if opt.env} + suggestion_pool = sorted(known_full) + for name in sorted(os.environ): + if not name.startswith("SIFT_"): + continue + if name in known_full: + continue + close = difflib.get_close_matches(name, suggestion_pool, n=1, cutoff=0.6) + hint = f" (did you mean `{close[0]}`?)" if close else "" + warnings.warn( + f"Unknown SIFT_* env var `{name}`{hint}; ignored.", + SiftPytestPluginWarning, + stacklevel=2, + ) + + +def _warn_on_unknown_toml_keys(config: pytest.Config) -> None: + """Walk ``[tool.sift.pytest.*]`` in pyproject.toml and warn on keys outside the registry. 
+ + Only the ``tool.sift.pytest`` subtree is checked. Other ``tool.sift.*`` + subtrees are reserved for non-pytest Sift tooling (e.g. ``tool.sift.extras`` + is consumed by this repo's extras-generation script) and aren't our + concern. Free-form subtrees (``merge=True`` options like ``metadata``) + stop the walk — their keys are user-defined and not validated. + """ + import difflib + + data = _tool_sift(config) + pytest_table = (data or {}).get("pytest") + if not isinstance(pytest_table, dict): + return + # Build leaf/free-form/prefix sets relative to the ``("pytest", ...)`` root + # the registry already uses, so the walk runs on the table we just sliced. + leaves = {opt.toml for opt in _OPTIONS if opt.toml and not opt.merge} + free_form = {opt.toml for opt in _OPTIONS if opt.toml and opt.merge} + prefixes: set[tuple[str, ...]] = set() + for full in leaves | free_form: + for i in range(len(full)): + prefixes.add(full[:i]) + + def _walk(node: Any, base: tuple[str, ...]) -> None: + if base in free_form or not isinstance(node, dict): + return + for key, value in node.items(): + path = (*base, str(key)) + if path in leaves or path in free_form: + continue + if path in prefixes: + _walk(value, path) + continue + full_name = "tool.sift." 
+ ".".join(path) + same_depth = [ + ".".join(p) for p in (leaves | free_form | prefixes) if len(p) == len(path) + ] + close = difflib.get_close_matches(".".join(path), same_depth, n=1, cutoff=0.6) + hint = f" (did you mean `tool.sift.{close[0]}`?)" if close else "" + warnings.warn( + f"Unknown sift config key `{full_name}`{hint}; ignored.", + SiftPytestPluginWarning, + stacklevel=2, + ) + + _walk(pytest_table, ("pytest",)) def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None: @@ -472,13 +842,11 @@ def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None: def _is_offline(pytestconfig: pytest.Config | None) -> bool: - return bool(_option_or_ini(pytestconfig, _OFFLINE)) + return bool(_OFFLINE.resolve(pytestconfig)) def _is_disabled(pytestconfig: pytest.Config | None) -> bool: - if bool(_option_or_ini(pytestconfig, _DISABLED)): - return True - return os.getenv("SIFT_DISABLED", "").lower() in ("1", "true", "yes") + return bool(_DISABLED.resolve(pytestconfig)) def _sdk_version() -> str: @@ -692,7 +1060,7 @@ def pytest_terminal_summary(terminalreporter: Any, exitstatus: int, config: pyte config.stash[SIFT_REPORT_ID_STASH_KEY] = report_id if report_url is not None: config.stash[SIFT_REPORT_URL_STASH_KEY] = report_url - if _option_or_ini(config, _OPEN): + if _OPEN.resolve(config): _maybe_open_report(report_url) if quiet: @@ -768,7 +1136,7 @@ def pytest_terminal_summary(terminalreporter: Any, exitstatus: int, config: pyte _sift_kv( terminalreporter, "Report", - f"id {report_id} (set sift_report_url_base for a clickable link)", + f"id {report_id} (set sift_app_url for a clickable link)", ) if report_id and getattr(context, "replay_incomplete", False) and log_file is not None: @@ -793,24 +1161,6 @@ def _sift_enabled_for(node: pytest.Item | pytest.Collector, default: bool) -> bo return default -def _option_or_ini(pytestconfig: pytest.Config | None, opt: _Option) -> Any: - """Resolve a Sift plugin setting from CLI > ini > 
None. - - The ``addoption`` registrations use ``default=None`` so we can tell whether - the CLI was actually used. When the CLI didn't set a value, fall back to - the matching ``addini`` key. - """ - if pytestconfig is None: - return None - cli = pytestconfig.getoption(opt.ini_name, default=None) - if cli is not None: - return cli - try: - return pytestconfig.getini(opt.ini_name) - except (KeyError, ValueError): - return None - - def _resolve_log_file(pytestconfig: pytest.Config | None) -> str | Path | bool | None: """Determine log_file value from CLI flag or ini key. @@ -828,7 +1178,7 @@ def _resolve_log_file(pytestconfig: pytest.Config | None) -> str | Path | bool | Rejects ``--sift-log-file=none`` combined with ``--sift-offline`` since offline mode needs the log file as its sole sink. """ - raw = _option_or_ini(pytestconfig, _LOG_FILE) + raw = _LOG_FILE.resolve(pytestconfig) disabled = raw is False or (isinstance(raw, str) and raw.lower() in ("false", "none")) if disabled and _is_offline(pytestconfig): raise pytest.UsageError( @@ -1007,19 +1357,154 @@ def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo[Any]): _finalize_after_teardown(item, report) +def _relativize(path: Path, rootpath: Path) -> str: + """Path relative to rootdir, or the basename when it sits outside the tree.""" + try: + rel = str(path.relative_to(rootpath)) + except ValueError: + return path.name + return "" if rel == "." else rel + + +def _strip_param(nodeid: str) -> str: + """Drop the trailing ``[param]`` from a nodeid, keeping ``file::Class::func``. + + The parametrize id is a variation of the test, not its identity — leaving it + in would make a re-parametrization silently shift the grouping key. Splits on + the last ``::`` segment and cuts at its first ``[``; class/function names + never contain ``[``, so nested brackets in a param value can't confuse it. 
+ """ + head, sep, leaf = nodeid.rpartition("::") + leaf = leaf.split("[", 1)[0] + return f"{head}{sep}{leaf}" + + +def _derive_target(request: pytest.FixtureRequest, args: tuple[str, ...]) -> str: + """Describe what was run, from the collected items rather than the command line. + + Collection is the ground truth of selection — independent of flag order, + ``-k`` / ``-m`` filters, or which path form was typed. Every value is + anchored to the rootdir (project) name so the shape is uniform; granularity + narrows with the selection: + + * a single test -> ``project/tests/test_motor.py::test_spin`` (param stripped) + * a single file -> ``project/tests/test_motor.py`` + * many files -> their common directory, ``project/tests/motor`` + * whole tree / nothing collected / paths outside rootdir -> ``project`` + + The report is session-level and individual tests are its steps, so the + file/directory grain is the natural unit of "what ran" for the report + itself. The verbatim invocation stays available via ``{command}`` and the + ``pytest_command`` metadata key. + """ + rootpath = request.config.rootpath + root = rootpath.name + + def _anchor(rel: str) -> str: + return f"{root}/{rel}" if rel else root + + items = list(getattr(request.session, "items", ()) or ()) + if not items: + return root + if len(items) == 1: + return _anchor(_strip_param(items[0].nodeid)) + paths = {p for p in (getattr(i, "path", None) for i in items) if p is not None} + if not paths: + return root + if len(paths) == 1: + return _anchor(_relativize(next(iter(paths)), rootpath)) + try: + common = Path(os.path.commonpath([str(p) for p in paths])) + except ValueError: + # e.g. paths on different drives (Windows); fall back to the project. 
+ return root + return _anchor(_relativize(common, rootpath)) + + +def _build_template_fields( + target: str, + command: str, + args: tuple[str, ...], + request: pytest.FixtureRequest, +) -> dict[str, Any]: + """Build the placeholder mapping shared by the name and test_case templates.""" + items = getattr(request.session, "items", ()) or () + git = _git_metadata() or {} + return { + "target": target, + "command": command, + "args": " ".join(args), + "rootdir": request.config.rootpath.name, + "timestamp": datetime.now(timezone.utc).isoformat(), + "count": len(items), + "git_repo": git.get("git_repo", ""), + "git_branch": git.get("git_branch", ""), + "git_commit": git.get("git_commit", ""), + } + + +def _format_template( + template: str, + fields: dict[str, Any], + *, + fallback: str, + option_label: str, +) -> str: + """Format ``template`` with ``fields``; on bad input, warn and return ``fallback``. + + A bad template should never block test results from being recorded, so the + rendering errors collapse to a warning + fallback rather than aborting the + session. + """ + try: + return template.format(**fields) + except (KeyError, IndexError, ValueError) as exc: + warnings.warn( + f"Invalid {option_label} template {template!r} ({exc}); using fallback.", + SiftPytestPluginWarning, + stacklevel=2, + ) + return fallback + + def _report_context_impl( sift_client: SiftClient, request: pytest.FixtureRequest, pytestconfig: pytest.Config | None = None, ) -> Generator[ReportContext, None, None]: args = request.config.invocation_params.args - test_path = Path(args[0]) if args else None - if test_path is not None and test_path.exists(): - base_name = test_path.name - test_case: Path | str = test_path - else: - base_name = "pytest " + " ".join(args) if args else "pytest" - test_case = base_name + # ``target`` is "what ran", derived from the collected items (see + # _derive_target) — invocation-independent, unlike parsing the command + # line. 
Both the display name and test_case default to it; the verbatim + # command stays available via {command} and the pytest_command metadata. + target = _derive_target(request, args) + command = "pytest " + " ".join(args) if args else "pytest" + fields = _build_template_fields(target, command, args, request) + name_template = _REPORT_NAME.resolve(pytestconfig) or "{target} {timestamp}" + name = _format_template( + name_template, + fields, + fallback=f"{target} {fields['timestamp']}", + option_label="sift_report_name", + ) + test_case_template = _TEST_CASE.resolve(pytestconfig) + test_case = ( + _format_template( + test_case_template, + fields, + fallback=target, + option_label="sift_test_case", + ) + if test_case_template + else target + ) + # Metadata starts from the [tool.sift.pytest.report.metadata] TOML table, and + # the auto-recorded pytest_command layers in last so the user can't + # accidentally overwrite it. + report_metadata: dict[str, str | float | bool] = { + **_METADATA.resolve_merged(pytestconfig), + "pytest_command": command, + } # Mode → ReportContext flags: # online (default): log_file=, replay_log_file=True # --sift-offline: log_file=, replay_log_file=False @@ -1027,15 +1512,19 @@ def _report_context_impl( disabled = sift_client._simulate offline = False if disabled else _is_offline(pytestconfig) log_file: str | Path | bool | None = False if disabled else _resolve_log_file(pytestconfig) - git_metadata = _option_or_ini(pytestconfig, _GIT_METADATA) - include_git_metadata = True if git_metadata is None else bool(git_metadata) + include_git_metadata = bool(_GIT_METADATA.resolve(pytestconfig)) with ReportContext( sift_client, - name=f"{base_name} {datetime.now(timezone.utc).isoformat()}", - test_case=str(test_case), + name=name, + test_case=test_case, + test_system_name=_TEST_SYSTEM_NAME.resolve(pytestconfig) or None, + system_operator=_SYSTEM_OPERATOR.resolve(pytestconfig) or None, + serial_number=_SERIAL_NUMBER.resolve(pytestconfig) or None, + 
part_number=_PART_NUMBER.resolve(pytestconfig) or None, log_file=log_file, include_git_metadata=include_git_metadata, replay_log_file=not (disabled or offline), + metadata=report_metadata, ) as context: global REPORT_CONTEXT REPORT_CONTEXT = context @@ -1054,12 +1543,6 @@ def _report_context_impl( _drain_hierarchy_stack() -_CREDENTIAL_KEYS: tuple[tuple[str, _Option | None], ...] = ( - ("SIFT_API_KEY", None), # env-only; never read from ini to keep secrets out of source control. - ("SIFT_GRPC_URI", _GRPC_URI), - ("SIFT_REST_URI", _REST_URI), -) - # Placeholder credentials used in --sift-offline mode when env/ini values # are missing. Offline mode never makes network calls, so the values are # only syntactically required by SiftConnectionConfig. @@ -1088,19 +1571,6 @@ def _build_disabled_client() -> SiftClient: return client -def _resolve_credential( - pytestconfig: pytest.Config | None, env_name: str, opt: _Option | None -) -> str | None: - """Resolve a Sift credential: env var first, then ini key (if registered), else None.""" - env_value = os.getenv(env_name) - if env_value: - return env_value - if opt is None or pytestconfig is None: - return None - ini_value = pytestconfig.getini(opt.ini_name) - return ini_value if isinstance(ini_value, str) and ini_value else None - - @pytest.fixture(scope="session") def sift_client(pytestconfig: pytest.Config) -> SiftClient: """Default ``SiftClient`` resolved from environment variables and ini keys. 
@@ -1126,33 +1596,34 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient: """ if _is_disabled(pytestconfig): return _build_disabled_client() - resolved = {env: _resolve_credential(pytestconfig, env, opt) for env, opt in _CREDENTIAL_KEYS} + resolved = { + "SIFT_API_KEY": _API_KEY.resolve(pytestconfig), + "SIFT_GRPC_URI": _GRPC_URI.resolve(pytestconfig), + "SIFT_REST_URI": _REST_URI.resolve(pytestconfig), + } missing = [env for env, value in resolved.items() if not value] if missing and not _is_offline(pytestconfig): raise pytest.UsageError( "Sift credentials missing: " + ", ".join(missing) + ". Set the environment variable(s) — pytest-dotenv loads them " - "from a `.env` file automatically — or set the URIs via " - "`sift_grpc_uri` / `sift_rest_uri` under `[tool.pytest.ini_options]` " + "from a `.env` file automatically — or set the URIs under " + "`sift_grpc_uri` / `sift_rest_uri` in `[tool.pytest.ini_options]` " "in pyproject.toml, or override the sift_client fixture in your " "conftest.py, or pass --sift-offline / --sift-disabled to run " "without contacting Sift." ) for env in missing: resolved[env] = _OFFLINE_DEFAULTS[env] - # Web-app origin for the report link: the sift_report_url_base CLI/ini option - # wins, then the SIFT_APP_URL env var, else host-based derivation in - # SiftClient.app_url. - report_url_base = _option_or_ini(pytestconfig, _REPORT_URL_BASE) or os.getenv("SIFT_APP_URL") - # `or ""` is unreachable in practice since the `missing` check above guarantees - # non-None values + # Web-app origin for the report link: the SIFT_APP_URL env var wins, then the + # sift_app_url ini key, else host-based derivation in SiftClient.app_url. 
+ app_url = _APP_URL.resolve(pytestconfig) return SiftClient( connection_config=SiftConnectionConfig( - api_key=resolved.get("SIFT_API_KEY") or "", - grpc_url=resolved.get("SIFT_GRPC_URI") or "", - rest_url=resolved.get("SIFT_REST_URI") or "", - app_url=report_url_base or None, + api_key=resolved["SIFT_API_KEY"] or "", + grpc_url=resolved["SIFT_GRPC_URI"] or "", + rest_url=resolved["SIFT_REST_URI"] or "", + app_url=app_url or None, ) ) @@ -1223,7 +1694,7 @@ def _step_impl( # by ``_parametrize_parents``. When parametrize-nesting is disabled, fall # back to the bracket-mangled pytest name (e.g. ``test_a[1]``) so the leaf # remains uniquely identifiable. - if _option_or_ini(request.config, _PARAMETRIZE_NESTING): + if _PARAMETRIZE_NESTING.resolve(request.config): path = node.stash.get(_PARAMETRIZE_PATH_KEY, ()) name = path[-1] if path else str(node.name) else: @@ -1261,7 +1732,7 @@ def _hierarchy_parents( Gated off when the item is excluded (avoids eager ``report_context`` setup). """ - default = bool(_option_or_ini(pytestconfig, _AUTOUSE)) + default = bool(_AUTOUSE.resolve(pytestconfig)) if not _sift_enabled_for(request.node, default): return None # Fall back to computing the chain on-demand for items that bypassed @@ -1343,10 +1814,10 @@ def _parametrize_parents( diff against a subsequent test's chain pops them, or until ``pytest_sessionfinish`` drains anything left at session end. """ - default = bool(_option_or_ini(pytestconfig, _AUTOUSE)) + default = bool(_AUTOUSE.resolve(pytestconfig)) if not _sift_enabled_for(request.node, default): return None - if not _option_or_ini(pytestconfig, _PARAMETRIZE_NESTING): + if not _PARAMETRIZE_NESTING.resolve(pytestconfig): return None # Fall back to on-demand computation for dynamically-inserted items; # see _hierarchy_parents for the same rationale. @@ -1401,7 +1872,7 @@ def step( ``SiftClient(_simulate=True)`` placeholder, so every write returns a synthesized response without contacting Sift. 
""" - default = bool(_option_or_ini(pytestconfig, _AUTOUSE)) + default = bool(_AUTOUSE.resolve(pytestconfig)) if not _sift_enabled_for(request.node, default): yield None return diff --git a/python/lib/sift_client/sift_types/_mixins/metadata.py b/python/lib/sift_client/sift_types/_mixins/metadata.py new file mode 100644 index 000000000..b53fa5dce --- /dev/null +++ b/python/lib/sift_client/sift_types/_mixins/metadata.py @@ -0,0 +1,19 @@ +"""Placeholder for a future ``MetadataMixin`` (not yet implemented). + +TODO(metadata-mixin): metadata updates REPLACE the whole map. +``entity.update({"metadata": {...}})`` builds a field mask over ``metadata`` +(see ``ModelUpdate.to_proto_with_mask`` in ``sift_types/_base.py``) and replaces +it server-side — callers must spread the current ``.metadata`` first or silently +drop existing keys (config defaults, git fields, ``pytest_command``). + +Planned shape: a ``MetadataMixin`` exposing a read-merge-write helper such as +``add_metadata(**kv)`` / ``merge_metadata(dict)``, implemented as +``self.update({"metadata": {**self.metadata, **kv}})``. Mix into every read +entity that carries a ``metadata`` field — ``Asset``, ``Run``, ``Report``, +``TestReport``, ``TestStep``, ``TestMeasurement`` — alongside +``FileAttachmentsMixin`` and ``SimulatedMixin``. It stays a mixin (not a +``BaseType`` method) because it relies on the ``metadata`` field, which not +every ``BaseType`` subclass has (e.g. ``CalculatedChannel`` exposes metadata +only on its Create/Update models, so it is out of scope). Until it exists, +merge at the call site. 
+""" diff --git a/python/lib/sift_client/sift_types/asset.py b/python/lib/sift_client/sift_types/asset.py index 78217934f..ea0895929 100644 --- a/python/lib/sift_client/sift_types/asset.py +++ b/python/lib/sift_client/sift_types/asset.py @@ -27,6 +27,8 @@ class Asset(BaseType[AssetProto, "Asset"], FileAttachmentsMixin): modified_date: datetime modified_by_user_id: str tags: list[str | Tag] + # NOTE: update() replaces this map wholesale. See TODO(metadata-mixin) in + # sift_types/_mixins/metadata.py before adding keys at runtime. metadata: dict[str, str | float | bool] is_archived: bool diff --git a/python/lib/sift_client/sift_types/report.py b/python/lib/sift_client/sift_types/report.py index 42f349f42..34f64e2f1 100644 --- a/python/lib/sift_client/sift_types/report.py +++ b/python/lib/sift_client/sift_types/report.py @@ -108,6 +108,8 @@ class Report(BaseType[ReportProto, "Report"]): summaries: list[ReportRuleSummary] tags: list[str] rerun_from_report_id: str | None = None + # NOTE: update() replaces this map wholesale. See TODO(metadata-mixin) in + # sift_types/_mixins/metadata.py before adding keys at runtime. metadata: dict[str, str | float | bool] job_id: str archived_date: datetime | None = None diff --git a/python/lib/sift_client/sift_types/run.py b/python/lib/sift_client/sift_types/run.py index ec6690896..e91225342 100644 --- a/python/lib/sift_client/sift_types/run.py +++ b/python/lib/sift_client/sift_types/run.py @@ -40,6 +40,8 @@ class Run(BaseType[RunProto, "Run"], FileAttachmentsMixin): created_by_user_id: str modified_by_user_id: str organization_id: str + # NOTE: update() replaces this map wholesale. See TODO(metadata-mixin) in + # sift_types/_mixins/metadata.py before adding keys at runtime. 
metadata: dict[str, str | float | bool] tags: list[str] asset_ids: list[str] diff --git a/python/lib/sift_client/sift_types/test_report.py b/python/lib/sift_client/sift_types/test_report.py index dd786b02d..b8b1f2236 100644 --- a/python/lib/sift_client/sift_types/test_report.py +++ b/python/lib/sift_client/sift_types/test_report.py @@ -167,6 +167,8 @@ class TestStep(BaseType[TestStepProto, "TestStep"], FileAttachmentsMixin, Simula start_time: datetime end_time: datetime error_info: ErrorInfo | None = None + # NOTE: update() replaces this map wholesale. See TODO(metadata-mixin) in + # sift_types/_mixins/metadata.py before adding keys at runtime. metadata: dict[str, str | float | bool] | None = None # Set by the resource layer when this instance was produced from a logging-mode call _log_file: str | Path | None = None @@ -402,6 +404,8 @@ class TestMeasurement(BaseType[TestMeasurementProto, "TestMeasurement"], Simulat passed: bool timestamp: datetime description: str | None = None + # NOTE: update() replaces this map wholesale. See TODO(metadata-mixin) in + # sift_types/_mixins/metadata.py before adding keys at runtime. metadata: dict[str, str | float | bool] | None = None channel_names: list[str] | None = None @@ -645,6 +649,8 @@ class TestReport(BaseType[TestReportProto, "TestReport"], FileAttachmentsMixin, test_case: str start_time: datetime end_time: datetime + # NOTE: update() replaces this map wholesale. See TODO(metadata-mixin) in + # sift_types/_mixins/metadata.py before adding keys at runtime. 
metadata: dict[str, str | float | bool] serial_number: str | None = None part_number: str | None = None diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py index 41066b247..4b2e2ab9d 100644 --- a/python/lib/sift_client/util/test_results/context_manager.py +++ b/python/lib/sift_client/util/test_results/context_manager.py @@ -169,9 +169,12 @@ def __init__( test_system_name: str | None = None, system_operator: str | None = None, test_case: str | None = None, + serial_number: str | None = None, + part_number: str | None = None, log_file: str | Path | bool | None = None, include_git_metadata: bool = False, replay_log_file: bool = True, + metadata: dict[str, str | float | bool] | None = None, ): """Initialize a new report context. @@ -181,10 +184,15 @@ def __init__( test_system_name: The name of the test system. Will default to the hostname if not provided. system_operator: The operator of the test system. Will default to the current user if not provided. test_case: The name of the test case. Will default to the basename of the file containing the test if not provided. + serial_number: Optional serial_number stored on the report. Unset when None. + part_number: Optional part_number stored on the report. Unset when None. log_file: If True, create a temp log file. If a path, use that path. If False/None, no log file is written and create/update calls the API. include_git_metadata: If True, include git metadata in the report. + metadata: Structured key/value metadata to attach to the report. Merged + on top of git metadata when ``include_git_metadata`` is True, so + explicit keys win on collision. replay_log_file: When True (the default) and ``log_file`` is set, spawn ``import-test-result-log --incremental`` to push log entries to Sift in the background during the session. 
When @@ -216,6 +224,10 @@ def __init__( test_case = test_case if test_case else os.path.basename(__file__) test_system_name = test_system_name if test_system_name else socket.gethostname() system_operator = system_operator if system_operator else getpass.getuser() + combined_metadata = { + **(_git_metadata() or {} if include_git_metadata else {}), + **(metadata or {}), + } create = TestReportCreate( name=name, test_system_name=test_system_name, @@ -224,7 +236,9 @@ def __init__( end_time=datetime.now(timezone.utc), status=TestStatus.IN_PROGRESS, system_operator=system_operator, - metadata=_git_metadata() if include_git_metadata else None, # type: ignore + serial_number=serial_number, + part_number=part_number, + metadata=combined_metadata or None, # type: ignore ) self.report = client.test_results.create(create, log_file=self.log_file) diff --git a/python/pyproject.toml b/python/pyproject.toml index 2846fedba..fdc16f7c0 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -45,6 +45,7 @@ dependencies = [ "googleapis-common-protos>=1.60", "protoc-gen-openapiv2>=0.0.1", "filelock~=3.13", + 'tomli~=2.0; python_version < "3.11"', ] [project.urls] diff --git a/python/uv.lock b/python/uv.lock index b8c439b1a..91eaf3c61 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -4348,6 +4348,7 @@ dependencies = [ { name = "requests", version = "2.32.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, { name = "requests", version = "2.34.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "requests-toolbelt" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "types-protobuf", version = "5.29.1.20241207", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, { name = "types-protobuf", version = "6.32.1.20251210", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" 
}, { name = "types-protobuf", version = "7.34.1.20260518", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, @@ -4645,6 +4646,7 @@ requires-dist = [ { name = "sift-stream-bindings", marker = "extra == 'docs-build'", specifier = "==0.3.0" }, { name = "sift-stream-bindings", marker = "extra == 'sift-stream'", specifier = "==0.3.0" }, { name = "sift-stream-bindings", marker = "extra == 'sift-stream-bindings'", specifier = "==0.3.0" }, + { name = "tomli", marker = "python_full_version < '3.11'", specifier = "~=2.0" }, { name = "tomlkit", marker = "extra == 'dev'", specifier = "~=0.13.3" }, { name = "tomlkit", marker = "extra == 'dev-all'", specifier = "~=0.13.3" }, { name = "tomlkit", marker = "extra == 'development'", specifier = "~=0.13.3" }, From 5504ed7b914fa4d571086499898e63d76efed210 Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 2 Jun 2026 17:47:13 -0700 Subject: [PATCH 15/19] Python(fix): Add unit tests and fix incremental upload bug (#611) --- .../low_level_wrappers/_test_results_log.py | 8 +- .../low_level_wrappers/test_results.py | 14 +- .../test_incremental_replay.py | 143 ++++++++++++++++++ .../_tests/resources/test_test_results.py | 69 +++++++++ .../_tests/util/test_report_context.py | 6 +- .../lib/sift_client/resources/test_results.py | 3 +- .../scripts/import_test_result_log.py | 3 +- .../util/test_results/context_manager.py | 2 +- 8 files changed, 238 insertions(+), 10 deletions(-) create mode 100644 python/lib/sift_client/_tests/_internal/low_level_wrappers/test_incremental_replay.py diff --git a/python/lib/sift_client/_internal/low_level_wrappers/_test_results_log.py b/python/lib/sift_client/_internal/low_level_wrappers/_test_results_log.py index 24e0534d7..383f2d5a3 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/_test_results_log.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/_test_results_log.py @@ -143,9 +143,13 @@ class _ReplayState: @dataclass class ReplayResult: - 
"""Result of replaying a log file.""" + """Result of replaying a log file. - report: TestReport + ``report`` is None on an incremental resume tick that uploaded only steps or + measurements; the report itself was created on an earlier tick. + """ + + report: TestReport | None = None steps: list[TestStep] = field(default_factory=list) measurements: list[TestMeasurement] = field(default_factory=list) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/test_results.py b/python/lib/sift_client/_internal/low_level_wrappers/test_results.py index ff0c2b515..184833e50 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/test_results.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/test_results.py @@ -1072,13 +1072,17 @@ async def _replay_update_report( id_map: dict[str, str], state: _ReplayState, ) -> None: - if state.report is None: - raise ValueError("UpdateTestReport found before CreateTestReport") request = UpdateTestReportRequest() json_format.Parse(json_str, request) request.test_report.test_report_id = self._map_id( id_map, request.test_report.test_report_id ) + # Batch/simulate replays the whole log in order, so a missing report means + # the log is malformed. Incremental replay may have created the report on an + # earlier tick (its real ID lives in id_map), so state.report is legitimately + # None here -- the mapped ID is enough to issue the update. + if simulate and state.report is None: + raise ValueError("UpdateTestReport found before CreateTestReport") state.report = await self.update_test_report( request=request, simulate=simulate, existing=state.report ) @@ -1203,6 +1207,7 @@ async def _incremental_import_log_file(self, log_path: Path) -> ReplayResult: next tick. 
""" tracking = LogTracking.load(log_path) + resuming = tracking.last_uploaded_line > 0 id_map = tracking.id_map state = _ReplayState() @@ -1221,7 +1226,10 @@ async def _incremental_import_log_file(self, log_path: Path) -> ReplayResult: tracking.last_uploaded_line += 1 tracking.save(log_path) - if state.report is None: + # On a resume tick the CreateTestReport line was consumed on an earlier + # tick, so state.report is expected to be None; the report already exists + # on the server. Only a genuine first pass over an empty log is an error. + if state.report is None and not resuming: raise ValueError("No CreateTestReport found in log file") return ReplayResult( diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_incremental_replay.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_incremental_replay.py new file mode 100644 index 000000000..ab95ddea8 --- /dev/null +++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_incremental_replay.py @@ -0,0 +1,143 @@ +"""Unit tests for incremental log-replay resume, with no live backend. + +These pin the resume-tick behavior of +``TestResultsLowLevelClient.import_log_file(incremental=True)``: the +CreateTestReport line is uploaded on an earlier tick, so a resuming tick rebuilds +replay state from scratch and must apply the remaining lines without an +in-memory report. The real gRPC create/update calls are stubbed, so these run +offline -- unlike the end-to-end resume test, which needs the integration server. +""" + +from __future__ import annotations + +from datetime import datetime, timezone +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from sift_client._internal.low_level_wrappers._test_results_log import LogTracking +from sift_client._internal.low_level_wrappers.test_results import ( + # Aliased so pytest doesn't try to collect the `Test`-prefixed client as a suite. 
+ TestResultsLowLevelClient as ResultsLowLevelClient, +) +from sift_client.sift_types.test_report import ( + TestReport, + TestReportCreate, + TestReportUpdate, + TestStatus, + TestStep, + TestStepCreate, + TestStepType, +) + +T0 = datetime(2026, 1, 1, tzinfo=timezone.utc) + + +def _make_report(id_: str) -> TestReport: + return TestReport( + id_=id_, + status=TestStatus.FAILED, + name="n", + test_system_name="s", + test_case="c", + start_time=T0, + end_time=T0, + metadata={}, + is_archived=False, + ) + + +def _make_step(id_: str) -> TestStep: + return TestStep( + id_=id_, + test_report_id="real-report", + name="step", + step_type=TestStepType.ACTION, + step_path="1", + status=TestStatus.PASSED, + start_time=T0, + end_time=T0, + ) + + +def _report_create() -> TestReportCreate: + return TestReportCreate( + status=TestStatus.IN_PROGRESS, + name="n", + test_system_name="s", + test_case="c", + start_time=T0, + end_time=T0, + ) + + +@pytest.mark.asyncio +async def test_resume_applies_trailing_report_update(tmp_path): + """Resume whose remaining chunk is the final UpdateTestReport must apply it. + + Pre-fix this raised "UpdateTestReport found before CreateTestReport"; the + status update then never landed and the report stayed IN_PROGRESS. + """ + log_file = tmp_path / "resume_report_update.jsonl" + client = ResultsLowLevelClient(grpc_client=MagicMock()) + + # Build the log offline via the simulate path: CreateTestReport + UpdateTestReport. + report = await client.create_test_report(test_report=_report_create(), log_file=log_file) + update = TestReportUpdate(status=TestStatus.FAILED) + update.resource_id = report.id_ + await client.update_test_report(update=update, log_file=log_file) + + # An earlier tick already uploaded the CreateTestReport (line 1); the report + # exists on the server under its real ID. + LogTracking(last_uploaded_line=1, id_map={report.id_: "real-report"}).save(log_file) + + # Stub the real RPC the resumed tick will issue. 
+ client.update_test_report = AsyncMock(return_value=_make_report("real-report")) + + result = await client.import_log_file(log_file, incremental=True) + + client.update_test_report.assert_awaited_once() + sent = client.update_test_report.await_args.kwargs["request"] + assert sent.test_report.test_report_id == "real-report" + assert sent.test_report.status == TestStatus.FAILED.value + assert result.report is not None + assert result.report.id_ == "real-report" + + +@pytest.mark.asyncio +async def test_resume_with_only_steps_does_not_require_report(tmp_path): + """A resume tick carrying only steps must not demand an in-memory report. + + Pre-fix this raised "No CreateTestReport found in log file" (the field-report + trace), aborting replay of the remaining step lines. + """ + log_file = tmp_path / "resume_steps_only.jsonl" + client = ResultsLowLevelClient(grpc_client=MagicMock()) + + report = await client.create_test_report(test_report=_report_create(), log_file=log_file) + await client.create_test_step( + test_step=TestStepCreate( + test_report_id=report.id_, + name="s1", + step_type=TestStepType.ACTION, + step_path="1", + status=TestStatus.PASSED, + start_time=T0, + end_time=T0, + ), + log_file=log_file, + ) + + LogTracking(last_uploaded_line=1, id_map={report.id_: "real-report"}).save(log_file) + + client.create_test_step = AsyncMock(return_value=_make_step("real-step")) + + result = await client.import_log_file(log_file, incremental=True) + + client.create_test_step.assert_awaited_once() + sent = client.create_test_step.await_args.kwargs["request"] + # The step's report ID was remapped from the simulated ID to the real one. + assert sent.test_step.test_report_id == "real-report" + # The report was created on the earlier tick, so this resume tick has no report. 
+ assert result.report is None + assert len(result.steps) == 1 diff --git a/python/lib/sift_client/_tests/resources/test_test_results.py b/python/lib/sift_client/_tests/resources/test_test_results.py index d0ccf4d1b..ce6d7707a 100644 --- a/python/lib/sift_client/_tests/resources/test_test_results.py +++ b/python/lib/sift_client/_tests/resources/test_test_results.py @@ -715,6 +715,75 @@ def test_import_log_file_round_trip(self, sift_client, nostromo_run, tmp_path): replayed_m = replayed_measurements_by_name[direct_m.name] compare_test_measurement_fields(replayed_m, direct_m) + def test_incremental_import_resumes_after_report_created( + self, sift_client, nostromo_run, tmp_path + ): + """Incremental replay must survive a resume after the report was created. + + Regression: a resume tick rebuilds replay state from scratch, so the + CreateTestReport line (already uploaded on an earlier tick) is skipped and + the in-memory report is None. The replay must still apply the remaining + lines -- including the final UpdateTestReport -- rather than raising + "No CreateTestReport found" and leaving the report stuck IN_PROGRESS. + """ + t0 = datetime.now(timezone.utc) + log_file = tmp_path / "incremental_resume.jsonl" + + # Build a complete simulation log (no real resources created yet). 
+ report = sift_client.test_results.create( + { + "status": TestStatus.IN_PROGRESS, + "name": "Incremental Resume Report", + "test_system_name": "IR System", + "test_case": "IR Case", + "start_time": t0, + "end_time": t0 + timedelta(seconds=30), + "run_id": nostromo_run.id_, + }, + log_file=log_file, + ) + step = sift_client.test_results.create_step( + TestStepCreate( + test_report_id=report.id_, + name="IR Step 1", + step_type=TestStepType.ACTION, + step_path="1", + status=TestStatus.IN_PROGRESS, + start_time=t0, + end_time=t0 + timedelta(seconds=10), + ), + log_file=log_file, + ) + sift_client.test_results.update_step( + step, + {"status": TestStatus.FAILED}, + log_file=log_file, + ) + sift_client.test_results.update( + test_report=report, + update=TestReportUpdate(status=TestStatus.FAILED), + log_file=log_file, + ) + + all_lines = log_file.read_text().splitlines() + assert all_lines[0].startswith("[CreateTestReport:") + + # First tick: only the CreateTestReport is present. This creates the real + # report and advances the tracking cursor past line 1. + log_file.write_text(all_lines[0] + "\n") + first = sift_client.test_results.import_log_file(log_file, incremental=True) + real_report_id = first.report.id_ + assert real_report_id is not None + + # Later tick: the rest of the log is now available. Resuming past the + # CreateTestReport line must not raise, and the final UpdateTestReport must + # land so the report ends FAILED rather than IN_PROGRESS. 
+ log_file.write_text("\n".join(all_lines) + "\n") + sift_client.test_results.import_log_file(log_file, incremental=True) + + refetched = sift_client.test_results.get(test_report_id=real_report_id) + assert refetched.status == TestStatus.FAILED + @pytest.mark.asyncio async def test_malformed_log_line_skipped(self, tmp_path): """Malformed lines raise a ValueError during iteration.""" diff --git a/python/lib/sift_client/_tests/util/test_report_context.py b/python/lib/sift_client/_tests/util/test_report_context.py index e92e57bb8..73d738a7d 100644 --- a/python/lib/sift_client/_tests/util/test_report_context.py +++ b/python/lib/sift_client/_tests/util/test_report_context.py @@ -76,7 +76,9 @@ def test_worker_timeout_kills_and_warns() -> None: assert rc._import_proc.poll() is not None messages = "\n".join(str(w.message) for w in recorded) assert "did not exit in 0.2s" in messages - assert "import-test-result-log" in messages + # Recovery must resume from the tracking cursor, not batch-replay (which would + # duplicate already-uploaded entries), so the hint carries --incremental. + assert "import-test-result-log --incremental" in messages def test_worker_nonzero_exit_warns_stderr_no_raise() -> None: @@ -96,4 +98,4 @@ def test_worker_nonzero_exit_warns_stderr_no_raise() -> None: messages = "\n".join(str(w.message) for w in recorded) assert "exited with code 2" in messages assert "rpc deadline exceeded" in messages - assert "import-test-result-log" in messages + assert "import-test-result-log --incremental" in messages diff --git a/python/lib/sift_client/resources/test_results.py b/python/lib/sift_client/resources/test_results.py index 9e88b6081..10ef70920 100644 --- a/python/lib/sift_client/resources/test_results.py +++ b/python/lib/sift_client/resources/test_results.py @@ -671,7 +671,8 @@ async def import_log_file( A ReplayResult containing the created report, steps, and measurements. 
""" result = await self._low_level_client.import_log_file(log_file, incremental=incremental) - result.report = self._apply_client_to_instance(result.report) + if result.report is not None: + result.report = self._apply_client_to_instance(result.report) result.steps = self._apply_client_to_instances(result.steps) result.measurements = self._apply_client_to_instances(result.measurements) return result diff --git a/python/lib/sift_client/scripts/import_test_result_log.py b/python/lib/sift_client/scripts/import_test_result_log.py index 7e14e4d59..3f66af1da 100644 --- a/python/lib/sift_client/scripts/import_test_result_log.py +++ b/python/lib/sift_client/scripts/import_test_result_log.py @@ -20,7 +20,8 @@ def _print_result(result: ReplayResult) -> None: - print(f"Report: {result.report.name} (id={result.report.id_})") + if result.report is not None: + print(f"Report: {result.report.name} (id={result.report.id_})") print(f"Steps: {len(result.steps)}") for step in result.steps: print(f" - {step.step_path} [{step.status}]") diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py index 4b2e2ab9d..497404c45 100644 --- a/python/lib/sift_client/util/test_results/context_manager.py +++ b/python/lib/sift_client/util/test_results/context_manager.py @@ -81,7 +81,7 @@ def log_replay_instructions(log_file: str | Path | None) -> None: return warnings.warn( f"Sift log file was not fully replayed: {log_file}. 
" - f"Re-run with `import-test-result-log {log_file}` to complete the upload.", + f"Re-run with `import-test-result-log --incremental {log_file}` to complete the upload.", SiftWarning, stacklevel=2, ) From a3a8f47b32cc79098f8d37d78d4a874611d0f43c Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 2 Jun 2026 17:51:31 -0700 Subject: [PATCH 16/19] version bump --- python/CHANGELOG.md | 5 +++++ python/pyproject.toml | 2 +- python/uv.lock | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md index 4905ae0d7..2ae4b6a88 100644 --- a/python/CHANGELOG.md +++ b/python/CHANGELOG.md @@ -29,6 +29,11 @@ See the [Pytest Plugin guide](https://github.com/sift-stack/sift/blob/main/pytho - [Report assertion message as error info](https://github.com/sift-stack/sift/pull/587) - [Pytest docs reorganization](https://github.com/sift-stack/sift/pull/589) - [Configurable report name template and preserved pytest command](https://github.com/sift-stack/sift/pull/591) +- [Use in-process transport to improve test performance](https://github.com/sift-stack/sift/pull/590) +- [End-of-run report summary panel and session header](https://github.com/sift-stack/sift/pull/594) +- [Exit instead of raise on connection failure](https://github.com/sift-stack/sift/pull/606) +- [Flexible report naming and consolidated settings registry](https://github.com/sift-stack/sift/pull/602) +- [Fix incremental upload resume bug](https://github.com/sift-stack/sift/pull/611) ## [v0.16.2] - May 21, 2026 diff --git a/python/pyproject.toml b/python/pyproject.toml index fdc16f7c0..b04bce6d3 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "sift_stack_py" -version = "0.17.0.dev1" +version = "0.17.0.dev2" description = "Python client library for the Sift API" requires-python = ">=3.8" readme = { file = "README.md", content-type = "text/markdown" } diff --git a/python/uv.lock 
b/python/uv.lock index 91eaf3c61..d6391b311 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -4315,7 +4315,7 @@ wheels = [ [[package]] name = "sift-stack-py" -version = "0.17.0.dev1" +version = "0.17.0.dev2" source = { editable = "." } dependencies = [ { name = "alive-progress", version = "3.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, From 223f81e752161e68c2af9007014c6e053615d939 Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Wed, 3 Jun 2026 15:09:24 -0700 Subject: [PATCH 17/19] Python(chore): Reorganize pytest code (#610) --- .../guides/pytest_plugin/configuration.md | 2 +- .../_internal/pytest_plugin/__init__.py | 0 .../_internal/pytest_plugin/modes.py | 68 + .../_internal/pytest_plugin/options.py | 579 +++++ .../_internal/pytest_plugin/report.py | 506 ++++ .../_internal/pytest_plugin/steps.py | 310 +++ .../_internal/pytest_plugin/terminal.py | 231 ++ .../pytest_plugin/test_configuration.py | 45 +- .../_tests/pytest_plugin/test_hierarchy.py | 16 +- .../pytest_plugin/test_settings_reference.py | 8 +- .../pytest_plugin/test_terminal_output.py | 28 +- .../pytest_plugin/test_typo_detector.py | 2 +- python/lib/sift_client/pytest_plugin.py | 2074 +++-------------- 13 files changed, 2067 insertions(+), 1802 deletions(-) create mode 100644 python/lib/sift_client/_internal/pytest_plugin/__init__.py create mode 100644 python/lib/sift_client/_internal/pytest_plugin/modes.py create mode 100644 python/lib/sift_client/_internal/pytest_plugin/options.py create mode 100644 python/lib/sift_client/_internal/pytest_plugin/report.py create mode 100644 python/lib/sift_client/_internal/pytest_plugin/steps.py create mode 100644 python/lib/sift_client/_internal/pytest_plugin/terminal.py diff --git a/python/docs/guides/pytest_plugin/configuration.md b/python/docs/guides/pytest_plugin/configuration.md index 7c7114543..a05897cd4 100644 --- a/python/docs/guides/pytest_plugin/configuration.md +++ 
b/python/docs/guides/pytest_plugin/configuration.md @@ -141,7 +141,7 @@ The plugin scans `SIFT_*` env vars and `[tool.sift.pytest.*]` keys at session start; anything outside these tables fires a warning with a closest-match suggestion, so typos like `SIFT_REPORT_SERIALNUM` surface immediately. - + ### Pytest behavior | Setting | CLI flag | Ini (`[tool.pytest.ini_options]`) | diff --git a/python/lib/sift_client/_internal/pytest_plugin/__init__.py b/python/lib/sift_client/_internal/pytest_plugin/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/lib/sift_client/_internal/pytest_plugin/modes.py b/python/lib/sift_client/_internal/pytest_plugin/modes.py new file mode 100644 index 000000000..317bcfa96 --- /dev/null +++ b/python/lib/sift_client/_internal/pytest_plugin/modes.py @@ -0,0 +1,68 @@ +"""Run-mode detection and the per-test Sift gate. + +Resolves the active mode (disabled > offline > online) from the ``DISABLED_OPTION`` / +``OFFLINE_OPTION`` options, and decides whether the Sift autouse fixtures activate for +a given node via the ``sift_include`` / ``sift_exclude`` markers. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from sift_client._internal.pytest_plugin.options import ( + AUTOUSE_OPTION, + DISABLED_OPTION, + OFFLINE_OPTION, +) + +if TYPE_CHECKING: + import pytest + + +def is_offline(pytestconfig: pytest.Config | None) -> bool: + return bool(OFFLINE_OPTION.resolve(pytestconfig)) + + +def is_disabled(pytestconfig: pytest.Config | None) -> bool: + return bool(DISABLED_OPTION.resolve(pytestconfig)) + + +def sdk_version() -> str: + """Return the installed ``sift_stack_py`` version, or ``"unknown"``.""" + from importlib.metadata import PackageNotFoundError, version + + try: + return version("sift_stack_py") + except PackageNotFoundError: + return "unknown" + + +def mode_label(config: pytest.Config) -> str: + """Resolve the active mode for the terminal header: disabled > offline > online.""" + if is_disabled(config): + return "disabled" + if is_offline(config): + return "offline" + return "online" + + +def sift_enabled_for(node: pytest.Item | pytest.Collector, default: bool) -> bool: + """Resolve the Sift gate for a node: sift_exclude > sift_include > default. + + `get_closest_marker` walks the node hierarchy upward, so markers applied + at any level (function, class, module, package, session) are honored. + """ + if node.get_closest_marker("sift_exclude"): + return False + if node.get_closest_marker("sift_include"): + return True + return default + + +def gate_enabled(node: pytest.Item | pytest.Collector, config: pytest.Config) -> bool: + """Whether the Sift autouse fixtures should activate for ``node``. + + Combines the ``sift_autouse`` ini default with the per-test marker gate, so + the ``step`` and parent-step fixtures share one entry point. 
+ """ + return sift_enabled_for(node, bool(AUTOUSE_OPTION.resolve(config))) diff --git a/python/lib/sift_client/_internal/pytest_plugin/options.py b/python/lib/sift_client/_internal/pytest_plugin/options.py new file mode 100644 index 000000000..c3b6801a1 --- /dev/null +++ b/python/lib/sift_client/_internal/pytest_plugin/options.py @@ -0,0 +1,579 @@ +"""Declarative settings registry for the Sift pytest plugin. + +Every plugin setting is declared once as an :class:`Option` in the ``PLUGIN_OPTIONS`` +registry. That single registry drives ``pytest_addoption``, value resolution, +the docs settings-reference table, and the unknown-key typo detector, so a +setting is added or changed in one place instead of wired up across several. +""" + +from __future__ import annotations + +import os +import warnings +from dataclasses import dataclass +from typing import Any + +import pytest + +from sift_client._internal.pyproject_config import load_tool_sift + +# Settings-reference categories. Each maps to a docs subsection and, in the +# renderer, to the column subset that category actually uses. +CAT_BEHAVIOR = "Pytest behavior" +CAT_CONNECTION = "Connection" +CAT_REPORT = "Report content" +CATEGORIES = (CAT_BEHAVIOR, CAT_CONNECTION, CAT_REPORT) + +tool_sift_key = pytest.StashKey[dict]() + + +def tool_sift(config: pytest.Config | None) -> dict[str, Any]: + """Session-cached ``[tool.sift]`` table. + + Every option that reads TOML, plus the typo detector, would otherwise + re-parse pyproject.toml on the session-start path, and re-emit the + malformed-file warning each time. Parse once per session via the config + stash; ``load_tool_sift`` stays the uncached parser for direct callers. 
+ """ + if config is None: + return {} + cached = config.stash.get(tool_sift_key, None) + if cached is None: + cached = load_tool_sift(config) + config.stash[tool_sift_key] = cached + return cached + + +@dataclass(frozen=True) +class Option: + """A single setting plus the logic to resolve it from wherever it can be set. + + A setting may come from an env var, a CLI flag, a pytest ini key, or a + ``[tool.sift...]`` TOML path. :meth:`resolve` walks the declared surfaces in + env > cli > ini > toml order; ``metadata`` (``merge=True``) is the one + free-form table, resolved by :meth:`resolve_merged`. The single ``PLUGIN_OPTIONS`` + registry of these drives ``pytest_addoption``, the resolvers, the docs + settings-reference table, and the typo detector. + + Declare only the surface fields a setting uses: + + - ``cli`` / ``cli_action``: CLI flag and argparse action (``cli_dest`` derived). + - ``ini`` / ``ini_type`` / ``ini_default``: pytest ini key + type/default. + - ``toml``: tuple path under ``[tool.sift...]``, e.g. + ``("pytest", "report", "name")`` -> ``tool.sift.pytest.report.name``. + - ``env``: full env var name, e.g. ``"SIFT_API_KEY"``. + + ``category`` groups the option in the docs reference (one of ``CATEGORIES``). + """ + + name: str + help: str + category: str + cli: str | None = None + cli_action: str | None = None + ini: str | None = None + ini_type: str | None = None + ini_default: Any = None + toml: tuple[str, ...] | None = None + env: str | None = None + merge: bool = False + + @property + def cli_dest(self) -> str: + """Argparse ``dest`` for the option. + + When the option has both a CLI flag and an ini key, the dest matches + the ini name so ``config.getoption(ini_name)`` returns the CLI value + (and falls through to ``config.getini(ini_name)`` when the flag wasn't + passed). Without an ini key, the dest derives from the flag name. 
+ """ + if self.ini: + return self.ini + if self.cli is None: + return self.name + return self.cli.lstrip("-").replace("-", "_") + + def __post_init__(self) -> None: + if self.cli_action and not self.cli: + raise ValueError(f"Option({self.name!r}): cli_action requires cli") + if self.ini_type and not self.ini: + raise ValueError(f"Option({self.name!r}): ini_type requires ini") + if self.merge and not self.toml: + raise ValueError(f"Option({self.name!r}): merge=True needs toml") + if not any([self.cli, self.ini, self.toml, self.env]): + raise ValueError(f"Option({self.name!r}): declares no surfaces") + if self.category not in CATEGORIES: + raise ValueError(f"Option({self.name!r}): category must be one of {CATEGORIES}") + + def resolve(self, config: pytest.Config | None) -> Any: + """First set value from declared surfaces; ``None`` when unset everywhere. + + Walk order is env > cli > ini > toml. No current option declares both + env and cli, so the chain isn't ambiguous in practice. + ``getini`` returns the typed default for unset bool/list keys, so this + only returns ini values for booleans (always meaningful), non-empty + strings, and non-empty lists. 
+ """ + if self.env: + env_value = os.getenv(self.env) + if env_value not in (None, ""): + return env_value + if config is None: + return None + if self.cli: + cli_value = config.getoption(self.cli_dest, default=None) + if cli_value is not None: + return cli_value + if self.ini: + try: + ini_value = config.getini(self.ini) + except (KeyError, ValueError): + ini_value = None + if isinstance(ini_value, bool): + return ini_value + if isinstance(ini_value, str) and ini_value: + return ini_value + if isinstance(ini_value, list) and ini_value: + return ini_value + if self.toml: + toml_value = _walk_toml(tool_sift(config), self.toml) + if toml_value not in (None, ""): + return toml_value + return None + + def resolve_merged(self, config: pytest.Config | None) -> dict[str, str | float | bool]: + """For ``merge=True`` dict-shape settings: the free-form TOML table. + + TOML values that don't fit ``dict[str, str | float | bool]`` (nested + tables, lists, ``None``) are dropped with a warning so a malformed + entry can't crash report creation. + """ + from sift_client.pytest_plugin import SiftPytestPluginWarning + + result: dict[str, str | float | bool] = {} + if config is not None and self.toml: + base = _walk_toml(tool_sift(config), self.toml) + if isinstance(base, dict): + for key, value in base.items(): + if not isinstance(key, str): + continue + if isinstance(value, (bool, str, int, float)): + # ``bool`` first since ``isinstance(True, int)`` is True. 
+ result[key] = value # type: ignore[assignment] + continue + warnings.warn( + f"[tool.sift.{'.'.join(self.toml)}] entry {key!r} ignored: " + f"unsupported type {type(value).__name__}.", + SiftPytestPluginWarning, + stacklevel=2, + ) + return result + + +def _walk_toml(data: dict[str, Any], path: tuple[str, ...]) -> Any: + """Walk a parsed TOML tree along ``path``; return None on any missing key.""" + cur: Any = data + for key in path: + if not isinstance(cur, dict): + return None + cur = cur.get(key) + if cur is None: + return None + return cur + + +# --------------------------------------------------------------------------- +# Settings registry. +# +# Add new options here. The registry drives `pytest_addoption`, resolution, +# the docs settings-reference table, and the unknown-key typo detector, so a +# setting is declared once instead of wired up in several places. +# +# Where each setting lives follows a few principles: +# - Secrets (the API key) come from environment variables only, never a +# committed file. +# - Pytest behavior lives in [tool.pytest.ini_options] so it integrates with +# `pytest --help` / `--co` / `--trace-config`. +# - Sift report content lives in [tool.sift.pytest.report.*]. +# - Non-secret endpoints take an env var plus one static home (ini or toml, +# not both). +# - A CLI flag is added only when there is a real per-run override workflow; +# stable project config stays in ini/toml. +# - Dynamic per-run values are injected via environment variables (pytest-dotenv +# loads .env for local dev; CI sets the same names from its secret store). +# --------------------------------------------------------------------------- + +# Pytest behavior. The CLI flag survives because the per-run override is real. 
+LOG_FILE_OPTION = Option( + name="log_file", + category=CAT_BEHAVIOR, + help="Path to the JSONL log of create/update calls (path | true | false | none).", + cli="--sift-log-file", + ini="sift_log_file", +) +GIT_METADATA_OPTION = Option( + name="git_metadata", + category=CAT_BEHAVIOR, + help="Capture git repo/branch/commit on the report.", + cli="--no-sift-git-metadata", + cli_action="store_false", + ini="sift_git_metadata", + ini_type="bool", + ini_default=True, +) +OFFLINE_OPTION = Option( + name="offline", + category=CAT_BEHAVIOR, + help="Skip the session-start ping; route create/update through the JSONL log.", + cli="--sift-offline", + cli_action="store_true", + ini="sift_offline", + ini_type="bool", + ini_default=False, +) +DISABLED_OPTION = Option( + name="disabled", + category=CAT_BEHAVIOR, + help="Disable Sift entirely (no API calls, no log file). Supersedes --sift-offline.", + cli="--sift-disabled", + cli_action="store_true", + ini="sift_disabled", + ini_type="bool", + ini_default=False, +) + +OPEN_OPTION = Option( + name="open_report", + category=CAT_BEHAVIOR, + help="Open the resulting report in a browser at session end (online only; " + "no-op when the report URL can't be resolved).", + cli="--sift-open-report", + cli_action="store_true", + ini="sift_open_report", + ini_type="bool", + ini_default=False, +) + +# Pytest behavior: set-once project defaults (no CLI flag, no per-run override). 
+AUTOUSE_OPTION = Option( + name="autouse", + category=CAT_BEHAVIOR, + help="Default for the Sift autouse fixtures (report_context, step, hierarchy/parametrize parents).", + ini="sift_autouse", + ini_type="bool", + ini_default=True, +) +PACKAGE_STEP_OPTION = Option( + name="package_step", + category=CAT_BEHAVIOR, + help="Open a parent step for each Python package in the test path.", + ini="sift_package_step", + ini_type="bool", + ini_default=True, +) +MODULE_STEP_OPTION = Option( + name="module_step", + category=CAT_BEHAVIOR, + help="Open a parent step for each test module.", + ini="sift_module_step", + ini_type="bool", + ini_default=True, +) +CLASS_STEP_OPTION = Option( + name="class_step", + category=CAT_BEHAVIOR, + help="Open per-class parent steps, including nested classes.", + ini="sift_class_step", + ini_type="bool", + ini_default=True, +) +PARAMETRIZE_NESTING_OPTION = Option( + name="parametrize_nesting", + category=CAT_BEHAVIOR, + help="Cluster parametrized tests under shared parent steps (e.g. test_a -> v=1, v=2).", + ini="sift_parametrize_nesting", + ini_type="bool", + ini_default=True, +) + +# Credentials. The API key is env-only; the URIs accept env + ini. +API_KEY_OPTION = Option( + name="api_key", + category=CAT_CONNECTION, + help="Sift API key (secret, env-only).", + env="SIFT_API_KEY", +) +GRPC_URI_OPTION = Option( + name="grpc_uri", + category=CAT_CONNECTION, + help="Sift gRPC endpoint URI.", + env="SIFT_GRPC_URI", + ini="sift_grpc_uri", +) +REST_URI_OPTION = Option( + name="rest_uri", + category=CAT_CONNECTION, + help="Sift REST endpoint URI.", + env="SIFT_REST_URI", + ini="sift_rest_uri", +) +APP_URL_OPTION = Option( + name="app_url", + category=CAT_CONNECTION, + help="Sift web-app origin for the report link in the terminal footer (e.g. " + "https://app.siftstack.com). When unset, the link is derived from the REST URI " + "for known Sift hosts.", + env="SIFT_APP_URL", + ini="sift_app_url", +) + +# Report content. 
Project defaults in [tool.sift.pytest.report]; CI injects +# per-run values via SIFT_REPORT_* env vars (pytest-dotenv handles .env files +# for local dev). +REPORT_NAME_OPTION = Option( + name="report_name", + category=CAT_REPORT, + help="Template for the report display name. Placeholders: {target}, {command}, {args}, " + "{rootdir}, {timestamp}, {count}, {git_repo}, {git_branch}, {git_commit}.", + toml=("pytest", "report", "name"), +) +TEST_CASE_OPTION = Option( + name="test_case", + category=CAT_REPORT, + help="Template for the report's test_case field (same placeholders as report_name).", + toml=("pytest", "report", "test_case"), +) +TEST_SYSTEM_NAME_OPTION = Option( + name="test_system_name", + category=CAT_REPORT, + help="Name of the test system / rig. Defaults to the host's name.", + env="SIFT_REPORT_TEST_SYSTEM_NAME", + toml=("pytest", "report", "test_system_name"), +) +SYSTEM_OPERATOR_OPTION = Option( + name="system_operator", + category=CAT_REPORT, + help="Operator running the test. Defaults to the OS user.", + env="SIFT_REPORT_SYSTEM_OPERATOR", + toml=("pytest", "report", "system_operator"), +) +SERIAL_NUMBER_OPTION = Option( + name="serial_number", + category=CAT_REPORT, + help="Serial number of the unit under test.", + env="SIFT_REPORT_SERIAL_NUMBER", + toml=("pytest", "report", "serial_number"), +) +PART_NUMBER_OPTION = Option( + name="part_number", + category=CAT_REPORT, + help="Part number of the unit under test.", + env="SIFT_REPORT_PART_NUMBER", + toml=("pytest", "report", "part_number"), +) +METADATA_OPTION = Option( + name="metadata", + category=CAT_REPORT, + help="Free-form report metadata, as a TOML table of scalar values. For " + "dynamic per-run keys, attach them in conftest via the report_context fixture.", + toml=("pytest", "report", "metadata"), + merge=True, +) + +PLUGIN_OPTIONS: tuple[Option, ...] 
= ( + LOG_FILE_OPTION, + GIT_METADATA_OPTION, + OFFLINE_OPTION, + DISABLED_OPTION, + OPEN_OPTION, + AUTOUSE_OPTION, + PACKAGE_STEP_OPTION, + MODULE_STEP_OPTION, + CLASS_STEP_OPTION, + PARAMETRIZE_NESTING_OPTION, + API_KEY_OPTION, + GRPC_URI_OPTION, + REST_URI_OPTION, + APP_URL_OPTION, + REPORT_NAME_OPTION, + TEST_CASE_OPTION, + TEST_SYSTEM_NAME_OPTION, + SYSTEM_OPERATOR_OPTION, + SERIAL_NUMBER_OPTION, + PART_NUMBER_OPTION, + METADATA_OPTION, +) + + +def register_options(parser: pytest.Parser) -> None: + """Register every option's CLI flag and ini key on the pytest parser. + + One loop drives both surfaces, so adding a setting is one entry in + ``PLUGIN_OPTIONS``, not edits scattered across the ``pytest_addoption`` hook. + """ + group = parser.getgroup("sift", description="Sift test results") + for opt in PLUGIN_OPTIONS: + if opt.cli is not None: + cli_kwargs: dict[str, Any] = { + "dest": opt.cli_dest, + "default": None, + "help": opt.help, + } + if opt.cli_action is not None: + cli_kwargs["action"] = opt.cli_action + group.addoption(opt.cli, **cli_kwargs) + if opt.ini is not None: + ini_kwargs: dict[str, Any] = {"help": opt.help, "default": opt.ini_default} + if opt.ini_type is not None: + ini_kwargs["type"] = opt.ini_type + parser.addini(opt.ini, **ini_kwargs) + + +def render_settings_reference() -> str: + """Render the Markdown settings reference from ``PLUGIN_OPTIONS``. + + One ``### `` subsection per category, each table showing only the + columns that category uses (so no dead all-``—`` columns). The plugin docs + at ``docs/guides/pytest_plugin/configuration.md`` embed this output verbatim + so the registry and the docs can't drift; + ``test_settings_reference_docs_in_sync`` is the guard rail. 
Regenerate with:: + + uv run python -c "from sift_client._internal.pytest_plugin.options import render_settings_reference; print(render_settings_reference())" + """ + + def _cli_cell(opt: Option) -> str: + return f"`{opt.cli}`" if opt.cli else "—" + + def _ini_cell(opt: Option) -> str: + return f"`{opt.ini}`" if opt.ini else "—" + + def _toml_cell(opt: Option) -> str: + if not opt.toml: + return "—" + if opt.merge: + return f"`[tool.sift.{'.'.join(opt.toml)}]` (table)" + section = ".".join(opt.toml[:-1]) + return f"`[tool.sift.{section}] {opt.toml[-1]}`" + + def _env_cell(opt: Option) -> str: + if opt.env: + return f"`{opt.env}`" + return "—" + + # Per-category column layout: only the surfaces that category actually uses. + # Each column is (header, cell-renderer). + columns_by_category = { + CAT_BEHAVIOR: [ + ("CLI flag", _cli_cell), + ("Ini (`[tool.pytest.ini_options]`)", _ini_cell), + ], + CAT_CONNECTION: [ + ("Ini (`[tool.pytest.ini_options]`)", _ini_cell), + ("Env var", _env_cell), + ], + CAT_REPORT: [ + ("TOML (`[tool.sift...]`)", _toml_cell), + ("Env var", _env_cell), + ], + } + + def _escape(cell: str) -> str: + # Literal pipes inside a Markdown table cell need backslash escaping or + # they'd be parsed as column separators. + return cell.replace("|", "\\|") + + blocks: list[str] = [] + for category in CATEGORIES: + opts = [o for o in PLUGIN_OPTIONS if o.category == category] + if not opts: + continue + columns = columns_by_category[category] + headers = ["Setting", *(h for h, _ in columns)] + lines = [ + f"### {category}", + "", + "| " + " | ".join(headers) + " |", + "|" + "|".join(["---"] * len(headers)) + "|", + ] + for opt in opts: + cells = [opt.help, *(render(opt) for _, render in columns)] + lines.append("| " + " | ".join(_escape(c) for c in cells) + " |") + blocks.append("\n".join(lines)) + return "\n\n".join(blocks) + + +def warn_on_unknown_env_vars() -> None: + """Emit a warning for any ``SIFT_*`` env var not declared in the registry. 
+ + The registry declares each env var by its full name (``opt.env``); a + ``SIFT_*`` var that matches none of them is almost always a typo. + """ + import difflib + + from sift_client.pytest_plugin import SiftPytestPluginWarning + + known_full = {opt.env for opt in PLUGIN_OPTIONS if opt.env} + suggestion_pool = sorted(known_full) + for name in sorted(os.environ): + if not name.startswith("SIFT_"): + continue + if name in known_full: + continue + close = difflib.get_close_matches(name, suggestion_pool, n=1, cutoff=0.6) + hint = f" (did you mean `{close[0]}`?)" if close else "" + warnings.warn( + f"Unknown SIFT_* env var `{name}`{hint}; ignored.", + SiftPytestPluginWarning, + stacklevel=2, + ) + + +def warn_on_unknown_toml_keys(config: pytest.Config) -> None: + """Walk ``[tool.sift.pytest.*]`` in pyproject.toml and warn on keys outside the registry. + + Only the ``tool.sift.pytest`` subtree is checked. Other ``tool.sift.*`` + subtrees are reserved for non-pytest Sift tooling (e.g. ``tool.sift.extras`` + is consumed by this repo's extras-generation script) and aren't our + concern. Free-form subtrees (``merge=True`` options like ``metadata``) + stop the walk; their keys are user-defined and not validated. + """ + import difflib + + from sift_client.pytest_plugin import SiftPytestPluginWarning + + data = tool_sift(config) + pytest_table = (data or {}).get("pytest") + if not isinstance(pytest_table, dict): + return + # Build leaf/free-form/prefix sets relative to the ``("pytest", ...)`` root + # the registry already uses, so the walk runs on the table we just sliced. 
+ leaves = {opt.toml for opt in PLUGIN_OPTIONS if opt.toml and not opt.merge} + free_form = {opt.toml for opt in PLUGIN_OPTIONS if opt.toml and opt.merge} + prefixes: set[tuple[str, ...]] = set() + for full in leaves | free_form: + for i in range(len(full)): + prefixes.add(full[:i]) + + def _walk(node: Any, base: tuple[str, ...]) -> None: + if base in free_form or not isinstance(node, dict): + return + for key, value in node.items(): + path = (*base, str(key)) + if path in leaves or path in free_form: + continue + if path in prefixes: + _walk(value, path) + continue + full_name = "tool.sift." + ".".join(path) + same_depth = [ + ".".join(p) for p in (leaves | free_form | prefixes) if len(p) == len(path) + ] + close = difflib.get_close_matches(".".join(path), same_depth, n=1, cutoff=0.6) + hint = f" (did you mean `tool.sift.{close[0]}`?)" if close else "" + warnings.warn( + f"Unknown sift config key `{full_name}`{hint}; ignored.", + SiftPytestPluginWarning, + stacklevel=2, + ) + + _walk(pytest_table, ("pytest",)) diff --git a/python/lib/sift_client/_internal/pytest_plugin/report.py b/python/lib/sift_client/_internal/pytest_plugin/report.py new file mode 100644 index 000000000..5ce0590f1 --- /dev/null +++ b/python/lib/sift_client/_internal/pytest_plugin/report.py @@ -0,0 +1,506 @@ +"""Report construction, status resolution, and step creation. + +Builds the session ``ReportContext`` from resolved settings (name/test_case +templates, log-file mode, credentials for disabled mode), resolves a function +step's status from pytest's per-phase reports, and finalizes after teardown. +``report_context_impl`` is a pure generator that yields the context; the +plugin's ``report_context`` fixture owns the module-level ``REPORT_CONTEXT``. 
+""" + +from __future__ import annotations + +import os +import warnings +from datetime import datetime, timezone +from pathlib import Path +from typing import TYPE_CHECKING, Any, Generator + +import pytest + +from sift_client import SiftClient, SiftConnectionConfig +from sift_client._internal.pytest_plugin.modes import is_offline +from sift_client._internal.pytest_plugin.options import ( + GIT_METADATA_OPTION, + LOG_FILE_OPTION, + METADATA_OPTION, + PARAMETRIZE_NESTING_OPTION, + PART_NUMBER_OPTION, + REPORT_NAME_OPTION, + SERIAL_NUMBER_OPTION, + SYSTEM_OPERATOR_OPTION, + TEST_CASE_OPTION, + TEST_SYSTEM_NAME_OPTION, +) +from sift_client._internal.pytest_plugin.steps import ( + drain_hierarchy_stack, + drain_parametrize_stack, + parametrize_path_key, +) +from sift_client.sift_types.test_report import ErrorInfo, TestStatus +from sift_client.util.test_results import ReportContext +from sift_client.util.test_results.context_manager import ( + _git_metadata, + format_assertion_message, + format_truncated_traceback, +) + +if TYPE_CHECKING: + from sift_client.util.test_results.context_manager import NewStep + + +def resolve_real_report_id(context: Any) -> str | None: + """Resolve the real server-side report id for the online footer link. + + In synchronous online mode (``--sift-log-file=false``) the report is created + directly against the API, so ``report.id_`` is already the real id. In the + default incremental mode the report is created through the simulate path + (a client-side UUID) and the background worker maps it to the real id on + replay, recording it in the ``.tracking`` sidecar's ``id_map``. By the + time this footer runs the session-scoped report context has torn down and + the worker has drained, so the sidecar is final. + + Returns ``None`` when the worker never mapped the report (e.g. it died before + replaying the create), meaning no real report exists to link. 
+ """ + report = context.report + if not report.id_: + # No id was ever assigned (unset/empty); nothing to link. + return None + sim_id = str(report.id_) + if not getattr(report, "is_simulated", False): + return sim_id + log_file = getattr(context, "log_file", None) + if log_file is None: + return None + from sift_client._internal.low_level_wrappers._test_results_log import LogTracking + + return LogTracking.load(log_file).id_map.get(sim_id) + + +def resolve_report_link(context: Any, offline: bool) -> tuple[str | None, str | None]: + """Resolve ``(report_id, report_url)`` for the terminal footer. + + Offline runs never upload, so the id is ``None``. Online, the id comes from + ``resolve_real_report_id`` and the URL is built only when both the id and the + client's ``app_url`` are set. Truthiness, not ``is not None``: a + resolved-but-empty id (degenerate sidecar mapping, unset proto field) must + fall through to the "not uploaded" path, not produce a ``/test-results/`` link. + """ + report_id = None if offline else resolve_real_report_id(context) + report_url = ( + f"{context.client.app_url}/test-results/{report_id}" + if report_id and context.client.app_url + else None + ) + return report_id, report_url + + +def error_info_from_longrepr(longrepr: Any) -> ErrorInfo: + """Fall back to the report's longrepr when no Python exception is available.""" + return ErrorInfo(error_code=1, error_message=str(longrepr) if longrepr is not None else "") + + +def resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: + """Resolve the function step's status from pytest's per-phase reports. + + Reads ``_sift_phase_setup`` / ``_sift_phase_call`` and the test's xfail marker, + then mutates ``new_step.current_step`` in place and flips + ``new_step._sift_managed_externally`` so ``NewStep.__exit__`` emits the + resolved status without re-classifying. + + When the call phase reports ``passed`` and no override is needed (i.e. 
the + test's own status or substep failures should drive the result), this leaves + the step alone so the default ``__exit__`` resolution stays in charge. + """ + current_step = new_step.current_step + if current_step is None: + # The step never opened (the autouse fixture short-circuited or was + # disabled). Nothing to resolve. + return + setup_phase = getattr(item, "_sift_phase_setup", None) + call_phase = getattr(item, "_sift_phase_call", None) + xfail_marker = item.get_closest_marker("xfail") + xfail_runs = xfail_marker.kwargs.get("run", True) if xfail_marker is not None else True + + status: TestStatus | None = None + error_info: ErrorInfo | None = None + keep_managed = False + + if setup_phase is not None and setup_phase.report.outcome == "failed": + status = TestStatus.ERROR + excinfo = setup_phase.call.excinfo + if excinfo is not None: + error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + else: + error_info = error_info_from_longrepr(setup_phase.report.longrepr) + elif setup_phase is not None and setup_phase.report.outcome == "skipped": + status = TestStatus.SKIPPED + elif call_phase is None: + # Setup completed but the call-phase report never fired; the inner + # pytester session was aborted (e.g. by KeyboardInterrupt) before the + # plugin could observe the outcome. Leave the step at IN_PROGRESS so + # the report does not lie about a clean pass. + keep_managed = True + else: + wasxfail = getattr(call_phase.report, "wasxfail", None) + if wasxfail is not None: + if call_phase.report.outcome == "failed": + # Strict xpass: pytest synthesizes a failure when an xfail(strict=True) + # test unexpectedly passes. The xfail mark no longer matches reality. + status = TestStatus.FAILED + elif call_phase.report.outcome == "skipped": + if xfail_marker is not None and xfail_runs is False: + # xfail(run=False): the test body never executed. 
+ status = TestStatus.SKIPPED + else: + # xfail + expected failure: the test fulfilled its xfail expectation. + status = TestStatus.PASSED + else: + # Non-strict xpass: passes that weren't required to fail. + status = TestStatus.PASSED + elif call_phase.report.outcome == "passed": + # Default __exit__ resolves PASSED/FAILED from open_step_results and any + # status the test code may have set. Don't override it here. + return + elif call_phase.report.outcome == "skipped": + status = TestStatus.SKIPPED + elif call_phase.report.outcome == "failed": + excinfo = call_phase.call.excinfo + children_passed = new_step.report_context.open_step_results.get( + current_step.step_path, True + ) + if excinfo is None: + status = TestStatus.FAILED + elif isinstance(excinfo.value, AssertionError): + status = TestStatus.FAILED + error_info = format_assertion_message(excinfo.type, excinfo.value) + elif isinstance(excinfo.value, pytest.fail.Exception): + status = TestStatus.FAILED + elif isinstance(excinfo.value, (KeyboardInterrupt, SystemExit)): + # Hard exits the plugin can observe: pytest converted the + # raise into a call-phase report. The session-aborting variant + # (call_phase is None) lands earlier and stays IN_PROGRESS. + status = TestStatus.ABORTED + error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + elif xfail_marker is not None: + # xfail(raises=X) with a non-matching exception: the contract failed. + status = TestStatus.FAILED + error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + elif not children_passed: + # A substep already recorded the error and carries the traceback; + # the test step only inherits the child-failed signal. 
+ status = TestStatus.FAILED + else: + status = TestStatus.ERROR + error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + + if status is None and not keep_managed: + return + + if status is not None: + # BaseType is frozen; mutate via __dict__ the same way _apply_client_to_instance does. + current_step.__dict__["status"] = status + if error_info is not None: + current_step.__dict__["error_info"] = error_info + new_step._sift_managed_externally = True + + +def finalize_after_teardown(item: pytest.Item, teardown_report: pytest.TestReport) -> None: + """Upgrade a closed step to FAILED when the teardown phase failed. + + The autouse step fixture has already exited by the time the teardown + makereport hook fires, so call ``step.update`` again to override the status + server-side and propagate the failure to the still-open parent step. + """ + step: NewStep | None = getattr(item, "_sift_step", None) + if step is None: + return + current_step = step.current_step + if current_step is None: + return + if teardown_report.outcome == "failed" and current_step.status == TestStatus.PASSED: + current_step.update({"status": TestStatus.FAILED}) + step.report_context.mark_step_failed_after_close(current_step) + + +def _relativize(path: Path, rootpath: Path) -> str: + """Path relative to rootdir, or the basename when it sits outside the tree.""" + try: + rel = str(path.relative_to(rootpath)) + except ValueError: + return path.name + return "" if rel == "." else rel + + +def _strip_param(nodeid: str) -> str: + """Drop the trailing ``[param]`` from a nodeid, keeping ``file::Class::func``. + + The parametrize id is a variation of the test, not its identity; leaving it + in would make a re-parametrization silently shift the grouping key. Splits on + the last ``::`` segment and cuts at its first ``[``; class/function names + never contain ``[``, so nested brackets in a param value can't confuse it. 
+ """ + head, sep, leaf = nodeid.rpartition("::") + leaf = leaf.split("[", 1)[0] + return f"{head}{sep}{leaf}" + + +def derive_target(request: pytest.FixtureRequest, args: tuple[str, ...]) -> str: + """Describe what was run, from the collected items rather than the command line. + + Collection is the ground truth of selection, independent of flag order, + ``-k`` / ``-m`` filters, or which path form was typed. Every value is + anchored to the rootdir (project) name so the shape is uniform; granularity + narrows with the selection: + + * a single test -> ``project/tests/test_motor.py::test_spin`` (param stripped) + * a single file -> ``project/tests/test_motor.py`` + * many files -> their common directory, ``project/tests/motor`` + * whole tree / nothing collected / paths outside rootdir -> ``project`` + + The report is session-level and individual tests are its steps, so the + file/directory grain is the natural unit of "what ran" for the report + itself. The verbatim invocation stays available via ``{command}`` and the + ``pytest_command`` metadata key. + """ + rootpath = request.config.rootpath + root = rootpath.name + + def _anchor(rel: str) -> str: + return f"{root}/{rel}" if rel else root + + items = list(getattr(request.session, "items", ()) or ()) + if not items: + return root + if len(items) == 1: + return _anchor(_strip_param(items[0].nodeid)) + paths = {p for p in (getattr(i, "path", None) for i in items) if p is not None} + if not paths: + return root + if len(paths) == 1: + return _anchor(_relativize(next(iter(paths)), rootpath)) + try: + common = Path(os.path.commonpath([str(p) for p in paths])) + except ValueError: + # e.g. paths on different drives (Windows); fall back to the project. 
+ return root + return _anchor(_relativize(common, rootpath)) + + +def build_template_fields( + target: str, + command: str, + args: tuple[str, ...], + request: pytest.FixtureRequest, +) -> dict[str, Any]: + """Build the placeholder mapping shared by the name and test_case templates.""" + items = getattr(request.session, "items", ()) or () + git = _git_metadata() or {} + return { + "target": target, + "command": command, + "args": " ".join(args), + "rootdir": request.config.rootpath.name, + "timestamp": datetime.now(timezone.utc).isoformat(), + "count": len(items), + "git_repo": git.get("git_repo", ""), + "git_branch": git.get("git_branch", ""), + "git_commit": git.get("git_commit", ""), + } + + +def format_template( + template: str, + fields: dict[str, Any], + *, + fallback: str, + option_label: str, +) -> str: + """Format ``template`` with ``fields``; on bad input, warn and return ``fallback``. + + A bad template should never block test results from being recorded, so the + rendering errors collapse to a warning + fallback rather than aborting the + session. + """ + from sift_client.pytest_plugin import SiftPytestPluginWarning + + try: + return template.format(**fields) + except (KeyError, IndexError, ValueError) as exc: + warnings.warn( + f"Invalid {option_label} template {template!r} ({exc}); using fallback.", + SiftPytestPluginWarning, + stacklevel=2, + ) + return fallback + + +def resolve_log_file(pytestconfig: pytest.Config | None) -> str | Path | bool | None: + """Determine log_file value from CLI flag or ini key. + + Three signal types arrive here: + + * ``None``: unset; nothing was passed on the CLI and the ini key is + absent. Treat as the default "use a temp file." + * Python ``False``: an explicit disable, typically set in a conftest via + ``config.option.sift_log_file = False``. Return ``None`` so + the rest of the pipeline knows to skip logging entirely. 
+ * A string (from CLI or ini): interpret ``"true"`` / ``"1"`` as the temp + file default, ``"false"`` / ``"none"`` as disable, anything else as a + file path. + + Rejects ``--sift-log-file=none`` combined with ``--sift-offline`` since + offline mode needs the log file as its sole sink. + """ + raw = LOG_FILE_OPTION.resolve(pytestconfig) + disabled = raw is False or (isinstance(raw, str) and raw.lower() in ("false", "none")) + if disabled and is_offline(pytestconfig): + raise pytest.UsageError( + "--sift-log-file=none is incompatible with --sift-offline; offline " + "mode requires a log file. Pin one with --sift-log-file=, or " + "drop --sift-log-file=none to use a temp file." + ) + if raw is False: + return None + if not raw: + return True + lower = str(raw).lower() + if lower in ("true", "1"): + return True + if lower in ("false", "none"): + return None + return Path(raw) + + +def report_context_impl( + sift_client: SiftClient, + request: pytest.FixtureRequest, + pytestconfig: pytest.Config | None = None, +) -> Generator[ReportContext, None, None]: + args = request.config.invocation_params.args + # ``target`` is "what ran", derived from the collected items (see + # derive_target), invocation-independent, unlike parsing the command + # line. Both the display name and test_case default to it; the verbatim + # command stays available via {command} and the pytest_command metadata. 
+ target = derive_target(request, args) + command = "pytest " + " ".join(args) if args else "pytest" + fields = build_template_fields(target, command, args, request) + name_template = REPORT_NAME_OPTION.resolve(pytestconfig) or "{target} {timestamp}" + name = format_template( + name_template, + fields, + fallback=f"{target} {fields['timestamp']}", + option_label="sift_report_name", + ) + test_case_template = TEST_CASE_OPTION.resolve(pytestconfig) + test_case = ( + format_template( + test_case_template, + fields, + fallback=target, + option_label="sift_test_case", + ) + if test_case_template + else target + ) + # Metadata starts from the [tool.sift.pytest.report.metadata] TOML table, and + # the auto-recorded pytest_command layers in last so the user can't + # accidentally overwrite it. + report_metadata: dict[str, str | float | bool] = { + **METADATA_OPTION.resolve_merged(pytestconfig), + "pytest_command": command, + } + # Mode → ReportContext flags: + # online (default): log_file=, replay_log_file=True + # --sift-offline: log_file=, replay_log_file=False + # --sift-disabled: log_file=False, replay_log_file=False + disabled = sift_client._simulate + offline = False if disabled else is_offline(pytestconfig) + log_file: str | Path | bool | None = False if disabled else resolve_log_file(pytestconfig) + include_git_metadata = bool(GIT_METADATA_OPTION.resolve(pytestconfig)) + with ReportContext( + sift_client, + name=name, + test_case=test_case, + test_system_name=TEST_SYSTEM_NAME_OPTION.resolve(pytestconfig) or None, + system_operator=SYSTEM_OPERATOR_OPTION.resolve(pytestconfig) or None, + serial_number=SERIAL_NUMBER_OPTION.resolve(pytestconfig) or None, + part_number=PART_NUMBER_OPTION.resolve(pytestconfig) or None, + log_file=log_file, + include_git_metadata=include_git_metadata, + replay_log_file=not (disabled or offline), + metadata=report_metadata, + ) as context: + try: + yield context + finally: + # Drain the hierarchy + parametrize stacks INSIDE the + # 
ReportContext's ``with`` block, so the final ``__exit__`` + # update calls for those parent steps are written to the log + # file BEFORE the import worker drains. Without this, the + # worker exits with a partial backlog and the parent steps + # are stuck IN_PROGRESS in the Sift report. + try: + drain_parametrize_stack() + finally: + drain_hierarchy_stack() + + +# Placeholder credentials used in --sift-offline mode when env/ini values +# are missing. Offline mode never makes network calls, so the values are +# only syntactically required by SiftConnectionConfig. +OFFLINE_DEFAULTS = { + "SIFT_API_KEY": "offline", + "SIFT_GRPC_URI": "offline.invalid:0", + "SIFT_REST_URI": "http://offline.invalid", +} + + +def build_disabled_client() -> SiftClient: + """Construct a SiftClient for ``--sift-disabled`` mode. + + Tagged with ``_simulate=True`` so test-results writes short-circuit through + the existing low-level simulate path without contacting Sift. The URLs are + syntactically valid but unreachable; nothing dials them. + """ + client = SiftClient( + connection_config=SiftConnectionConfig( + api_key="disabled", + grpc_url="disabled.invalid:0", + rest_url="http://disabled.invalid", + ) + ) + client._simulate = True + return client + + +def step_impl( + report_context: ReportContext, request: pytest.FixtureRequest +) -> Generator[NewStep, None, None]: + node = request.node + # Items get a parametrize path stashed in ``pytest_collection_modifyitems``; + # modules/other nodes fall back to their node name. The leaf frame + # (``path[-1]``) is the test-specific display name; parents are opened + # by ``_parametrize_parents``. When parametrize-nesting is disabled, fall + # back to the bracket-mangled pytest name (e.g. ``test_a[1]``) so the leaf + # remains uniquely identifiable. 
+ if PARAMETRIZE_NESTING_OPTION.resolve(request.config): + path = node.stash.get(parametrize_path_key, ()) + name = path[-1] if path else str(node.name) + else: + name = str(node.name) + # ``node.obj`` may not exist (e.g., ``pytest.DoctestItem``) or may raise + # when accessed; fall back to no description in those cases rather than + # erroring out a perfectly valid test. ``getattr``'s default only + # suppresses ``AttributeError``; the try/except catches everything else + # (RuntimeError from a misbehaving ``__doc__`` descriptor, etc.). + try: + existing_docstring = getattr(getattr(node, "obj", None), "__doc__", None) or None + except Exception: + existing_docstring = None + with report_context.new_step( + name=name, description=existing_docstring, assertion_as_fail_not_error=False + ) as new_step: + node._sift_step = new_step + yield new_step + resolve_initial_status(new_step, node) diff --git a/python/lib/sift_client/_internal/pytest_plugin/steps.py b/python/lib/sift_client/_internal/pytest_plugin/steps.py new file mode 100644 index 000000000..9904ceecb --- /dev/null +++ b/python/lib/sift_client/_internal/pytest_plugin/steps.py @@ -0,0 +1,310 @@ +"""Parent-step stacks: the parametrize and hierarchy frames shared across items. + +Holds the collection-phase stash keys and the two module-level frame stacks +(``parametrize_stack`` / ``hierarchy_stack``), the helpers that build a chain +for an item and drain the stacks, and the per-item reconcilers the autouse +fixtures delegate to. Frames are shared across sibling test items and drained +innermost-first at session end. +""" + +from __future__ import annotations + +import warnings +from typing import Any, Tuple + +import pytest + +from sift_client._internal.pytest_plugin.options import ( + CLASS_STEP_OPTION, + MODULE_STEP_OPTION, + PACKAGE_STEP_OPTION, + PARAMETRIZE_NESTING_OPTION, +) + +STASH_MISSING = object() + +parametrize_path_key = pytest.StashKey[Tuple[str, ...]]() +# Each frame: (path_key, open step). 
Frames are shared across sibling test items +# and drained at session end. +parametrize_stack: list[tuple[str, Any]] = [] + +hierarchy_key = pytest.StashKey[Tuple[Tuple[str, str, "str | None", bool], ...]]() +# Outer-to-inner frames for the item's collection-tree ancestors. Each chain +# entry is ``(identity, name, doc, rendered)``: +# - ``identity``: a globally-unique key (``node.nodeid``) used for diff +# comparison. Two ancestors at the same depth with the same display name +# but reached via different paths (e.g., ``proj_a/utils`` and +# ``proj_b/utils`` in a monorepo) get distinct identities, so they never +# silently merge in the diff. +# - ``name``: the human-readable step name used when ``rendered`` opens the +# Sift step. +# - ``doc``: docstring used for the step description if rendered. +# - ``rendered``: True iff the corresponding ``sift_*_step`` ini flag is on. +# Non-rendered frames participate in the diff but do not call +# ``rc.new_step(...)``; they appear with ``ns=None`` in the stack. +# +# Stack entries: ``(identity, name, open_step_or_None)``. Frames are shared +# across sibling test items and drained at session end. Drained AFTER +# parametrize_stack since parametrize parents nest inside hierarchy parents. +hierarchy_stack: list[tuple[str, str, Any]] = [] + + +def drain_step_stack(stack: list, *, swallow_errors: bool = True) -> None: + """Pop and close every frame. + + With ``swallow_errors=True`` (default, used at teardown / session end), + per-frame failures are surfaced as ``SiftPytestStepDrainWarning`` so a + single misbehaving ``__exit__`` can't block the rest of the stack from + cleaning up or cascade out of pytest's finalizer chain. 
+ + With ``swallow_errors=False`` (mid-session, when a class transition forces + parametrize parents to close), the stack is still fully drained but the + first per-frame exception is re-raised at the end as a + ``SiftPytestStepDrainError`` so a real upstream invariant violation + surfaces as a test error instead of a silenceable warning. + """ + from sift_client.pytest_plugin import SiftPytestStepDrainError, SiftPytestStepDrainWarning + + errors: list[tuple[str, BaseException]] = [] + while stack: + entry = stack.pop() + # Tolerate either ``(name, ns)`` (parametrize stack) or + # ``(identity, name, ns)`` (hierarchy stack) entries. + name, ns = entry[-2], entry[-1] + if ns is None: + # Non-rendered diff-only frame (e.g. a Package frame when + # ``sift_package_step=false``); nothing to close. + continue + try: + ns.__exit__(None, None, None) + except Exception as exc: + if swallow_errors: + warnings.warn( + f"Sift plugin: closing step {name!r} during drain raised " + f"{type(exc).__name__}: {exc}", + SiftPytestStepDrainWarning, + stacklevel=2, + ) + else: + errors.append((name, exc)) + if errors: + first_name, first_exc = errors[0] + raise SiftPytestStepDrainError( + f"Sift plugin: {len(errors)} step(s) raised while draining mid-session; " + f"first failure on {first_name!r}: {type(first_exc).__name__}: {first_exc}" + ) from first_exc + + +def drain_parametrize_stack(*, swallow_errors: bool = True) -> None: + drain_step_stack(parametrize_stack, swallow_errors=swallow_errors) + + +def drain_hierarchy_stack(*, swallow_errors: bool = True) -> None: + drain_step_stack(hierarchy_stack, swallow_errors=swallow_errors) + + +def close_frame(name: str, ns: Any) -> None: + """Close a single frame, warning on per-frame failure. + + Used by the mid-session hierarchy-stack pop and the rollback paths so a + misbehaving ``__exit__`` neither shadows the original exception nor leaks + sibling frames. ``ns=None`` indicates a non-rendered diff-only frame; skip. 
+ """ + from sift_client.pytest_plugin import SiftPytestStepDrainWarning + + if ns is None: + return + try: + ns.__exit__(None, None, None) + except Exception as exc: + warnings.warn( + f"Sift plugin: closing step {name!r} raised {type(exc).__name__}: {exc}", + SiftPytestStepDrainWarning, + stacklevel=2, + ) + + +def build_parametrize_path(item: pytest.Item) -> tuple[str, ...]: + """Outer-to-inner step display names for a parametrized item. + + Pytest stores ``callspec.params`` with the BOTTOM decorator's axis first; + the Sift step tree treats the TOP decorator as outermost, so we reverse. + """ + callspec = getattr(item, "callspec", None) + if callspec is None or not callspec.params: + return () + originalname = getattr(item, "originalname", item.name) + frames: list[str] = [originalname] + for name, value in reversed(callspec.params.items()): + frames.append(f"{name}={value!r}") + return tuple(frames) + + +def build_hierarchy_chain( + item: pytest.Item | pytest.Collector, + config: pytest.Config, +) -> tuple[tuple[str, str, str | None, bool], ...]: + """Outer-to-inner ``(identity, name, docstring, rendered)`` for collection ancestors. + + Walks ``item.parent`` upward and ALWAYS collects every ``pytest.Package``, + ``pytest.Module``, and ``pytest.Class`` ancestor; they all participate in + the diff that keeps the report tree coherent across tests, so two + same-named ancestors reached via different paths (e.g., ``proj_a/utils`` + and ``proj_b/utils`` in a monorepo where the ``proj_*`` dirs are + ``pytest.Dir`` nodes the walker skips) cannot silently merge. + + The ``identity`` field is ``node.nodeid``, globally unique per collected + node. The diff compares on identity, not the display ``name``. + + The ``rendered`` flag is True iff the layer's ini flag is on + (``sift_package_step`` / ``sift_module_step`` / ``sift_class_step``). + Non-rendered frames participate in the diff for identity but don't open a + Sift step. 
+ + The ``node.obj`` access is a pytest property that imports the underlying + Python object and can raise *any* exception (ImportError, custom + metaclass errors, descriptor ``__doc__`` properties that throw). Guard + broadly so a misbehaving collector doesn't abort the whole collection + phase; that frame's docstring just becomes ``None``. + """ + include_package = bool(PACKAGE_STEP_OPTION.resolve(config)) + include_module = bool(MODULE_STEP_OPTION.resolve(config)) + include_class = bool(CLASS_STEP_OPTION.resolve(config)) + + chain: list[tuple[str, str, str | None, bool]] = [] + # ``node.parent`` is typed as the internal ``_pytest.nodes.Node`` which + # isn't part of pytest's public API; widen to ``Any`` for the walk. + node: Any = item + while node is not None: + if isinstance(node, pytest.Class): + rendered = include_class + elif isinstance(node, pytest.Module): + rendered = include_module + elif isinstance(node, pytest.Package): + rendered = include_package + else: + node = node.parent + continue + try: + doc = ( + (getattr(node, "obj", None) and getattr(node.obj, "__doc__", None)) or "" + ).strip() or None + except Exception: + doc = None + chain.append((node.nodeid, node.name, doc, rendered)) + node = node.parent + return tuple(reversed(chain)) + + +def reconcile_hierarchy(request: pytest.FixtureRequest, config: pytest.Config) -> None: + """Open/close hierarchy parents so the open stack matches the item's chain. + + Diffs the item's desired ``(package, module, class)`` chain against + ``hierarchy_stack`` on identity (nodeid), pops the stale tail, and pushes + new rendered frames. Which node types render is decided at build time by + ``sift_package_step`` / ``sift_module_step`` / ``sift_class_step``; when the + chain changes, the parametrize stack is drained first since parametrize + parents nest INSIDE these. 
+ """ + # Fall back to computing the chain on-demand for items that bypassed + # ``pytest_collection_modifyitems`` (e.g., dynamically inserted by another + # plugin's later hook). Defaulting to ``()`` would incorrectly drain the + # entire open hierarchy stack for those items. + desired = request.node.stash.get(hierarchy_key, STASH_MISSING) + if desired is STASH_MISSING: + desired = build_hierarchy_chain(request.node, config) + common = 0 + # Compare on identity (nodeid); same-named ancestors at different paths + # MUST stay distinct. + while ( + common < len(hierarchy_stack) + and common < len(desired) + and hierarchy_stack[common][0] == desired[common][0] + ): + common += 1 + # Any change to the hierarchy chain orphans parametrize parents from the + # previous test. Drain them before mutating the hierarchy stack so + # ReportContext's top-of-stack invariant holds. Strict mode: a per-frame + # ``__exit__`` failure here signals a real upstream drift between the + # plugin stacks and ReportContext; raise it as a test error instead of a + # silenceable warning. + if common < len(hierarchy_stack) or common < len(desired): + drain_parametrize_stack(swallow_errors=False) + # Symmetric per-frame guard for the hierarchy pop so one bad ``__exit__`` + # doesn't leave hierarchy_stack partially drained for every subsequent test. + while len(hierarchy_stack) > common: + _identity, name, ns = hierarchy_stack.pop() + close_frame(name, ns) + if not desired[common:]: + return + # Fetch ``report_context`` lazily, but only when there's at least one + # rendered frame to push. Pure diff-only frames (e.g. a Package frame when + # ``sift_package_step=false``) just update hierarchy_stack with ns=None. + rc = None + # Roll back any partial push so a mid-loop exception doesn't leave half + # the chain orphaned on the stack. Per-frame guard inside the rollback so + # a failing ``__exit__`` doesn't shadow the original exception or leak + # the remaining opened frames. 
+ opened: list[tuple[str, str, Any]] = [] + try: + for identity, name, doc, rendered in desired[common:]: + if rendered: + if rc is None: + rc = request.getfixturevalue("report_context") + ns = rc.new_step(name=name, description=doc, assertion_as_fail_not_error=False) + ns.__enter__() + opened.append((identity, name, ns)) + else: + opened.append((identity, name, None)) + except BaseException: + while opened: + _identity, name, ns = opened.pop() + close_frame(name, ns) + raise + hierarchy_stack.extend(opened) + + +def reconcile_parametrize(request: pytest.FixtureRequest, config: pytest.Config) -> None: + """Open/close shared parametrize parents so the open stack matches the item. + + Diffs the item's desired parametrize path against ``parametrize_stack``: + pops the stale tail, then opens new parents (everything except the innermost + frame, which the ``step`` fixture creates as the leaf). Parents persist + across sibling items so a tree like ``test_x[a=1]`` / ``test_x[a=2]`` shares + one ``test_x`` container. No-op when ``sift_parametrize_nesting=false``. + """ + if not PARAMETRIZE_NESTING_OPTION.resolve(config): + return + # Fall back to on-demand computation for dynamically-inserted items; + # see reconcile_hierarchy for the same rationale. + desired = request.node.stash.get(parametrize_path_key, STASH_MISSING) + if desired is STASH_MISSING: + desired = build_parametrize_path(request.node) + parents = desired[:-1] + common = 0 + while ( + common < len(parametrize_stack) + and common < len(parents) + and parametrize_stack[common][0] == parents[common] + ): + common += 1 + # Per-frame guard so one bad ``__exit__`` doesn't leave parametrize_stack + # partially drained for every subsequent test. 
+ while len(parametrize_stack) > common: + name, ns = parametrize_stack.pop() + close_frame(name, ns) + if not parents[common:]: + return + rc = request.getfixturevalue("report_context") + opened: list[tuple[str, Any]] = [] + try: + for display in parents[common:]: + ns = rc.new_step(name=display, assertion_as_fail_not_error=False) + ns.__enter__() + opened.append((display, ns)) + except BaseException: + while opened: + name, ns = opened.pop() + close_frame(name, ns) + raise + parametrize_stack.extend(opened) diff --git a/python/lib/sift_client/_internal/pytest_plugin/terminal.py b/python/lib/sift_client/_internal/pytest_plugin/terminal.py new file mode 100644 index 000000000..4f1eee0dd --- /dev/null +++ b/python/lib/sift_client/_internal/pytest_plugin/terminal.py @@ -0,0 +1,231 @@ +"""Terminal-summary formatting for the session-end Sift report panel. + +Row writers and colored count/measurement segments used by +``pytest_terminal_summary``, plus the best-effort browser opener for +``--sift-open-report``. Color is dropped automatically when the terminal has no +markup (not a TTY or ``--color=no``), so captured/CI output stays plain text. +""" + +from __future__ import annotations + +import os +from typing import Any + +from sift_client._internal.pytest_plugin.modes import mode_label, sdk_version +from sift_client.sift_types.test_report import TestStatus +from sift_client.util.test_results.context_manager import _quiet_fork_stderr + +LABEL_WIDTH = 13 + + +def sift_kv(terminalreporter: Any, label: str, value: str, **value_markup: bool) -> None: + """Write an indented ``label value`` row, bolding the label. + + ``value_markup`` (e.g. ``green=True``, ``cyan=True``) styles only the value. + Color is dropped automatically when the terminal has no markup (not a TTY or + ``--color=no``), so captured/CI output stays plain text. 
+ """ + terminalreporter.write(" ") + terminalreporter.write(f"{label:<{LABEL_WIDTH}}", bold=True) + terminalreporter.write_line(value, **value_markup) + + +# Step-count breakdown order and labels for the footer's "Steps" row. +STEP_COUNT_ORDER: tuple[tuple[TestStatus, str], ...] = ( + (TestStatus.PASSED, "passed"), + (TestStatus.FAILED, "failed"), + (TestStatus.ERROR, "error"), + (TestStatus.ABORTED, "aborted"), + (TestStatus.SKIPPED, "skipped"), + (TestStatus.IN_PROGRESS, "in progress"), +) + + +# Per-status color for the footer's step breakdown: green pass, red +# failure/error/abort, yellow skip; in-progress (and anything else) stays plain. +STEP_STATUS_MARKUP: dict[TestStatus, dict[str, bool]] = { + TestStatus.PASSED: {"green": True}, + TestStatus.FAILED: {"red": True}, + TestStatus.ERROR: {"red": True}, + TestStatus.ABORTED: {"red": True}, + TestStatus.SKIPPED: {"yellow": True}, +} + + +def step_count_segments(counts: Any) -> list[tuple[str, dict[str, bool]]]: + """Build ``(text, markup)`` segments for a step tally, non-zero only.""" + return [ + (f"{counts.get(status, 0)} {label}", STEP_STATUS_MARKUP.get(status, {})) + for status, label in STEP_COUNT_ORDER + if counts.get(status, 0) + ] + + +def measurement_segments(counts: Any) -> list[tuple[str, dict[str, bool]]]: + """Build ``(text, markup)`` segments for a measurement tally, non-zero only.""" + segments: list[tuple[str, dict[str, bool]]] = [] + if counts.get(True, 0): + segments.append((f"{counts[True]} passed", {"green": True})) + if counts.get(False, 0): + segments.append((f"{counts[False]} failed", {"red": True})) + return segments + + +def write_count_row( + terminalreporter: Any, label: str, segments: list[tuple[str, dict[str, bool]]] +) -> None: + """Write a ``label a · b · c`` row, applying each segment's color markup.""" + terminalreporter.write(" ") + terminalreporter.write(f"{label:<{LABEL_WIDTH}}", bold=True) + for index, (text, markup) in enumerate(segments): + if index: + 
terminalreporter.write(" · ") + terminalreporter.write(text, **markup) + terminalreporter.write_line("") + + +def report_panel_title(report: Any, terminalreporter: Any) -> str: + """``Sift report · `` for the section rule, truncated to the terminal width. + + The report name embeds a timestamp (and, for invocation-based runs, the + pytest args), so a long name is truncated with an ellipsis to keep the + separator line from wrapping. + """ + base = "Sift report" + name = getattr(report, "name", None) + if not name: + return base + title = f"{base} · {name}" + fullwidth = getattr(getattr(terminalreporter, "_tw", None), "fullwidth", 80) + # Reserve room for the separator characters and spaces write_sep adds. + limit = max(len(base), fullwidth - 8) + if len(title) > limit: + title = title[: limit - 1] + "…" + return title + + +def maybe_open_report(url: str) -> None: + """Best-effort open the report URL in a browser (for ``--sift-open-report``). + + Skipped on CI or non-interactive sessions so a committed ``sift_open_report`` + setting can't spawn a browser on a headless agent; the flag is meant for + local development. + """ + import sys + import webbrowser + + if os.environ.get("CI") or not sys.stdout.isatty(): + return + try: + # webbrowser.open forks/execs the platform opener while the gRPC client's + # background threads are live; redirect fd 2 across the fork to swallow + # gRPC's prefork notice (same treatment as the plugin's other fork sites). + with _quiet_fork_stderr(): + webbrowser.open(url) + except Exception: + # Headless / no browser available: opening is a convenience, never fatal. 
+ pass + + +def write_disabled_summary(terminalreporter: Any) -> None: + """Print the one-line panel shown in ``--sift-disabled`` mode.""" + terminalreporter.write_sep("=", "Sift", cyan=True, bold=True) + terminalreporter.write_line("Sift disabled — no test report created.") + + +def write_report_summary( + terminalreporter: Any, + context: Any, + config: Any, + report_id: str | None, + report_url: str | None, + offline: bool, +) -> None: + """Print the session-end report panel: outcome, tallies, provenance, action. + + ``report_id`` / ``report_url`` come from ``resolve_report_link``. The action + row is a clickable link (online), the upload command (offline), or a replay + hint when the report never uploaded. + """ + log_file = getattr(context, "log_file", None) + + failed = bool(getattr(context, "any_failures", False)) + status_word, status_markup = ( + ("FAILED", {"red": True, "bold": True}) + if failed + else ("PASSED", {"green": True, "bold": True}) + ) + # Offline results live only in the local log until replayed, so the status + # row calls that out instead of repeating the version (already in the header). + status_context = ( + f"{mode_label(config)} · not uploaded" + if offline + else f"{mode_label(config)} · sift-stack-py {sdk_version()}" + ) + + report = context.report + + terminalreporter.write_sep( + "=", report_panel_title(report, terminalreporter), cyan=True, bold=True + ) + + # Identity row: the test case (test path or pytest invocation). + if report.test_case: + sift_kv(terminalreporter, "Test case", str(report.test_case)) + + # Status row: colored outcome, then compact mode context. + terminalreporter.write(" ") + terminalreporter.write(f"{'Status':<{LABEL_WIDTH}}", bold=True) + terminalreporter.write(status_word, **status_markup) + terminalreporter.write_line(f" {status_context}") + + # Step + measurement tallies (green pass, red failure, yellow skip). 
+ write_count_row( + terminalreporter, + "Steps", + step_count_segments(context.step_status_counts) or [("no steps", {})], + ) + measurements = measurement_segments(context.measurement_counts) + if measurements: + write_count_row(terminalreporter, "Measurements", measurements) + + # Provenance row: test system and operator. + system = " · ".join(part for part in (report.test_system_name, report.system_operator) if part) + if system: + sift_kv(terminalreporter, "System", system) + + # Local log file (write-through backup online, sole sink offline). + if log_file is not None: + sift_kv(terminalreporter, "Log file", str(log_file)) + + if offline: + if log_file is not None: + terminalreporter.write_sep("-", "to upload to Sift") + terminalreporter.write_line(f" >> import-test-result-log {log_file}", cyan=True) + return + + if not report_id: + # Incremental upload never mapped the report (the worker died before + # replaying the create), so there's no real report to link. + sift_kv( + terminalreporter, + "Report", + f"not uploaded — replay with: import-test-result-log {log_file}", + yellow=True, + ) + elif report_url is not None: + sift_kv(terminalreporter, "Report", report_url, cyan=True) + else: + sift_kv( + terminalreporter, + "Report", + f"id {report_id} (set sift_app_url for a clickable link)", + ) + + if report_id and getattr(context, "replay_incomplete", False) and log_file is not None: + sift_kv( + terminalreporter, + "", + f"may be incomplete — finish with: import-test-result-log {log_file}", + yellow=True, + ) diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py b/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py index 4efb9f554..a61035b90 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py @@ -27,8 +27,8 @@ def test_ini_log_file_none( ) -> None: write_probe_conftest( """ - from sift_client.pytest_plugin import 
_resolve_log_file - print("RESOLVED:", _resolve_log_file(config)) + from sift_client._internal.pytest_plugin.report import resolve_log_file + print("RESOLVED:", resolve_log_file(config)) """, ) pytester.makepyprojecttoml( @@ -56,8 +56,8 @@ def test_python_false_disables_log_file( write_probe_conftest( """ config.option.sift_log_file = False - from sift_client.pytest_plugin import _resolve_log_file - print("RESOLVED:", _resolve_log_file(config)) + from sift_client._internal.pytest_plugin.report import resolve_log_file + print("RESOLVED:", resolve_log_file(config)) """, ) pytester.makepyfile("def test_noop(): pass") @@ -73,8 +73,8 @@ def test_ini_log_file_path( log_path = tmp_path / "sift-run.jsonl" write_probe_conftest( """ - from sift_client.pytest_plugin import _resolve_log_file - print("RESOLVED:", _resolve_log_file(config)) + from sift_client._internal.pytest_plugin.report import resolve_log_file + print("RESOLVED:", resolve_log_file(config)) """, ) pytester.makepyprojecttoml( @@ -94,8 +94,8 @@ def test_ini_offline_true( ) -> None: write_probe_conftest( """ - from sift_client.pytest_plugin import _is_offline - print("OFFLINE:", _is_offline(config)) + from sift_client._internal.pytest_plugin.modes import is_offline + print("OFFLINE:", is_offline(config)) """, ) pytester.makepyprojecttoml( @@ -115,8 +115,8 @@ def test_ini_disabled_true( ) -> None: write_probe_conftest( """ - from sift_client.pytest_plugin import _is_disabled - print("DISABLED:", _is_disabled(config)) + from sift_client._internal.pytest_plugin.modes import is_disabled + print("DISABLED:", is_disabled(config)) """, ) pytester.makepyprojecttoml( @@ -159,8 +159,8 @@ def test_cli_overrides_ini( cli_path = tmp_path / "cli-wins.jsonl" write_probe_conftest( """ - from sift_client.pytest_plugin import _resolve_log_file - print("RESOLVED:", _resolve_log_file(config)) + from sift_client._internal.pytest_plugin.report import resolve_log_file + print("RESOLVED:", resolve_log_file(config)) """, ) 
pytester.makepyprojecttoml( @@ -181,8 +181,8 @@ def test_cli_offline_flag( """The ``--sift-offline`` CLI flag flips the resolver to True.""" write_probe_conftest( """ - from sift_client.pytest_plugin import _is_offline - print("OFFLINE:", _is_offline(config)) + from sift_client._internal.pytest_plugin.modes import is_offline + print("OFFLINE:", is_offline(config)) """, ) pytester.makepyfile("def test_noop(): pass") @@ -197,8 +197,8 @@ def test_cli_disabled_flag( """The ``--sift-disabled`` CLI flag flips the resolver to True.""" write_probe_conftest( """ - from sift_client.pytest_plugin import _is_disabled - print("DISABLED:", _is_disabled(config)) + from sift_client._internal.pytest_plugin.modes import is_disabled + print("DISABLED:", is_disabled(config)) """, ) pytester.makepyfile("def test_noop(): pass") @@ -232,14 +232,11 @@ def test_defaults_when_neither_set( ) -> None: write_probe_conftest( """ - from sift_client.pytest_plugin import ( - _is_disabled, - _is_offline, - _resolve_log_file, - ) - print("RESOLVED:", _resolve_log_file(config)) - print("OFFLINE:", _is_offline(config)) - print("DISABLED:", _is_disabled(config)) + from sift_client._internal.pytest_plugin.modes import is_disabled, is_offline + from sift_client._internal.pytest_plugin.report import resolve_log_file + print("RESOLVED:", resolve_log_file(config)) + print("OFFLINE:", is_offline(config)) + print("DISABLED:", is_disabled(config)) print("INI_GIT:", config.getini("sift_git_metadata")) """, ) diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py index 9e0dd52e0..39ee0ccf6 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py @@ -311,10 +311,8 @@ def test_y(self, w): def test_drain_step_stack_continues_past_failing_exit() -> None: """Lenient mode: a misbehaving ``__exit__`` must not block the rest of the stack.""" - from 
sift_client.pytest_plugin import ( - SiftPytestStepDrainWarning, - _drain_step_stack, - ) + from sift_client._internal.pytest_plugin.steps import drain_step_stack + from sift_client.pytest_plugin import SiftPytestStepDrainWarning class _Good: def __init__(self) -> None: @@ -330,7 +328,7 @@ def __exit__(self, *_: object) -> None: g1, g2, bad = _Good(), _Good(), _Bad() stack: list[tuple[str, object]] = [("g1", g1), ("bad", bad), ("g2", g2)] with pytest.warns(SiftPytestStepDrainWarning, match="boom"): - _drain_step_stack(stack) + drain_step_stack(stack) assert stack == [] assert g1.closed assert g2.closed @@ -338,10 +336,8 @@ def __exit__(self, *_: object) -> None: def test_drain_step_stack_strict_drains_fully_then_raises() -> None: """Strict mode: drain every frame, then raise with the FIRST failure chained.""" - from sift_client.pytest_plugin import ( - SiftPytestStepDrainError, - _drain_step_stack, - ) + from sift_client._internal.pytest_plugin.steps import drain_step_stack + from sift_client.pytest_plugin import SiftPytestStepDrainError class _Good: def __init__(self) -> None: @@ -362,7 +358,7 @@ def __exit__(self, *_: object) -> None: # one collected and surfaces in __cause__. stack: list[tuple[str, object]] = [("g", g), ("b1", b1), ("b2", b2)] with pytest.raises(SiftPytestStepDrainError, match="2 step.*'b2'") as exc_info: - _drain_step_stack(stack, swallow_errors=False) + drain_step_stack(stack, swallow_errors=False) # Stack fully drained even though it raised. assert stack == [] assert g.closed diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_settings_reference.py b/python/lib/sift_client/_tests/pytest_plugin/test_settings_reference.py index ba6fbf5a5..0bb46c76f 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_settings_reference.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_settings_reference.py @@ -1,4 +1,4 @@ -"""Guard rail that pins the docs settings table to the ``_OPTIONS`` registry. 
+"""Guard rail that pins the docs settings table to the ``PLUGIN_OPTIONS`` registry. If you add or change a setting in ``lib/sift_client/pytest_plugin.py`` without regenerating the Markdown table in ``docs/guides/pytest_plugin/configuration.md``, @@ -25,15 +25,15 @@ def test_settings_reference_docs_in_sync(pytestconfig: pytest.Config) -> None: import pytest pytest.skip(f"{_DOCS_PATH} not present in this checkout") - from sift_client.pytest_plugin import _render_settings_reference + from sift_client._internal.pytest_plugin.options import render_settings_reference - rendered = _render_settings_reference() + rendered = render_settings_reference() content = _DOCS_PATH.read_text() if rendered not in content: import pytest pytest.fail( - "Settings reference is out of sync with the _OPTIONS registry. Replace the " + "Settings reference is out of sync with the PLUGIN_OPTIONS registry. Replace the " "table under '## Settings reference' in " "docs/guides/pytest_plugin/configuration.md with:\n\n" + rendered ) diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_terminal_output.py b/python/lib/sift_client/_tests/pytest_plugin/test_terminal_output.py index 76550cc22..0845f143b 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_terminal_output.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_terminal_output.py @@ -13,10 +13,10 @@ from typing import TYPE_CHECKING, Callable from sift_client._internal.low_level_wrappers._test_results_log import LogTracking -from sift_client.pytest_plugin import ( - _measurement_segments, - _resolve_real_report_id, - _step_count_segments, +from sift_client._internal.pytest_plugin.report import resolve_real_report_id +from sift_client._internal.pytest_plugin.terminal import ( + measurement_segments, + step_count_segments, ) from sift_client.sift_types.test_report import TestStatus @@ -29,7 +29,7 @@ class TestStepCountSegments: def test_lists_nonzero_statuses_in_order_with_color(self) -> None: counts = 
Counter({TestStatus.PASSED: 4, TestStatus.FAILED: 2, TestStatus.SKIPPED: 1}) - assert _step_count_segments(counts) == [ + assert step_count_segments(counts) == [ ("4 passed", {"green": True}), ("2 failed", {"red": True}), ("1 skipped", {"yellow": True}), @@ -37,28 +37,28 @@ def test_lists_nonzero_statuses_in_order_with_color(self) -> None: def test_error_and_aborted_are_red(self) -> None: counts = Counter({TestStatus.ERROR: 1, TestStatus.ABORTED: 1}) - assert _step_count_segments(counts) == [ + assert step_count_segments(counts) == [ ("1 error", {"red": True}), ("1 aborted", {"red": True}), ] def test_empty_is_empty(self) -> None: - assert _step_count_segments(Counter()) == [] + assert step_count_segments(Counter()) == [] class TestMeasurementSegments: def test_passed_green_failed_red(self) -> None: - assert _measurement_segments(Counter({True: 2, False: 1})) == [ + assert measurement_segments(Counter({True: 2, False: 1})) == [ ("2 passed", {"green": True}), ("1 failed", {"red": True}), ] def test_empty_is_empty(self) -> None: - assert _measurement_segments(Counter()) == [] + assert measurement_segments(Counter()) == [] class TestResolveRealReportId: - """``_resolve_real_report_id`` maps the footer to the real server report id.""" + """``resolve_real_report_id`` maps the footer to the real server report id.""" def test_synchronous_online_uses_report_id_directly(self) -> None: # No log file, non-simulated report (``--sift-log-file=false`` path). 
@@ -66,7 +66,7 @@ def test_synchronous_online_uses_report_id_directly(self) -> None: report=SimpleNamespace(id_="real-123", is_simulated=False), log_file=None, ) - assert _resolve_real_report_id(context) == "real-123" + assert resolve_real_report_id(context) == "real-123" def test_incremental_resolves_via_sidecar(self, tmp_path: Path) -> None: log_file = tmp_path / "run.jsonl" @@ -76,7 +76,7 @@ def test_incremental_resolves_via_sidecar(self, tmp_path: Path) -> None: report=SimpleNamespace(id_="sim-1", is_simulated=True), log_file=log_file, ) - assert _resolve_real_report_id(context) == "real-1" + assert resolve_real_report_id(context) == "real-1" def test_empty_report_id_returns_none(self) -> None: # An unset/empty id must not produce a ``/test-results/`` link. @@ -84,7 +84,7 @@ def test_empty_report_id_returns_none(self) -> None: report=SimpleNamespace(id_="", is_simulated=False), log_file=None, ) - assert _resolve_real_report_id(context) is None + assert resolve_real_report_id(context) is None def test_incremental_unmapped_returns_none(self, tmp_path: Path) -> None: # Worker died before mapping the report: no sidecar entry. @@ -94,7 +94,7 @@ def test_incremental_unmapped_returns_none(self, tmp_path: Path) -> None: report=SimpleNamespace(id_="sim-1", is_simulated=True), log_file=log_file, ) - assert _resolve_real_report_id(context) is None + assert resolve_real_report_id(context) is None class TestHeader: diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_typo_detector.py b/python/lib/sift_client/_tests/pytest_plugin/test_typo_detector.py index ed7a92dc4..435170ed5 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_typo_detector.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_typo_detector.py @@ -2,7 +2,7 @@ The plugin scans ``SIFT_*`` env vars and ``[tool.sift.pytest.*]`` keys at session start and emits a ``SiftPytestPluginWarning`` for anything not -declared in the central ``_OPTIONS`` registry. 
A typo (`SIFT_REPORT_SERIALNUM` +declared in the central ``PLUGIN_OPTIONS`` registry. A typo (`SIFT_REPORT_SERIALNUM` instead of `SIFT_REPORT_SERIAL_NUMBER`) would otherwise silently no-op. """ diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index 4341bf122..43f689894 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -1,1574 +1,123 @@ -from __future__ import annotations - -import os -import warnings -from dataclasses import dataclass -from datetime import datetime, timezone -from pathlib import Path -from types import SimpleNamespace -from typing import TYPE_CHECKING, Any, Generator, Tuple - -import pytest - -from sift_client import SiftClient, SiftConnectionConfig -from sift_client._internal.pyproject_config import load_tool_sift -from sift_client.errors import SiftWarning -from sift_client.sift_types.test_report import ErrorInfo, TestStatus -from sift_client.util.test_results import ReportContext -from sift_client.util.test_results.context_manager import ( - _git_metadata, - _quiet_fork_stderr, - format_assertion_message, - format_truncated_traceback, -) - - -class SiftPytestPluginWarning(SiftWarning): - """Base warning for issues raised by the Sift pytest plugin.""" - - -class SiftPytestStepDrainWarning(SiftPytestPluginWarning): - """A step's ``__exit__`` raised while the plugin was draining its stack. - - Surfaced at module-teardown or session-end so the drain can continue and - pytest test outcomes stay unaffected; the underlying exception is included - in the message for debugging. - """ - - -class SiftPytestStepDrainError(RuntimeError): - """Raised when mid-session drain fails — signals a likely upstream invariant break.""" - - -if TYPE_CHECKING: - from sift_client.util.test_results.context_manager import NewStep - -REPORT_CONTEXT: Any = None - -# Set at session end with the resolved (real) report id/URL when online and -# uploaded. 
Read from a project's conftest in a later hook (e.g. -# ``pytest_unconfigure``) to post the link, write a file, etc. -SIFT_REPORT_ID_STASH_KEY = pytest.StashKey[str]() -SIFT_REPORT_URL_STASH_KEY = pytest.StashKey[str]() - -_STASH_MISSING = object() - -_PARAMETRIZE_PATH_KEY = pytest.StashKey[Tuple[str, ...]]() -# Each frame: (path_key, open step). Frames are shared across sibling test items -# and drained at session end. -_PARAMETRIZE_STACK: list[tuple[str, Any]] = [] - -_HIERARCHY_KEY = pytest.StashKey[Tuple[Tuple[str, str, "str | None", bool], ...]]() -# Outer-to-inner frames for the item's collection-tree ancestors. Each chain -# entry is ``(identity, name, doc, rendered)``: -# - ``identity``: a globally-unique key (``node.nodeid``) used for diff -# comparison. Two ancestors at the same depth with the same display name -# but reached via different paths (e.g., ``proj_a/utils`` and -# ``proj_b/utils`` in a monorepo) get distinct identities, so they never -# silently merge in the diff. -# - ``name``: the human-readable step name used when ``rendered`` opens the -# Sift step. -# - ``doc``: docstring used for the step description if rendered. -# - ``rendered``: True iff the corresponding ``sift_*_step`` ini flag is on. -# Non-rendered frames participate in the diff but do not call -# ``rc.new_step(...)`` — they appear with ``ns=None`` in the stack. -# -# Stack entries: ``(identity, name, open_step_or_None)``. Frames are shared -# across sibling test items and drained at session end. Drained AFTER -# _PARAMETRIZE_STACK since parametrize parents nest inside hierarchy parents. -_HIERARCHY_STACK: list[tuple[str, str, Any]] = [] - - -def _drain_step_stack(stack: list, *, swallow_errors: bool = True) -> None: - """Pop and close every frame. 
- - With ``swallow_errors=True`` (default, used at teardown / session end), - per-frame failures are surfaced as ``SiftPytestStepDrainWarning`` so a - single misbehaving ``__exit__`` can't block the rest of the stack from - cleaning up or cascade out of pytest's finalizer chain. - - With ``swallow_errors=False`` (mid-session, when a class transition forces - parametrize parents to close), the stack is still fully drained but the - first per-frame exception is re-raised at the end as a - ``SiftPytestStepDrainError`` so a real upstream invariant violation - surfaces as a test error instead of a silenceable warning. - """ - errors: list[tuple[str, BaseException]] = [] - while stack: - entry = stack.pop() - # Tolerate either ``(name, ns)`` (parametrize stack) or - # ``(identity, name, ns)`` (hierarchy stack) entries. - name, ns = entry[-2], entry[-1] - if ns is None: - # Non-rendered diff-only frame (e.g. a Package frame when - # ``sift_package_step=false``); nothing to close. - continue - try: - ns.__exit__(None, None, None) - except Exception as exc: - if swallow_errors: - warnings.warn( - f"Sift plugin: closing step {name!r} during drain raised " - f"{type(exc).__name__}: {exc}", - SiftPytestStepDrainWarning, - stacklevel=2, - ) - else: - errors.append((name, exc)) - if errors: - first_name, first_exc = errors[0] - raise SiftPytestStepDrainError( - f"Sift plugin: {len(errors)} step(s) raised while draining mid-session; " - f"first failure on {first_name!r}: {type(first_exc).__name__}: {first_exc}" - ) from first_exc - - -def _drain_parametrize_stack(*, swallow_errors: bool = True) -> None: - _drain_step_stack(_PARAMETRIZE_STACK, swallow_errors=swallow_errors) - - -def _drain_hierarchy_stack(*, swallow_errors: bool = True) -> None: - _drain_step_stack(_HIERARCHY_STACK, swallow_errors=swallow_errors) - - -def _close_frame(name: str, ns: Any) -> None: - """Close a single frame, warning on per-frame failure. 
- - Used by the mid-session hierarchy-stack pop and the rollback paths so a - misbehaving ``__exit__`` neither shadows the original exception nor leaks - sibling frames. ``ns=None`` indicates a non-rendered diff-only frame; skip. - """ - if ns is None: - return - try: - ns.__exit__(None, None, None) - except Exception as exc: - warnings.warn( - f"Sift plugin: closing step {name!r} raised {type(exc).__name__}: {exc}", - SiftPytestStepDrainWarning, - stacklevel=2, - ) - - -def _build_parametrize_path(item: pytest.Item) -> tuple[str, ...]: - """Outer-to-inner step display names for a parametrized item. - - Pytest stores ``callspec.params`` with the BOTTOM decorator's axis first; - the Sift step tree treats the TOP decorator as outermost, so we reverse. - """ - callspec = getattr(item, "callspec", None) - if callspec is None or not callspec.params: - return () - originalname = getattr(item, "originalname", item.name) - frames: list[str] = [originalname] - for name, value in reversed(callspec.params.items()): - frames.append(f"{name}={value!r}") - return tuple(frames) - - -def _build_hierarchy_chain( - item: pytest.Item | pytest.Collector, - config: pytest.Config, -) -> tuple[tuple[str, str, str | None, bool], ...]: - """Outer-to-inner ``(identity, name, docstring, rendered)`` for collection ancestors. - - Walks ``item.parent`` upward and ALWAYS collects every ``pytest.Package``, - ``pytest.Module``, and ``pytest.Class`` ancestor — they all participate in - the diff that keeps the report tree coherent across tests, so two - same-named ancestors reached via different paths (e.g., ``proj_a/utils`` - and ``proj_b/utils`` in a monorepo where the ``proj_*`` dirs are - ``pytest.Dir`` nodes the walker skips) cannot silently merge. - - The ``identity`` field is ``node.nodeid`` — globally unique per collected - node. The diff compares on identity, not the display ``name``. 
- - The ``rendered`` flag is True iff the layer's ini flag is on - (``sift_package_step`` / ``sift_module_step`` / ``sift_class_step``). - Non-rendered frames participate in the diff for identity but don't open a - Sift step. - - The ``node.obj`` access is a pytest property that imports the underlying - Python object and can raise *any* exception (ImportError, custom - metaclass errors, descriptor ``__doc__`` properties that throw). Guard - broadly so a misbehaving collector doesn't abort the whole collection - phase — that frame's docstring just becomes ``None``. - """ - include_package = bool(_PACKAGE_STEP.resolve(config)) - include_module = bool(_MODULE_STEP.resolve(config)) - include_class = bool(_CLASS_STEP.resolve(config)) - - chain: list[tuple[str, str, str | None, bool]] = [] - # ``node.parent`` is typed as the internal ``_pytest.nodes.Node`` which - # isn't part of pytest's public API; widen to ``Any`` for the walk. - node: Any = item - while node is not None: - if isinstance(node, pytest.Class): - rendered = include_class - elif isinstance(node, pytest.Module): - rendered = include_module - elif isinstance(node, pytest.Package): - rendered = include_package - else: - node = node.parent - continue - try: - doc = ( - (getattr(node, "obj", None) and getattr(node.obj, "__doc__", None)) or "" - ).strip() or None - except Exception: - doc = None - chain.append((node.nodeid, node.name, doc, rendered)) - node = node.parent - return tuple(reversed(chain)) - - -# Settings-reference categories. Each maps to a docs subsection and, in the -# renderer, to the column subset that category actually uses. -_CAT_BEHAVIOR = "Pytest behavior" -_CAT_CONNECTION = "Connection" -_CAT_REPORT = "Report content" -_CATEGORIES = (_CAT_BEHAVIOR, _CAT_CONNECTION, _CAT_REPORT) - -_TOOL_SIFT_KEY = pytest.StashKey[dict]() - - -def _tool_sift(config: pytest.Config | None) -> dict[str, Any]: - """Session-cached ``[tool.sift]`` table. 
- - Every option that reads TOML, plus the typo detector, would otherwise - re-parse pyproject.toml on the session-start path — and re-emit the - malformed-file warning each time. Parse once per session via the config - stash; ``load_tool_sift`` stays the uncached parser for direct callers. - """ - if config is None: - return {} - cached = config.stash.get(_TOOL_SIFT_KEY, None) - if cached is None: - cached = load_tool_sift(config) - config.stash[_TOOL_SIFT_KEY] = cached - return cached - - -@dataclass(frozen=True) -class _Option: - """One setting and the logic to resolve it from wherever it can be set. - - A setting may be read from an env var, a CLI flag, a pytest ini key, or a - ``[tool.sift...]`` TOML path. :meth:`resolve` walks the declared surfaces in - env > cli > ini > toml order. ``metadata`` is the one exception: a free-form - TOML table (``merge=True``) resolved by :meth:`resolve_merged`. - - One registry of these drives ``pytest_addoption``, the resolvers, the docs - settings-reference table, and the typo detector, so a setting is added or - changed in one place. - - Surface fields (declare only the ones a setting uses): - - - ``cli`` / ``cli_action``: CLI flag (e.g. ``"--sift-offline"``) and - argparse action; ``cli_dest`` is derived from the flag. - - ``ini`` / ``ini_type`` / ``ini_default``: pytest ini key under - ``[tool.pytest.ini_options]`` and its pytest type + default. - - ``toml``: tuple path under ``[tool.sift...]``, e.g. - ``("pytest", "report", "name")`` -> ``tool.sift.pytest.report.name``. - - ``env``: full env var name, e.g. ``"SIFT_API_KEY"``. - - ``category`` groups the option in the docs settings reference (one of - ``_CATEGORIES``). - """ - - name: str - help: str - category: str - cli: str | None = None - cli_action: str | None = None - ini: str | None = None - ini_type: str | None = None - ini_default: Any = None - toml: tuple[str, ...] 
| None = None - env: str | None = None - merge: bool = False - - @property - def cli_dest(self) -> str: - """Argparse ``dest`` for the option. - - When the option has both a CLI flag and an ini key, the dest matches - the ini name so ``config.getoption(ini_name)`` returns the CLI value - (and falls through to ``config.getini(ini_name)`` when the flag wasn't - passed). Without an ini key, the dest derives from the flag name. - """ - if self.ini: - return self.ini - if self.cli is None: - return self.name - return self.cli.lstrip("-").replace("-", "_") - - def __post_init__(self) -> None: - if self.cli_action and not self.cli: - raise ValueError(f"_Option({self.name!r}): cli_action requires cli") - if self.ini_type and not self.ini: - raise ValueError(f"_Option({self.name!r}): ini_type requires ini") - if self.merge and not self.toml: - raise ValueError(f"_Option({self.name!r}): merge=True needs toml") - if not any([self.cli, self.ini, self.toml, self.env]): - raise ValueError(f"_Option({self.name!r}): declares no surfaces") - if self.category not in _CATEGORIES: - raise ValueError(f"_Option({self.name!r}): category must be one of {_CATEGORIES}") - - def resolve(self, config: pytest.Config | None) -> Any: - """First set value from declared surfaces; ``None`` when unset everywhere. - - Walk order is env > cli > ini > toml. No current option declares both - env and cli, so the chain isn't ambiguous in practice. - ``getini`` returns the typed default for unset bool/list keys, so this - only returns ini values for booleans (always meaningful), non-empty - strings, and non-empty lists. 
- """ - if self.env: - env_value = os.getenv(self.env) - if env_value not in (None, ""): - return env_value - if config is None: - return None - if self.cli: - cli_value = config.getoption(self.cli_dest, default=None) - if cli_value is not None: - return cli_value - if self.ini: - try: - ini_value = config.getini(self.ini) - except (KeyError, ValueError): - ini_value = None - if isinstance(ini_value, bool): - return ini_value - if isinstance(ini_value, str) and ini_value: - return ini_value - if isinstance(ini_value, list) and ini_value: - return ini_value - if self.toml: - toml_value = _walk_toml(_tool_sift(config), self.toml) - if toml_value not in (None, ""): - return toml_value - return None - - def resolve_merged(self, config: pytest.Config | None) -> dict[str, str | float | bool]: - """For ``merge=True`` dict-shape settings: the free-form TOML table. - - TOML values that don't fit ``dict[str, str | float | bool]`` (nested - tables, lists, ``None``) are dropped with a warning so a malformed - entry can't crash report creation. - """ - result: dict[str, str | float | bool] = {} - if config is not None and self.toml: - base = _walk_toml(_tool_sift(config), self.toml) - if isinstance(base, dict): - for key, value in base.items(): - if not isinstance(key, str): - continue - if isinstance(value, (bool, str, int, float)): - # ``bool`` first since ``isinstance(True, int)`` is True. 
- result[key] = value # type: ignore[assignment] - continue - warnings.warn( - f"[tool.sift.{'.'.join(self.toml)}] entry {key!r} ignored: " - f"unsupported type {type(value).__name__}.", - SiftPytestPluginWarning, - stacklevel=2, - ) - return result - - -def _walk_toml(data: dict[str, Any], path: tuple[str, ...]) -> Any: - """Walk a parsed TOML tree along ``path``; return None on any missing key.""" - cur: Any = data - for key in path: - if not isinstance(cur, dict): - return None - cur = cur.get(key) - if cur is None: - return None - return cur - - -# --------------------------------------------------------------------------- -# Settings registry. -# -# Add new options here. The registry drives `pytest_addoption`, resolution, -# the docs settings-reference table, and the unknown-key typo detector, so a -# setting is declared once instead of wired up in several places. -# -# Where each setting lives follows a few principles: -# - Secrets (the API key) come from environment variables only, never a -# committed file. -# - Pytest behavior lives in [tool.pytest.ini_options] so it integrates with -# `pytest --help` / `--co` / `--trace-config`. -# - Sift report content lives in [tool.sift.pytest.report.*]. -# - Non-secret endpoints take an env var plus one static home (ini or toml, -# not both). -# - A CLI flag is added only when there is a real per-run override workflow; -# stable project config stays in ini/toml. -# - Dynamic per-run values are injected via environment variables (pytest-dotenv -# loads .env for local dev; CI sets the same names from its secret store). -# --------------------------------------------------------------------------- - -# Pytest behavior. The CLI flag survives because the per-run override is real. 
-_LOG_FILE = _Option( - name="log_file", - category=_CAT_BEHAVIOR, - help="Path to the JSONL log of create/update calls (path | true | false | none).", - cli="--sift-log-file", - ini="sift_log_file", -) -_GIT_METADATA = _Option( - name="git_metadata", - category=_CAT_BEHAVIOR, - help="Capture git repo/branch/commit on the report.", - cli="--no-sift-git-metadata", - cli_action="store_false", - ini="sift_git_metadata", - ini_type="bool", - ini_default=True, -) -_OFFLINE = _Option( - name="offline", - category=_CAT_BEHAVIOR, - help="Skip the session-start ping; route create/update through the JSONL log.", - cli="--sift-offline", - cli_action="store_true", - ini="sift_offline", - ini_type="bool", - ini_default=False, -) -_DISABLED = _Option( - name="disabled", - category=_CAT_BEHAVIOR, - help="Disable Sift entirely (no API calls, no log file). Supersedes --sift-offline.", - cli="--sift-disabled", - cli_action="store_true", - ini="sift_disabled", - ini_type="bool", - ini_default=False, -) - -_OPEN = _Option( - name="open_report", - category=_CAT_BEHAVIOR, - help="Open the resulting report in a browser at session end (online only; " - "no-op when the report URL can't be resolved).", - cli="--sift-open-report", - cli_action="store_true", - ini="sift_open_report", - ini_type="bool", - ini_default=False, -) - -# Pytest behavior: set-once project defaults (no CLI flag — no per-run override). 
-_AUTOUSE = _Option( - name="autouse", - category=_CAT_BEHAVIOR, - help="Default for the Sift autouse fixtures (report_context, step, hierarchy/parametrize parents).", - ini="sift_autouse", - ini_type="bool", - ini_default=True, -) -_PACKAGE_STEP = _Option( - name="package_step", - category=_CAT_BEHAVIOR, - help="Open a parent step for each Python package in the test path.", - ini="sift_package_step", - ini_type="bool", - ini_default=True, -) -_MODULE_STEP = _Option( - name="module_step", - category=_CAT_BEHAVIOR, - help="Open a parent step for each test module.", - ini="sift_module_step", - ini_type="bool", - ini_default=True, -) -_CLASS_STEP = _Option( - name="class_step", - category=_CAT_BEHAVIOR, - help="Open per-class parent steps, including nested classes.", - ini="sift_class_step", - ini_type="bool", - ini_default=True, -) -_PARAMETRIZE_NESTING = _Option( - name="parametrize_nesting", - category=_CAT_BEHAVIOR, - help="Cluster parametrized tests under shared parent steps (e.g. test_a -> v=1, v=2).", - ini="sift_parametrize_nesting", - ini_type="bool", - ini_default=True, -) - -# Credentials. The API key is env-only; the URIs accept env + ini. -_API_KEY = _Option( - name="api_key", - category=_CAT_CONNECTION, - help="Sift API key (secret, env-only).", - env="SIFT_API_KEY", -) -_GRPC_URI = _Option( - name="grpc_uri", - category=_CAT_CONNECTION, - help="Sift gRPC endpoint URI.", - env="SIFT_GRPC_URI", - ini="sift_grpc_uri", -) -_REST_URI = _Option( - name="rest_uri", - category=_CAT_CONNECTION, - help="Sift REST endpoint URI.", - env="SIFT_REST_URI", - ini="sift_rest_uri", -) -_APP_URL = _Option( - name="app_url", - category=_CAT_CONNECTION, - help="Sift web-app origin for the report link in the terminal footer (e.g. " - "https://app.siftstack.com). When unset, the link is derived from the REST URI " - "for known Sift hosts.", - env="SIFT_APP_URL", - ini="sift_app_url", -) - -# Report content. 
Project defaults in [tool.sift.pytest.report]; CI injects -# per-run values via SIFT_REPORT_* env vars (pytest-dotenv handles .env files -# for local dev). -_REPORT_NAME = _Option( - name="report_name", - category=_CAT_REPORT, - help="Template for the report display name. Placeholders: {target}, {command}, {args}, " - "{rootdir}, {timestamp}, {count}, {git_repo}, {git_branch}, {git_commit}.", - toml=("pytest", "report", "name"), -) -_TEST_CASE = _Option( - name="test_case", - category=_CAT_REPORT, - help="Template for the report's test_case field (same placeholders as report_name).", - toml=("pytest", "report", "test_case"), -) -_TEST_SYSTEM_NAME = _Option( - name="test_system_name", - category=_CAT_REPORT, - help="Name of the test system / rig. Defaults to the host's name.", - env="SIFT_REPORT_TEST_SYSTEM_NAME", - toml=("pytest", "report", "test_system_name"), -) -_SYSTEM_OPERATOR = _Option( - name="system_operator", - category=_CAT_REPORT, - help="Operator running the test. Defaults to the OS user.", - env="SIFT_REPORT_SYSTEM_OPERATOR", - toml=("pytest", "report", "system_operator"), -) -_SERIAL_NUMBER = _Option( - name="serial_number", - category=_CAT_REPORT, - help="Serial number of the unit under test.", - env="SIFT_REPORT_SERIAL_NUMBER", - toml=("pytest", "report", "serial_number"), -) -_PART_NUMBER = _Option( - name="part_number", - category=_CAT_REPORT, - help="Part number of the unit under test.", - env="SIFT_REPORT_PART_NUMBER", - toml=("pytest", "report", "part_number"), -) -_METADATA = _Option( - name="metadata", - category=_CAT_REPORT, - help="Free-form report metadata, as a TOML table of scalar values. For " - "dynamic per-run keys, attach them in conftest via the report_context fixture.", - toml=("pytest", "report", "metadata"), - merge=True, -) - -_OPTIONS: tuple[_Option, ...] 
= ( - _LOG_FILE, - _GIT_METADATA, - _OFFLINE, - _DISABLED, - _OPEN, - _AUTOUSE, - _PACKAGE_STEP, - _MODULE_STEP, - _CLASS_STEP, - _PARAMETRIZE_NESTING, - _API_KEY, - _GRPC_URI, - _REST_URI, - _APP_URL, - _REPORT_NAME, - _TEST_CASE, - _TEST_SYSTEM_NAME, - _SYSTEM_OPERATOR, - _SERIAL_NUMBER, - _PART_NUMBER, - _METADATA, -) - - -def pytest_addoption(parser: pytest.Parser) -> None: - """Register every CLI flag and pytest ini key declared in ``_OPTIONS``. - - One loop drives both surfaces — adding a setting is one entry in the - registry, not three edits across this function and a docs table. - """ - group = parser.getgroup("sift", description="Sift test results") - for opt in _OPTIONS: - if opt.cli is not None: - cli_kwargs: dict[str, Any] = { - "dest": opt.cli_dest, - "default": None, - "help": opt.help, - } - if opt.cli_action is not None: - cli_kwargs["action"] = opt.cli_action - group.addoption(opt.cli, **cli_kwargs) - if opt.ini is not None: - ini_kwargs: dict[str, Any] = {"help": opt.help, "default": opt.ini_default} - if opt.ini_type is not None: - ini_kwargs["type"] = opt.ini_type - parser.addini(opt.ini, **ini_kwargs) - - -def pytest_configure(config: pytest.Config) -> None: - """Register the Sift gate markers and warn on unknown ``SIFT_*`` settings.""" - config.addinivalue_line( - "markers", - "sift_include: force the Sift autouse fixtures to activate for this test " - "regardless of the `sift_autouse` ini default.", - ) - config.addinivalue_line( - "markers", - "sift_exclude: force the Sift autouse fixtures to skip this test " - "regardless of the `sift_autouse` ini default.", - ) - # Surface typos in env vars and [tool.sift...] keys at session start so a - # silent no-op (env var that doesn't match anything, table key the loader - # ignores) becomes visible. The registry is the source of truth for what's - # known. 
- _warn_on_unknown_env_vars() - _warn_on_unknown_toml_keys(config) - - -def _render_settings_reference() -> str: - """Render the Markdown settings reference from ``_OPTIONS``. - - One ``### `` subsection per category, each table showing only the - columns that category uses (so no dead all-``—`` columns). The plugin docs - at ``docs/guides/pytest_plugin/configuration.md`` embed this output verbatim - so the registry and the docs can't drift; - ``test_settings_reference_docs_in_sync`` is the guard rail. Regenerate with:: - - uv run python -c "from sift_client.pytest_plugin import _render_settings_reference; print(_render_settings_reference())" - """ - - def _cli_cell(opt: _Option) -> str: - return f"`{opt.cli}`" if opt.cli else "—" - - def _ini_cell(opt: _Option) -> str: - return f"`{opt.ini}`" if opt.ini else "—" - - def _toml_cell(opt: _Option) -> str: - if not opt.toml: - return "—" - if opt.merge: - return f"`[tool.sift.{'.'.join(opt.toml)}]` (table)" - section = ".".join(opt.toml[:-1]) - return f"`[tool.sift.{section}] {opt.toml[-1]}`" - - def _env_cell(opt: _Option) -> str: - if opt.env: - return f"`{opt.env}`" - return "—" - - # Per-category column layout: only the surfaces that category actually uses. - # Each column is (header, cell-renderer). - columns_by_category = { - _CAT_BEHAVIOR: [ - ("CLI flag", _cli_cell), - ("Ini (`[tool.pytest.ini_options]`)", _ini_cell), - ], - _CAT_CONNECTION: [ - ("Ini (`[tool.pytest.ini_options]`)", _ini_cell), - ("Env var", _env_cell), - ], - _CAT_REPORT: [ - ("TOML (`[tool.sift...]`)", _toml_cell), - ("Env var", _env_cell), - ], - } - - def _escape(cell: str) -> str: - # Literal pipes inside a Markdown table cell need backslash escaping or - # they'd be parsed as column separators. 
- return cell.replace("|", "\\|") - - blocks: list[str] = [] - for category in _CATEGORIES: - opts = [o for o in _OPTIONS if o.category == category] - if not opts: - continue - columns = columns_by_category[category] - headers = ["Setting", *(h for h, _ in columns)] - lines = [ - f"### {category}", - "", - "| " + " | ".join(headers) + " |", - "|" + "|".join(["---"] * len(headers)) + "|", - ] - for opt in opts: - cells = [opt.help, *(render(opt) for _, render in columns)] - lines.append("| " + " | ".join(_escape(c) for c in cells) + " |") - blocks.append("\n".join(lines)) - return "\n\n".join(blocks) - - -def _warn_on_unknown_env_vars() -> None: - """Emit a warning for any ``SIFT_*`` env var not declared in the registry. - - The registry declares each env var by its full name (``opt.env``); a - ``SIFT_*`` var that matches none of them is almost always a typo. - """ - import difflib - - known_full = {opt.env for opt in _OPTIONS if opt.env} - suggestion_pool = sorted(known_full) - for name in sorted(os.environ): - if not name.startswith("SIFT_"): - continue - if name in known_full: - continue - close = difflib.get_close_matches(name, suggestion_pool, n=1, cutoff=0.6) - hint = f" (did you mean `{close[0]}`?)" if close else "" - warnings.warn( - f"Unknown SIFT_* env var `{name}`{hint}; ignored.", - SiftPytestPluginWarning, - stacklevel=2, - ) - - -def _warn_on_unknown_toml_keys(config: pytest.Config) -> None: - """Walk ``[tool.sift.pytest.*]`` in pyproject.toml and warn on keys outside the registry. - - Only the ``tool.sift.pytest`` subtree is checked. Other ``tool.sift.*`` - subtrees are reserved for non-pytest Sift tooling (e.g. ``tool.sift.extras`` - is consumed by this repo's extras-generation script) and aren't our - concern. Free-form subtrees (``merge=True`` options like ``metadata``) - stop the walk — their keys are user-defined and not validated. 
- """ - import difflib - - data = _tool_sift(config) - pytest_table = (data or {}).get("pytest") - if not isinstance(pytest_table, dict): - return - # Build leaf/free-form/prefix sets relative to the ``("pytest", ...)`` root - # the registry already uses, so the walk runs on the table we just sliced. - leaves = {opt.toml for opt in _OPTIONS if opt.toml and not opt.merge} - free_form = {opt.toml for opt in _OPTIONS if opt.toml and opt.merge} - prefixes: set[tuple[str, ...]] = set() - for full in leaves | free_form: - for i in range(len(full)): - prefixes.add(full[:i]) - - def _walk(node: Any, base: tuple[str, ...]) -> None: - if base in free_form or not isinstance(node, dict): - return - for key, value in node.items(): - path = (*base, str(key)) - if path in leaves or path in free_form: - continue - if path in prefixes: - _walk(value, path) - continue - full_name = "tool.sift." + ".".join(path) - same_depth = [ - ".".join(p) for p in (leaves | free_form | prefixes) if len(p) == len(path) - ] - close = difflib.get_close_matches(".".join(path), same_depth, n=1, cutoff=0.6) - hint = f" (did you mean `tool.sift.{close[0]}`?)" if close else "" - warnings.warn( - f"Unknown sift config key `{full_name}`{hint}; ignored.", - SiftPytestPluginWarning, - stacklevel=2, - ) - - _walk(pytest_table, ("pytest",)) - - -def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None: - """Stash each item's class chain + parametrize path and cluster siblings. - - Sorts by ``(file_path, hierarchy_chain, parametrize_path)`` so sibling - items under a shared parent (package, module, class, or parametrize axis) - stay contiguous — otherwise a free function sorting between two class - methods would tear down + re-open the class step, producing duplicate - parents in the report tree. 
- """ - for item in items: - item.stash[_HIERARCHY_KEY] = _build_hierarchy_chain(item, config) - item.stash[_PARAMETRIZE_PATH_KEY] = _build_parametrize_path(item) - # Use ``.get(...)`` defensively: a third-party hook may inject items after - # our stashing loop runs, and we'd rather sort them at the tail than - # KeyError out of collection. - items.sort( - key=lambda i: ( - str(i.path), - tuple(identity for identity, _, _, _ in i.stash.get(_HIERARCHY_KEY, ())), - i.stash.get(_PARAMETRIZE_PATH_KEY, ()), - ) - ) - - -def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None: - """Drain any parent steps still open at session end (innermost first). - - Wrapped so a failure in the inner drain does not prevent the outer one - from running. With ``module_substep`` removed, this is the sole place - where hierarchy parents close — they persist across all tests and only - drain when the session ends. - """ - try: - _drain_parametrize_stack() - finally: - _drain_hierarchy_stack() - - -def _is_offline(pytestconfig: pytest.Config | None) -> bool: - return bool(_OFFLINE.resolve(pytestconfig)) - - -def _is_disabled(pytestconfig: pytest.Config | None) -> bool: - return bool(_DISABLED.resolve(pytestconfig)) - - -def _sdk_version() -> str: - """Return the installed ``sift_stack_py`` version, or ``"unknown"``.""" - from importlib.metadata import PackageNotFoundError, version - - try: - return version("sift_stack_py") - except PackageNotFoundError: - return "unknown" - - -def _mode_label(config: pytest.Config) -> str: - """Resolve the active mode for the terminal header: disabled > offline > online.""" - if _is_disabled(config): - return "disabled" - if _is_offline(config): - return "offline" - return "online" - - -def pytest_report_header(config: pytest.Config) -> str | None: - """Emit a session-start header with the SDK version and active mode. - - Suppressed under ``-q`` (negative verbosity), matching how pytest hides its - own platform/plugin header. 
- """ - if config.get_verbosity() < 0: - return None - return f"Sift: sift-stack-py {_sdk_version()} — {_mode_label(config)} mode" +"""Sift pytest plugin: records each test as a step in a Sift test report. +Load it from a project's ``conftest.py``:: -def _resolve_real_report_id(context: Any) -> str | None: - """Resolve the real server-side report id for the online footer link. - - In synchronous online mode (``--sift-log-file=false``) the report is created - directly against the API, so ``report.id_`` is already the real id. In the - default incremental mode the report is created through the simulate path - (a client-side UUID) and the background worker maps it to the real id on - replay, recording it in the ``.tracking`` sidecar's ``id_map``. By the - time this footer runs the session-scoped report context has torn down and - the worker has drained, so the sidecar is final. - - Returns ``None`` when the worker never mapped the report (e.g. it died before - replaying the create), meaning no real report exists to link. - """ - report = context.report - if not report.id_: - # No id was ever assigned (unset/empty); nothing to link. - return None - sim_id = str(report.id_) - if not getattr(report, "is_simulated", False): - return sim_id - log_file = getattr(context, "log_file", None) - if log_file is None: - return None - from sift_client._internal.low_level_wrappers._test_results_log import LogTracking - - return LogTracking.load(log_file).id_map.get(sim_id) - - -_LABEL_WIDTH = 13 - - -def _sift_kv(terminalreporter: Any, label: str, value: str, **value_markup: bool) -> None: - """Write an indented ``label value`` row, bolding the label. - - ``value_markup`` (e.g. ``green=True``, ``cyan=True``) styles only the value. - Color is dropped automatically when the terminal has no markup (not a TTY or - ``--color=no``), so captured/CI output stays plain text. 
- """ - terminalreporter.write(" ") - terminalreporter.write(f"{label:<{_LABEL_WIDTH}}", bold=True) - terminalreporter.write_line(value, **value_markup) - - -# Step-count breakdown order and labels for the footer's "Steps" row. -_STEP_COUNT_ORDER: tuple[tuple[TestStatus, str], ...] = ( - (TestStatus.PASSED, "passed"), - (TestStatus.FAILED, "failed"), - (TestStatus.ERROR, "error"), - (TestStatus.ABORTED, "aborted"), - (TestStatus.SKIPPED, "skipped"), - (TestStatus.IN_PROGRESS, "in progress"), -) - - -# Per-status color for the footer's step breakdown: green pass, red -# failure/error/abort, yellow skip; in-progress (and anything else) stays plain. -_STEP_STATUS_MARKUP: dict[TestStatus, dict[str, bool]] = { - TestStatus.PASSED: {"green": True}, - TestStatus.FAILED: {"red": True}, - TestStatus.ERROR: {"red": True}, - TestStatus.ABORTED: {"red": True}, - TestStatus.SKIPPED: {"yellow": True}, -} - - -def _step_count_segments(counts: Any) -> list[tuple[str, dict[str, bool]]]: - """Build ``(text, markup)`` segments for a step tally, non-zero only.""" - return [ - (f"{counts.get(status, 0)} {label}", _STEP_STATUS_MARKUP.get(status, {})) - for status, label in _STEP_COUNT_ORDER - if counts.get(status, 0) - ] - - -def _measurement_segments(counts: Any) -> list[tuple[str, dict[str, bool]]]: - """Build ``(text, markup)`` segments for a measurement tally, non-zero only.""" - segments: list[tuple[str, dict[str, bool]]] = [] - if counts.get(True, 0): - segments.append((f"{counts[True]} passed", {"green": True})) - if counts.get(False, 0): - segments.append((f"{counts[False]} failed", {"red": True})) - return segments - - -def _write_count_row( - terminalreporter: Any, label: str, segments: list[tuple[str, dict[str, bool]]] -) -> None: - """Write a ``label a · b · c`` row, applying each segment's color markup.""" - terminalreporter.write(" ") - terminalreporter.write(f"{label:<{_LABEL_WIDTH}}", bold=True) - for index, (text, markup) in enumerate(segments): - if index: - 
terminalreporter.write(" · ") - terminalreporter.write(text, **markup) - terminalreporter.write_line("") - - -def _report_panel_title(report: Any, terminalreporter: Any) -> str: - """``Sift report · `` for the section rule, truncated to the terminal width. - - The report name embeds a timestamp (and, for invocation-based runs, the - pytest args), so a long name is truncated with an ellipsis to keep the - separator line from wrapping. - """ - base = "Sift report" - name = getattr(report, "name", None) - if not name: - return base - title = f"{base} · {name}" - fullwidth = getattr(getattr(terminalreporter, "_tw", None), "fullwidth", 80) - # Reserve room for the separator characters and spaces write_sep adds. - limit = max(len(base), fullwidth - 8) - if len(title) > limit: - title = title[: limit - 1] + "…" - return title - - -def _maybe_open_report(url: str) -> None: - """Best-effort open the report URL in a browser (for ``--sift-open-report``). - - Skipped on CI or non-interactive sessions so a committed ``sift_open_report`` - setting can't spawn a browser on a headless agent; the flag is meant for - local development. - """ - import sys - import webbrowser - - if os.environ.get("CI") or not sys.stdout.isatty(): - return - try: - # webbrowser.open forks/execs the platform opener while the gRPC client's - # background threads are live; redirect fd 2 across the fork to swallow - # gRPC's prefork notice (same treatment as the plugin's other fork sites). - with _quiet_fork_stderr(): - webbrowser.open(url) - except Exception: - # Headless / no browser available: opening is a convenience, never fatal. - pass - - -def pytest_terminal_summary(terminalreporter: Any, exitstatus: int, config: pytest.Config) -> None: - """Emit a session-end Sift report summary, adapting per mode. 
- - The printed panel is suppressed under ``-q``, but programmatic side effects - (stashing the report ref for ``conftest.py``, ``--sift-open-report``) still run so - other plugins and CI steps can consume the result. The panel shows the - outcome (green/red), step and measurement tallies, and a per-mode action: a - report link (online), the upload command (offline), or a disabled note. - """ - quiet = config.get_verbosity() < 0 - - if _is_disabled(config): - if not quiet: - terminalreporter.write_sep("=", "Sift", cyan=True, bold=True) - terminalreporter.write_line("Sift disabled — no test report created.") - return - - context = REPORT_CONTEXT - if context is None: - # No gated test ran, so no report context was created. Nothing to show. - return - - log_file = getattr(context, "log_file", None) - offline = _is_offline(config) - - # Resolve the report link first so stashing and --sift-open-report run even under - # -q (programmatic consumers don't care about verbosity). Truthiness, not - # ``is not None``: a resolved-but-empty id (degenerate sidecar mapping, unset - # proto field) must fall through to the "not uploaded" path, not produce a - # ``/test-results/`` link. - report_id = None if offline else _resolve_real_report_id(context) - report_url = ( - f"{context.client.app_url}/test-results/{report_id}" - if report_id and context.client.app_url - else None - ) - if report_id: - config.stash[SIFT_REPORT_ID_STASH_KEY] = report_id - if report_url is not None: - config.stash[SIFT_REPORT_URL_STASH_KEY] = report_url - if _OPEN.resolve(config): - _maybe_open_report(report_url) - - if quiet: - return - - failed = bool(getattr(context, "any_failures", False)) - status_word, status_markup = ( - ("FAILED", {"red": True, "bold": True}) - if failed - else ("PASSED", {"green": True, "bold": True}) - ) - # Offline results live only in the local log until replayed, so the status - # row calls that out instead of repeating the version (already in the header). 
- status_context = ( - f"{_mode_label(config)} · not uploaded" - if offline - else f"{_mode_label(config)} · sift-stack-py {_sdk_version()}" - ) - - report = context.report - - terminalreporter.write_sep( - "=", _report_panel_title(report, terminalreporter), cyan=True, bold=True - ) - - # Identity row: the test case (test path or pytest invocation). - if report.test_case: - _sift_kv(terminalreporter, "Test case", str(report.test_case)) - - # Status row: colored outcome, then compact mode context. - terminalreporter.write(" ") - terminalreporter.write(f"{'Status':<{_LABEL_WIDTH}}", bold=True) - terminalreporter.write(status_word, **status_markup) - terminalreporter.write_line(f" {status_context}") - - # Step + measurement tallies (green pass, red failure, yellow skip). - _write_count_row( - terminalreporter, - "Steps", - _step_count_segments(context.step_status_counts) or [("no steps", {})], - ) - measurement_segments = _measurement_segments(context.measurement_counts) - if measurement_segments: - _write_count_row(terminalreporter, "Measurements", measurement_segments) - - # Provenance row: test system and operator. - system = " · ".join(part for part in (report.test_system_name, report.system_operator) if part) - if system: - _sift_kv(terminalreporter, "System", system) - - # Local log file (write-through backup online, sole sink offline). - if log_file is not None: - _sift_kv(terminalreporter, "Log file", str(log_file)) - - if offline: - if log_file is not None: - terminalreporter.write_sep("-", "to upload to Sift") - terminalreporter.write_line(f" >> import-test-result-log {log_file}", cyan=True) - return - - if not report_id: - # Incremental upload never mapped the report (the worker died before - # replaying the create), so there's no real report to link. 
- _sift_kv( - terminalreporter, - "Report", - f"not uploaded — replay with: import-test-result-log {log_file}", - yellow=True, - ) - elif report_url is not None: - _sift_kv(terminalreporter, "Report", report_url, cyan=True) - else: - _sift_kv( - terminalreporter, - "Report", - f"id {report_id} (set sift_app_url for a clickable link)", - ) - - if report_id and getattr(context, "replay_incomplete", False) and log_file is not None: - _sift_kv( - terminalreporter, - "", - f"may be incomplete — finish with: import-test-result-log {log_file}", - yellow=True, - ) - - -def _sift_enabled_for(node: pytest.Item | pytest.Collector, default: bool) -> bool: - """Resolve the Sift gate for a node: sift_exclude > sift_include > default. - - `get_closest_marker` walks the node hierarchy upward, so markers applied - at any level (function, class, module, package, session) are honored. - """ - if node.get_closest_marker("sift_exclude"): - return False - if node.get_closest_marker("sift_include"): - return True - return default - - -def _resolve_log_file(pytestconfig: pytest.Config | None) -> str | Path | bool | None: - """Determine log_file value from CLI flag or ini key. - - Three signal types arrive here: - - * ``None`` — unset; nothing was passed on the CLI and the ini key is - absent. Treat as the default "use a temp file." - * Python ``False`` — an explicit disable, typically set in a conftest via - ``config.option.sift_log_file = False``. Return ``None`` so - the rest of the pipeline knows to skip logging entirely. - * A string (from CLI or ini) — interpret ``"true"`` / ``"1"`` as the temp - file default, ``"false"`` / ``"none"`` as disable, anything else as a - file path. - - Rejects ``--sift-log-file=none`` combined with ``--sift-offline`` since - offline mode needs the log file as its sole sink. 
- """ - raw = _LOG_FILE.resolve(pytestconfig) - disabled = raw is False or (isinstance(raw, str) and raw.lower() in ("false", "none")) - if disabled and _is_offline(pytestconfig): - raise pytest.UsageError( - "--sift-log-file=none is incompatible with --sift-offline; offline " - "mode requires a log file. Pin one with --sift-log-file=, or " - "drop --sift-log-file=none to use a temp file." - ) - if raw is False: - return None - if not raw: - return True - lower = str(raw).lower() - if lower in ("true", "1"): - return True - if lower in ("false", "none"): - return None - return Path(raw) - - -def _error_info_from_longrepr(longrepr: Any) -> ErrorInfo: - """Fall back to the report's longrepr when no Python exception is available.""" - return ErrorInfo(error_code=1, error_message=str(longrepr) if longrepr is not None else "") - - -def _resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: - """Resolve the function step's status from pytest's per-phase reports. - - Reads ``_sift_phase_setup`` / ``_sift_phase_call`` and the test's xfail marker, - then mutates ``new_step.current_step`` in place and flips - ``new_step._sift_managed_externally`` so ``NewStep.__exit__`` emits the - resolved status without re-classifying. - - When the call phase reports ``passed`` and no override is needed (i.e. the - test's own status or substep failures should drive the result), this leaves - the step alone so the default ``__exit__`` resolution stays in charge. - """ - current_step = new_step.current_step - if current_step is None: - # The step never opened (the autouse fixture short-circuited or was - # disabled). Nothing to resolve. 
- return - setup_phase = getattr(item, "_sift_phase_setup", None) - call_phase = getattr(item, "_sift_phase_call", None) - xfail_marker = item.get_closest_marker("xfail") - xfail_runs = xfail_marker.kwargs.get("run", True) if xfail_marker is not None else True - - status: TestStatus | None = None - error_info: ErrorInfo | None = None - keep_managed = False - - if setup_phase is not None and setup_phase.report.outcome == "failed": - status = TestStatus.ERROR - excinfo = setup_phase.call.excinfo - if excinfo is not None: - error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) - else: - error_info = _error_info_from_longrepr(setup_phase.report.longrepr) - elif setup_phase is not None and setup_phase.report.outcome == "skipped": - status = TestStatus.SKIPPED - elif call_phase is None: - # Setup completed but the call-phase report never fired — the inner - # pytester session was aborted (e.g. by KeyboardInterrupt) before the - # plugin could observe the outcome. Leave the step at IN_PROGRESS so - # the report does not lie about a clean pass. - keep_managed = True - else: - wasxfail = getattr(call_phase.report, "wasxfail", None) - if wasxfail is not None: - if call_phase.report.outcome == "failed": - # Strict xpass: pytest synthesizes a failure when an xfail(strict=True) - # test unexpectedly passes. The xfail mark no longer matches reality. - status = TestStatus.FAILED - elif call_phase.report.outcome == "skipped": - if xfail_marker is not None and xfail_runs is False: - # xfail(run=False): the test body never executed. - status = TestStatus.SKIPPED - else: - # xfail + expected failure: the test fulfilled its xfail expectation. - status = TestStatus.PASSED - else: - # Non-strict xpass: passes that weren't required to fail. - status = TestStatus.PASSED - elif call_phase.report.outcome == "passed": - # Default __exit__ resolves PASSED/FAILED from open_step_results and any - # status the test code may have set. Don't override it here. 
- return - elif call_phase.report.outcome == "skipped": - status = TestStatus.SKIPPED - elif call_phase.report.outcome == "failed": - excinfo = call_phase.call.excinfo - children_passed = new_step.report_context.open_step_results.get( - current_step.step_path, True - ) - if excinfo is None: - status = TestStatus.FAILED - elif isinstance(excinfo.value, AssertionError): - status = TestStatus.FAILED - error_info = format_assertion_message(excinfo.type, excinfo.value) - elif isinstance(excinfo.value, pytest.fail.Exception): - status = TestStatus.FAILED - elif isinstance(excinfo.value, (KeyboardInterrupt, SystemExit)): - # Hard exits the plugin can observe: pytest converted the - # raise into a call-phase report. The session-aborting variant - # (call_phase is None) lands earlier and stays IN_PROGRESS. - status = TestStatus.ABORTED - error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) - elif xfail_marker is not None: - # xfail(raises=X) with a non-matching exception: the contract failed. - status = TestStatus.FAILED - error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) - elif not children_passed: - # A substep already recorded the error and carries the traceback; - # the test step only inherits the child-failed signal. - status = TestStatus.FAILED - else: - status = TestStatus.ERROR - error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) - - if status is None and not keep_managed: - return - - if status is not None: - # BaseType is frozen; mutate via __dict__ the same way _apply_client_to_instance does. - current_step.__dict__["status"] = status - if error_info is not None: - current_step.__dict__["error_info"] = error_info - new_step._sift_managed_externally = True - - -def _finalize_after_teardown(item: pytest.Item, teardown_report: pytest.TestReport) -> None: - """Upgrade a closed step to FAILED when the teardown phase failed. 
- - The autouse step fixture has already exited by the time the teardown - makereport hook fires, so call ``step.update`` again to override the status - server-side and propagate the failure to the still-open parent step. - """ - step: NewStep | None = getattr(item, "_sift_step", None) - if step is None: - return - current_step = step.current_step - if current_step is None: - return - if teardown_report.outcome == "failed" and current_step.status == TestStatus.PASSED: - current_step.update({"status": TestStatus.FAILED}) - step.report_context.mark_step_failed_after_close(current_step) - - -@pytest.hookimpl(tryfirst=True, hookwrapper=True) -def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo[Any]): - """Capture per-phase reports and finalize step status after teardown. - - Stashes both ``rep_`` (the ``CallInfo``, kept for pytest plugins that - expect that conventional attribute) and ``_sift_phase_`` (a - ``SimpleNamespace(call, report)`` used by ``_resolve_initial_status``). The - collection-time skip path is strictly gated on ``_sift_step`` being unset - so it does not duplicate steps the fixture already created. - """ - outcome = yield - report = outcome.get_result() - setattr(item, "rep_" + report.when, call) - setattr(item, "_sift_phase_" + report.when, SimpleNamespace(call=call, report=report)) + pytest_plugins = ["sift_client.pytest_plugin"] - # Collection-time skip (``@pytest.mark.skip`` / ``skipif``): the autouse - # ``step`` fixture never runs, so the hook is the only place that can - # record a step. Presence of ``_sift_step`` is the "fixture ran" signal. 
- if ( - REPORT_CONTEXT - and report.when == "setup" - and report.outcome == "skipped" - and getattr(item, "_sift_step", None) is None - ): - with REPORT_CONTEXT.new_step(name=item.name) as inline_step: - inline_step.current_step.update({"status": TestStatus.SKIPPED}) +This module holds only the plugin's public surface: the catchable warnings, +the session-state globals a conftest may read, the fixtures a project can +request or override, and pytest's hook entry points. The implementation +(settings registry, step stacks, report construction, terminal formatting) +lives under ``sift_client._internal.pytest_plugin``. +""" - if report.when == "teardown": - _finalize_after_teardown(item, report) +from __future__ import annotations +from types import SimpleNamespace +from typing import TYPE_CHECKING, Any, Generator -def _relativize(path: Path, rootpath: Path) -> str: - """Path relative to rootdir, or the basename when it sits outside the tree.""" - try: - rel = str(path.relative_to(rootpath)) - except ValueError: - return path.name - return "" if rel == "." 
else rel +import pytest +from sift_client import SiftClient, SiftConnectionConfig +from sift_client._internal.pytest_plugin.modes import ( + gate_enabled, + is_disabled, + is_offline, + mode_label, + sdk_version, +) +from sift_client._internal.pytest_plugin.options import ( + API_KEY_OPTION, + APP_URL_OPTION, + GRPC_URI_OPTION, + OPEN_OPTION, + REST_URI_OPTION, + register_options, + warn_on_unknown_env_vars, + warn_on_unknown_toml_keys, +) +from sift_client._internal.pytest_plugin.report import ( + OFFLINE_DEFAULTS, + build_disabled_client, + finalize_after_teardown, + report_context_impl, + resolve_report_link, + step_impl, +) +from sift_client._internal.pytest_plugin.steps import ( + build_hierarchy_chain, + build_parametrize_path, + drain_hierarchy_stack, + drain_parametrize_stack, + hierarchy_key, + parametrize_path_key, + reconcile_hierarchy, + reconcile_parametrize, +) +from sift_client._internal.pytest_plugin.terminal import ( + maybe_open_report, + write_disabled_summary, + write_report_summary, +) +from sift_client.errors import SiftWarning +from sift_client.sift_types.test_report import TestStatus + +if TYPE_CHECKING: + from sift_client.util.test_results import ReportContext + from sift_client.util.test_results.context_manager import NewStep -def _strip_param(nodeid: str) -> str: - """Drop the trailing ``[param]`` from a nodeid, keeping ``file::Class::func``. +__all__ = [ + "REPORT_CONTEXT", + "SIFT_REPORT_ID_STASH_KEY", + "SIFT_REPORT_URL_STASH_KEY", + "SiftPytestPluginWarning", + "SiftPytestStepDrainError", + "SiftPytestStepDrainWarning", + "client_has_connection", + "report_context", + "sift_client", + "step", +] - The parametrize id is a variation of the test, not its identity — leaving it - in would make a re-parametrization silently shift the grouping key. Splits on - the last ``::`` segment and cuts at its first ``[``; class/function names - never contain ``[``, so nested brackets in a param value can't confuse it. 
- """ - head, sep, leaf = nodeid.rpartition("::") - leaf = leaf.split("[", 1)[0] - return f"{head}{sep}{leaf}" + +# --------------------------------------------------------------------------- +# Public warnings. +# --------------------------------------------------------------------------- -def _derive_target(request: pytest.FixtureRequest, args: tuple[str, ...]) -> str: - """Describe what was run, from the collected items rather than the command line. +class SiftPytestPluginWarning(SiftWarning): + """Base warning for issues raised by the Sift pytest plugin.""" - Collection is the ground truth of selection — independent of flag order, - ``-k`` / ``-m`` filters, or which path form was typed. Every value is - anchored to the rootdir (project) name so the shape is uniform; granularity - narrows with the selection: - * a single test -> ``project/tests/test_motor.py::test_spin`` (param stripped) - * a single file -> ``project/tests/test_motor.py`` - * many files -> their common directory, ``project/tests/motor`` - * whole tree / nothing collected / paths outside rootdir -> ``project`` +class SiftPytestStepDrainWarning(SiftPytestPluginWarning): + """A step's ``__exit__`` raised while the plugin was draining its stack. - The report is session-level and individual tests are its steps, so the - file/directory grain is the natural unit of "what ran" for the report - itself. The verbatim invocation stays available via ``{command}`` and the - ``pytest_command`` metadata key. + Surfaced at module-teardown or session-end so the drain can continue and + pytest test outcomes stay unaffected; the underlying exception is included + in the message for debugging. 
""" - rootpath = request.config.rootpath - root = rootpath.name - - def _anchor(rel: str) -> str: - return f"{root}/{rel}" if rel else root - - items = list(getattr(request.session, "items", ()) or ()) - if not items: - return root - if len(items) == 1: - return _anchor(_strip_param(items[0].nodeid)) - paths = {p for p in (getattr(i, "path", None) for i in items) if p is not None} - if not paths: - return root - if len(paths) == 1: - return _anchor(_relativize(next(iter(paths)), rootpath)) - try: - common = Path(os.path.commonpath([str(p) for p in paths])) - except ValueError: - # e.g. paths on different drives (Windows); fall back to the project. - return root - return _anchor(_relativize(common, rootpath)) -def _build_template_fields( - target: str, - command: str, - args: tuple[str, ...], - request: pytest.FixtureRequest, -) -> dict[str, Any]: - """Build the placeholder mapping shared by the name and test_case templates.""" - items = getattr(request.session, "items", ()) or () - git = _git_metadata() or {} - return { - "target": target, - "command": command, - "args": " ".join(args), - "rootdir": request.config.rootpath.name, - "timestamp": datetime.now(timezone.utc).isoformat(), - "count": len(items), - "git_repo": git.get("git_repo", ""), - "git_branch": git.get("git_branch", ""), - "git_commit": git.get("git_commit", ""), - } +class SiftPytestStepDrainError(RuntimeError): + """Raised when mid-session drain fails, signaling a likely upstream invariant break.""" -def _format_template( - template: str, - fields: dict[str, Any], - *, - fallback: str, - option_label: str, -) -> str: - """Format ``template`` with ``fields``; on bad input, warn and return ``fallback``. +# --------------------------------------------------------------------------- +# Public session state and stash keys. 
+# --------------------------------------------------------------------------- - A bad template should never block test results from being recorded, so the - rendering errors collapse to a warning + fallback rather than aborting the - session. - """ - try: - return template.format(**fields) - except (KeyError, IndexError, ValueError) as exc: - warnings.warn( - f"Invalid {option_label} template {template!r} ({exc}); using fallback.", - SiftPytestPluginWarning, - stacklevel=2, - ) - return fallback +REPORT_CONTEXT: Any = None +# Set at session end with the resolved (real) report id/URL when online and +# uploaded. Read from a project's conftest in a later hook (e.g. +# ``pytest_unconfigure``) to post the link, write a file, etc. +SIFT_REPORT_ID_STASH_KEY = pytest.StashKey[str]() +SIFT_REPORT_URL_STASH_KEY = pytest.StashKey[str]() -def _report_context_impl( - sift_client: SiftClient, - request: pytest.FixtureRequest, - pytestconfig: pytest.Config | None = None, -) -> Generator[ReportContext, None, None]: - args = request.config.invocation_params.args - # ``target`` is "what ran", derived from the collected items (see - # _derive_target) — invocation-independent, unlike parsing the command - # line. Both the display name and test_case default to it; the verbatim - # command stays available via {command} and the pytest_command metadata. 
- target = _derive_target(request, args) - command = "pytest " + " ".join(args) if args else "pytest" - fields = _build_template_fields(target, command, args, request) - name_template = _REPORT_NAME.resolve(pytestconfig) or "{target} {timestamp}" - name = _format_template( - name_template, - fields, - fallback=f"{target} {fields['timestamp']}", - option_label="sift_report_name", - ) - test_case_template = _TEST_CASE.resolve(pytestconfig) - test_case = ( - _format_template( - test_case_template, - fields, - fallback=target, - option_label="sift_test_case", - ) - if test_case_template - else target - ) - # Metadata starts from the [tool.sift.pytest.report.metadata] TOML table, and - # the auto-recorded pytest_command layers in last so the user can't - # accidentally overwrite it. - report_metadata: dict[str, str | float | bool] = { - **_METADATA.resolve_merged(pytestconfig), - "pytest_command": command, - } - # Mode → ReportContext flags: - # online (default): log_file=, replay_log_file=True - # --sift-offline: log_file=, replay_log_file=False - # --sift-disabled: log_file=False, replay_log_file=False - disabled = sift_client._simulate - offline = False if disabled else _is_offline(pytestconfig) - log_file: str | Path | bool | None = False if disabled else _resolve_log_file(pytestconfig) - include_git_metadata = bool(_GIT_METADATA.resolve(pytestconfig)) - with ReportContext( - sift_client, - name=name, - test_case=test_case, - test_system_name=_TEST_SYSTEM_NAME.resolve(pytestconfig) or None, - system_operator=_SYSTEM_OPERATOR.resolve(pytestconfig) or None, - serial_number=_SERIAL_NUMBER.resolve(pytestconfig) or None, - part_number=_PART_NUMBER.resolve(pytestconfig) or None, - log_file=log_file, - include_git_metadata=include_git_metadata, - replay_log_file=not (disabled or offline), - metadata=report_metadata, - ) as context: - global REPORT_CONTEXT - REPORT_CONTEXT = context - try: - yield context - finally: - # Drain the hierarchy + parametrize stacks INSIDE the - 
# ReportContext's ``with`` block, so the final ``__exit__`` - # update calls for those parent steps are written to the log - # file BEFORE the import worker drains. Without this, the - # worker exits with a partial backlog and the parent steps - # are stuck IN_PROGRESS in the Sift report. - try: - _drain_parametrize_stack() - finally: - _drain_hierarchy_stack() - - -# Placeholder credentials used in --sift-offline mode when env/ini values -# are missing. Offline mode never makes network calls, so the values are -# only syntactically required by SiftConnectionConfig. -_OFFLINE_DEFAULTS = { - "SIFT_API_KEY": "offline", - "SIFT_GRPC_URI": "offline.invalid:0", - "SIFT_REST_URI": "http://offline.invalid", -} - - -def _build_disabled_client() -> SiftClient: - """Construct a SiftClient for ``--sift-disabled`` mode. - - Tagged with ``_simulate=True`` so test-results writes short-circuit through - the existing low-level simulate path without contacting Sift. The URLs are - syntactically valid but unreachable; nothing dials them. - """ - client = SiftClient( - connection_config=SiftConnectionConfig( - api_key="disabled", - grpc_url="disabled.invalid:0", - rest_url="http://disabled.invalid", - ) - ) - client._simulate = True - return client + +# --------------------------------------------------------------------------- +# Fixtures. +# --------------------------------------------------------------------------- @pytest.fixture(scope="session") @@ -1579,7 +128,7 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient: (``SIFT_GRPC_URI``, ``SIFT_REST_URI``) additionally fall back to the ``sift_grpc_uri`` / ``sift_rest_uri`` ini keys, since they are stable per-org values that are safe to commit. ``SIFT_API_KEY`` is intentionally - env-only — use ``pytest-dotenv`` (already a project dependency) to load + env-only; use ``pytest-dotenv`` (already a project dependency) to load it from a ``.env`` file kept out of version control. 
Projects that need custom construction (TLS toggles, custom timeouts, @@ -1594,30 +143,30 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient: mode the credential resolution is skipped entirely and placeholders are always used. """ - if _is_disabled(pytestconfig): - return _build_disabled_client() + if is_disabled(pytestconfig): + return build_disabled_client() resolved = { - "SIFT_API_KEY": _API_KEY.resolve(pytestconfig), - "SIFT_GRPC_URI": _GRPC_URI.resolve(pytestconfig), - "SIFT_REST_URI": _REST_URI.resolve(pytestconfig), + "SIFT_API_KEY": API_KEY_OPTION.resolve(pytestconfig), + "SIFT_GRPC_URI": GRPC_URI_OPTION.resolve(pytestconfig), + "SIFT_REST_URI": REST_URI_OPTION.resolve(pytestconfig), } missing = [env for env, value in resolved.items() if not value] - if missing and not _is_offline(pytestconfig): + if missing and not is_offline(pytestconfig): raise pytest.UsageError( "Sift credentials missing: " + ", ".join(missing) - + ". Set the environment variable(s) — pytest-dotenv loads them " - "from a `.env` file automatically — or set the URIs under " + + ". Set the environment variable(s) (pytest-dotenv loads them " + "from a `.env` file automatically), or set the URIs under " "`sift_grpc_uri` / `sift_rest_uri` in `[tool.pytest.ini_options]` " "in pyproject.toml, or override the sift_client fixture in your " "conftest.py, or pass --sift-offline / --sift-disabled to run " "without contacting Sift." ) for env in missing: - resolved[env] = _OFFLINE_DEFAULTS[env] + resolved[env] = OFFLINE_DEFAULTS[env] # Web-app origin for the report link: the SIFT_APP_URL env var wins, then the # sift_app_url ini key, else host-based derivation in SiftClient.app_url. 
- app_url = _APP_URL.resolve(pytestconfig) + app_url = APP_URL_OPTION.resolve(pytestconfig) return SiftClient( connection_config=SiftConnectionConfig( api_key=resolved["SIFT_API_KEY"] or "", @@ -1628,6 +177,41 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient: ) +@pytest.fixture(scope="session") +def client_has_connection(pytestconfig: pytest.Config, request: pytest.FixtureRequest) -> bool: + """Verify the ``SiftClient`` can reach Sift via ``/ping``. + + Consulted at session start by ``report_context`` in online mode. A failed + ping aborts the session via ``pytest.exit``. Override this fixture in your + conftest to use a + different reachability signal (e.g. a cached auth token) for environments + where pinging is the wrong check. Returns ``False`` in ``--sift-disabled`` + mode without constructing a client. + """ + if is_disabled(pytestconfig): + return False + sift_client = request.getfixturevalue("sift_client") + sift_client.ping.ping() + return True + + +def _set_report_context( + contexts: Generator[ReportContext, None, None], +) -> Generator[ReportContext, None, None]: + """Publish each yielded ReportContext to the module-level ``REPORT_CONTEXT``. + + ``report_context_impl`` stays pure: it builds and yields the context. + Ownership of the reassignable global lives here so the terminal-summary and + makereport hooks (which read ``REPORT_CONTEXT``) see it. The global is set + after the context opens and before tests run, then the impl's ``finally`` + still drains the step stacks before the context exits. + """ + global REPORT_CONTEXT + for context in contexts: + REPORT_CONTEXT = context + yield context + + @pytest.fixture(scope="session") def report_context( request: pytest.FixtureRequest, pytestconfig: pytest.Config @@ -1661,13 +245,13 @@ def report_context( The log-file destination is controlled by ``--sift-log-file``; defaults to a temp file when unset. 
""" - if _is_disabled(pytestconfig): - yield from _report_context_impl( - _build_disabled_client(), request, pytestconfig=pytestconfig + if is_disabled(pytestconfig): + yield from _set_report_context( + report_context_impl(build_disabled_client(), request, pytestconfig=pytestconfig) ) return sift_client = request.getfixturevalue("sift_client") - if not _is_offline(pytestconfig): + if not is_offline(pytestconfig): try: request.getfixturevalue("client_has_connection") except pytest.UsageError: @@ -1681,39 +265,32 @@ def report_context( "--sift-disabled to skip Sift entirely.", returncode=4, ) - yield from _report_context_impl(sift_client, request, pytestconfig=pytestconfig) - - -def _step_impl( - report_context: ReportContext, request: pytest.FixtureRequest -) -> Generator[NewStep, None, None]: - node = request.node - # Items get a parametrize path stashed in ``pytest_collection_modifyitems``; - # modules/other nodes fall back to their node name. The leaf frame - # (``path[-1]``) is the test-specific display name — parents are opened - # by ``_parametrize_parents``. When parametrize-nesting is disabled, fall - # back to the bracket-mangled pytest name (e.g. ``test_a[1]``) so the leaf - # remains uniquely identifiable. - if _PARAMETRIZE_NESTING.resolve(request.config): - path = node.stash.get(_PARAMETRIZE_PATH_KEY, ()) - name = path[-1] if path else str(node.name) - else: - name = str(node.name) - # ``node.obj`` may not exist (e.g., ``pytest.DoctestItem``) or may raise - # when accessed — fall back to no description in those cases rather than - # erroring out a perfectly valid test. ``getattr``'s default only - # suppresses ``AttributeError``; the try/except catches everything else - # (RuntimeError from a misbehaving ``__doc__`` descriptor, etc.). 
- try: - existing_docstring = getattr(getattr(node, "obj", None), "__doc__", None) or None - except Exception: - existing_docstring = None - with report_context.new_step( - name=name, description=existing_docstring, assertion_as_fail_not_error=False - ) as new_step: - node._sift_step = new_step - yield new_step - _resolve_initial_status(new_step, node) + yield from _set_report_context( + report_context_impl(sift_client, request, pytestconfig=pytestconfig) + ) + + +@pytest.fixture(autouse=True) +def step( + request: pytest.FixtureRequest, + pytestconfig: pytest.Config, + _parametrize_parents: None, +) -> Generator[NewStep | None, None, None]: + """Create an outer step for the function when the Sift gate is on. + + Resolves the gate via `gate_enabled`: the `sift_exclude` marker forces off, + `sift_include` forces on, otherwise the `sift_autouse` ini default applies. + When on, requests the session `report_context` lazily; the first gated test + in the session triggers its creation, subsequent gated tests reuse it. In + ``--sift-disabled`` mode the report context is backed by a + ``SiftClient(_simulate=True)`` placeholder, so every write returns a + synthesized response without contacting Sift. + """ + if not gate_enabled(request.node, pytestconfig): + yield None + return + rc = request.getfixturevalue("report_context") + yield from step_impl(rc, request) @pytest.fixture(autouse=True) @@ -1723,75 +300,13 @@ def _hierarchy_parents( ) -> None: """Open/close hierarchy parent steps (packages, modules, classes) for the current item. - Same diff-stack pattern as ``_parametrize_parents`` but operates on - ``_HIERARCHY_KEY``. The chain is built outer-to-inner from the item's - collection-tree ancestors; which node types are included is decided at - build time by ``sift_package_step`` / ``sift_module_step`` / - ``sift_class_step``. When the chain changes (pop or push), the parametrize - stack is drained first since parametrize parents nest INSIDE these. 
- - Gated off when the item is excluded (avoids eager ``report_context`` setup). + Gated off when the item is excluded (avoids eager ``report_context`` setup); + otherwise delegates to ``reconcile_hierarchy``, which diffs the item's + ancestor chain against the open stack and opens/closes parents to match. """ - default = bool(_AUTOUSE.resolve(pytestconfig)) - if not _sift_enabled_for(request.node, default): - return None - # Fall back to computing the chain on-demand for items that bypassed - # ``pytest_collection_modifyitems`` (e.g., dynamically inserted by another - # plugin's later hook). Defaulting to ``()`` would incorrectly drain the - # entire open hierarchy stack for those items. - desired = request.node.stash.get(_HIERARCHY_KEY, _STASH_MISSING) - if desired is _STASH_MISSING: - desired = _build_hierarchy_chain(request.node, pytestconfig) - common = 0 - # Compare on identity (nodeid) — same-named ancestors at different paths - # MUST stay distinct. - while ( - common < len(_HIERARCHY_STACK) - and common < len(desired) - and _HIERARCHY_STACK[common][0] == desired[common][0] - ): - common += 1 - # Any change to the hierarchy chain orphans parametrize parents from the - # previous test — drain them before mutating the hierarchy stack so - # ReportContext's top-of-stack invariant holds. Strict mode: a per-frame - # ``__exit__`` failure here signals a real upstream drift between the - # plugin stacks and ReportContext; raise it as a test error instead of a - # silenceable warning. - if common < len(_HIERARCHY_STACK) or common < len(desired): - _drain_parametrize_stack(swallow_errors=False) - # Symmetric per-frame guard for the hierarchy pop so one bad ``__exit__`` - # doesn't leave _HIERARCHY_STACK partially drained for every subsequent test. 
- while len(_HIERARCHY_STACK) > common: - _identity, name, ns = _HIERARCHY_STACK.pop() - _close_frame(name, ns) - if not desired[common:]: - return None - # Fetch ``report_context`` lazily — but only when there's at least one - # rendered frame to push. Pure diff-only frames (e.g. a Package frame when - # ``sift_package_step=false``) just update _HIERARCHY_STACK with ns=None. - rc = None - # Roll back any partial push so a mid-loop exception doesn't leave half - # the chain orphaned on the stack. Per-frame guard inside the rollback so - # a failing ``__exit__`` doesn't shadow the original exception or leak - # the remaining opened frames. - opened: list[tuple[str, str, Any]] = [] - try: - for identity, name, doc, rendered in desired[common:]: - if rendered: - if rc is None: - rc = request.getfixturevalue("report_context") - ns = rc.new_step(name=name, description=doc, assertion_as_fail_not_error=False) - ns.__enter__() - opened.append((identity, name, ns)) - else: - opened.append((identity, name, None)) - except BaseException: - while opened: - _identity, name, ns = opened.pop() - _close_frame(name, ns) - raise - _HIERARCHY_STACK.extend(opened) - return None + if not gate_enabled(request.node, pytestconfig): + return + reconcile_hierarchy(request, pytestconfig) @pytest.fixture(autouse=True) @@ -1802,97 +317,160 @@ def _parametrize_parents( ) -> None: """Open/close shared parametrize parent steps for the current item. - Diffs the item's desired parametrize path against the open stack: pops the - stale tail, then opens new parents (everything except the innermost frame — - the ``step`` fixture creates that as the leaf). Parents persist across - sibling items so a tree like ``test_x[a=1]`` / ``test_x[a=2]`` shares one - ``test_x`` container. - - Gated off when the current item is excluded so that excluded items don't - eagerly request ``report_context`` (which would defeat its lazy creation), - or when ``sift_parametrize_nesting=false``. 
Parents persist until the - diff against a subsequent test's chain pops them, or until - ``pytest_sessionfinish`` drains anything left at session end. + Ordered after ``_hierarchy_parents`` so parametrize parents nest inside the + hierarchy ones. Gated off when the item is excluded (so excluded items don't + eagerly request ``report_context``); otherwise delegates to + ``reconcile_parametrize``, which also no-ops when + ``sift_parametrize_nesting=false``. Parents persist until a later test's + chain pops them, or until ``pytest_sessionfinish`` drains the rest. """ - default = bool(_AUTOUSE.resolve(pytestconfig)) - if not _sift_enabled_for(request.node, default): - return None - if not _PARAMETRIZE_NESTING.resolve(pytestconfig): - return None - # Fall back to on-demand computation for dynamically-inserted items; - # see _hierarchy_parents for the same rationale. - desired = request.node.stash.get(_PARAMETRIZE_PATH_KEY, _STASH_MISSING) - if desired is _STASH_MISSING: - desired = _build_parametrize_path(request.node) - parents = desired[:-1] - common = 0 - while ( - common < len(_PARAMETRIZE_STACK) - and common < len(parents) - and _PARAMETRIZE_STACK[common][0] == parents[common] + if not gate_enabled(request.node, pytestconfig): + return + reconcile_parametrize(request, pytestconfig) + + +# --------------------------------------------------------------------------- +# Hooks (in lifecycle fire order). 
+# --------------------------------------------------------------------------- + + +def pytest_addoption(parser: pytest.Parser) -> None: + """Register every CLI flag and pytest ini key declared in ``PLUGIN_OPTIONS``.""" + register_options(parser) + + +def pytest_configure(config: pytest.Config) -> None: + """Register the Sift gate markers and warn on unknown ``SIFT_*`` settings.""" + config.addinivalue_line( + "markers", + "sift_include: force the Sift autouse fixtures to activate for this test " + "regardless of the `sift_autouse` ini default.", + ) + config.addinivalue_line( + "markers", + "sift_exclude: force the Sift autouse fixtures to skip this test " + "regardless of the `sift_autouse` ini default.", + ) + # Surface typos in env vars and [tool.sift...] keys at session start so a + # silent no-op (env var that doesn't match anything, table key the loader + # ignores) becomes visible. The registry is the source of truth for what's + # known. + warn_on_unknown_env_vars() + warn_on_unknown_toml_keys(config) + + +def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None: + """Stash each item's class chain + parametrize path and cluster siblings. + + Sorts by ``(file_path, hierarchy_chain, parametrize_path)`` so sibling + items under a shared parent (package, module, class, or parametrize axis) + stay contiguous; otherwise a free function sorting between two class + methods would tear down + re-open the class step, producing duplicate + parents in the report tree. + """ + for item in items: + item.stash[hierarchy_key] = build_hierarchy_chain(item, config) + item.stash[parametrize_path_key] = build_parametrize_path(item) + # Use ``.get(...)`` defensively: a third-party hook may inject items after + # our stashing loop runs, and we'd rather sort them at the tail than + # KeyError out of collection. 
+ items.sort( + key=lambda i: ( + str(i.path), + tuple(identity for identity, _, _, _ in i.stash.get(hierarchy_key, ())), + i.stash.get(parametrize_path_key, ()), + ) + ) + + +@pytest.hookimpl(tryfirst=True, hookwrapper=True) +def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo[Any]): + """Capture per-phase reports and finalize step status after teardown. + + Stashes both ``rep_`` (the ``CallInfo``, kept for pytest plugins that + expect that conventional attribute) and ``_sift_phase_`` (a + ``SimpleNamespace(call, report)`` used by ``resolve_initial_status``). The + collection-time skip path is strictly gated on ``_sift_step`` being unset + so it does not duplicate steps the fixture already created. + """ + outcome = yield + report = outcome.get_result() + setattr(item, "rep_" + report.when, call) + setattr(item, "_sift_phase_" + report.when, SimpleNamespace(call=call, report=report)) + + # Collection-time skip (``@pytest.mark.skip`` / ``skipif``): the autouse + # ``step`` fixture never runs, so the hook is the only place that can + # record a step. Presence of ``_sift_step`` is the "fixture ran" signal. + if ( + REPORT_CONTEXT + and report.when == "setup" + and report.outcome == "skipped" + and getattr(item, "_sift_step", None) is None ): - common += 1 - # Per-frame guard so one bad ``__exit__`` doesn't leave _PARAMETRIZE_STACK - # partially drained for every subsequent test. 
- while len(_PARAMETRIZE_STACK) > common: - name, ns = _PARAMETRIZE_STACK.pop() - _close_frame(name, ns) - if not parents[common:]: - return None - rc = request.getfixturevalue("report_context") - opened: list[tuple[str, Any]] = [] + with REPORT_CONTEXT.new_step(name=item.name) as inline_step: + inline_step.current_step.update({"status": TestStatus.SKIPPED}) + + if report.when == "teardown": + finalize_after_teardown(item, report) + + +def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None: + """Drain any parent steps still open at session end (innermost first). + + Wrapped so a failure in the inner drain does not prevent the outer one + from running. With ``module_substep`` removed, this is the sole place + where hierarchy parents close; they persist across all tests and only + drain when the session ends. + """ try: - for display in parents[common:]: - ns = rc.new_step(name=display, assertion_as_fail_not_error=False) - ns.__enter__() - opened.append((display, ns)) - except BaseException: - while opened: - name, ns = opened.pop() - _close_frame(name, ns) - raise - _PARAMETRIZE_STACK.extend(opened) - return None + drain_parametrize_stack() + finally: + drain_hierarchy_stack() -@pytest.fixture(autouse=True) -def step( - request: pytest.FixtureRequest, - pytestconfig: pytest.Config, - _parametrize_parents: None, -) -> Generator[NewStep | None, None, None]: - """Create an outer step for the function when the Sift gate is on. +def pytest_report_header(config: pytest.Config) -> str | None: + """Emit a session-start header with the SDK version and active mode. - Resolves the gate via `_sift_enabled_for(request.node, ini_default)`: - `sift_exclude` marker forces off, `sift_include` forces on, otherwise the - `sift_autouse` ini default applies. When on, requests the - session `report_context` lazily — the first gated test in the session - triggers its creation, subsequent gated tests reuse it. 
In - ``--sift-disabled`` mode the report context is backed by a - ``SiftClient(_simulate=True)`` placeholder, so every write returns a - synthesized response without contacting Sift. + Suppressed under ``-q`` (negative verbosity), matching how pytest hides its + own platform/plugin header. """ - default = bool(_AUTOUSE.resolve(pytestconfig)) - if not _sift_enabled_for(request.node, default): - yield None - return - rc = request.getfixturevalue("report_context") - yield from _step_impl(rc, request) + if config.get_verbosity() < 0: + return None + return f"Sift: sift-stack-py {sdk_version()} — {mode_label(config)} mode" -@pytest.fixture(scope="session") -def client_has_connection(pytestconfig: pytest.Config, request: pytest.FixtureRequest) -> bool: - """Verify the ``SiftClient`` can reach Sift via ``/ping``. +def pytest_terminal_summary(terminalreporter: Any, exitstatus: int, config: pytest.Config) -> None: + """Emit a session-end Sift report summary, adapting per mode. - Consulted at session start by ``report_context`` in online mode. A failed - ping aborts the session via ``pytest.exit``. Override this fixture in your - conftest to use a - different reachability signal (e.g. a cached auth token) for environments - where pinging is the wrong check. Returns ``False`` in ``--sift-disabled`` - mode without constructing a client. + The printed panel is suppressed under ``-q``, but programmatic side effects + (stashing the report ref for ``conftest.py``, ``--sift-open-report``) still run so + other plugins and CI steps can consume the result. The panel itself is + rendered by ``write_report_summary``; this hook handles the side effects. 
""" - if _is_disabled(pytestconfig): - return False - sift_client = request.getfixturevalue("sift_client") - sift_client.ping.ping() - return True + quiet = config.get_verbosity() < 0 + + if is_disabled(config): + if not quiet: + write_disabled_summary(terminalreporter) + return + + context = REPORT_CONTEXT + if context is None: + # No gated test ran, so no report context was created. Nothing to show. + return + + offline = is_offline(config) + # Resolve the link first so stashing and --sift-open-report run even under -q; + # programmatic consumers don't care about verbosity. + report_id, report_url = resolve_report_link(context, offline) + if report_id: + config.stash[SIFT_REPORT_ID_STASH_KEY] = report_id + if report_url is not None: + config.stash[SIFT_REPORT_URL_STASH_KEY] = report_url + if OPEN_OPTION.resolve(config): + maybe_open_report(report_url) + + if quiet: + return + + write_report_summary(terminalreporter, context, config, report_id, report_url, offline) From 56f711df815adbe34f3cb3bdb10494e4f3b62bb8 Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Mon, 8 Jun 2026 09:04:29 -0700 Subject: [PATCH 18/19] Python(feat): broaden step failure check to cover substeps (#619) --- python/CHANGELOG.md | 2 +- .../docs/examples/pytest_plugin_quickstart.md | 6 +- .../guides/pytest_plugin/configuration.md | 2 +- python/docs/guides/pytest_plugin/index.md | 4 +- .../guides/pytest_plugin/report_structure.md | 4 +- python/examples/pytest_plugin/README.md | 8 +-- .../tests/with_sift/test_with_sift_demo.py | 33 +++++++---- .../_tests/pytest_plugin/test_pass_fail.py | 25 ++++++-- python/lib/sift_client/pytest_plugin.py | 10 ++-- .../util/test_results/context_manager.py | 58 ++++++++++++------- 10 files changed, 97 insertions(+), 55 deletions(-) diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md index 2ae4b6a88..01cd82631 100644 --- a/python/CHANGELOG.md +++ b/python/CHANGELOG.md @@ -13,7 +13,7 @@ Highlights: - **Hierarchical report tree.** Packages, modules, classes, and 
parametrize axes above a test each become a parent step, so the report mirrors your test layout. Arbitrary substeps can be opened inside a test. - **Three running modes.** Online (default) pings Sift at session start and streams create/update calls during the run; offline records to a JSONL log for later replay; disabled evaluates bounds locally without contacting Sift. Select with `--sift-offline` or `--sift-disabled`. - **Graceful connection handling.** Online mode aborts at session start if Sift is unreachable or credentials are invalid, so a misconfigured job fails fast. If the connection drops mid-run, tests keep running and the log keeps writing locally; remaining entries upload afterward via the import command the plugin prints on exit. -- **Pass/fail mapping.** Every pytest outcome (pass, assertion failure, exception, skip, xfail, hard exit) maps to a `TestStatus` and propagates to parent steps and the report. `step.measure(...)` returns a pass/fail boolean without raising, so all measurements land in the report even when one fails; `step.fail_if_measurements_failed()` fails the test at the end without adding assertion noise to `error_info`. +- **Pass/fail mapping.** Every pytest outcome (pass, assertion failure, exception, skip, xfail, hard exit) maps to a `TestStatus` and propagates to parent steps and the report. `step.measure(...)` returns a pass/fail boolean without raising, so all measurements land in the report even when one fails; `step.pytest_fail_if_step_failed()` fails the test at the end if the step or any descendant failed (out-of-bounds measurements, failed substeps, `report_outcome` failures) without adding assertion noise to `error_info`. - **Assertion messages as error info.** Assertion failure messages are reported as the step's error info. - **Git metadata.** Repo, branch, and commit are captured on the report automatically. 
- **Terminal output.** The plugin prints a session header with the SDK version and active mode, and an end-of-run `Sift report` panel showing the test case, outcome, step and measurement breakdowns (color-coded), test system/operator, plus a link to the report (online), the saved log and upload command (offline), or a disabled note. Both suppress under `-q`. `SiftClient.app_url` exposes the web-app origin; set `sift_app_url` for on-prem or custom deployments. `--sift-open-report` opens the report in a browser at session end. diff --git a/python/docs/examples/pytest_plugin_quickstart.md b/python/docs/examples/pytest_plugin_quickstart.md index 30012f9b4..cf19c11fb 100644 --- a/python/docs/examples/pytest_plugin_quickstart.md +++ b/python/docs/examples/pytest_plugin_quickstart.md @@ -140,7 +140,7 @@ TestReport (FAILED, since failures propagate up from leaves) │ (test_excluded: @sift_exclude, runs in pytest, NOT in tree) ├── test_measure_series PASSED ├── test_failed_measurement_marks_sift_step_failed FAILED (pytest PASSED) - ├── test_fail_if_measurements_failed_at_end FAILED (pytest FAILED) + ├── test_pytest_fail_if_step_failed_at_end FAILED (pytest FAILED) ├── test_report_level_metadata PASSED └── TestClassStep ├── test_parametrize @@ -162,8 +162,8 @@ The `with_sift` module shows two patterns for handling measurement results: `test_failed_measurement_marks_sift_step_failed` lets the test keep passing in pytest while the Sift step is `FAILED` (useful when measurements are diagnostic data you want to collect regardless of outcome); and -`test_fail_if_measurements_failed_at_end` takes every measurement first and -then calls `step.fail_if_measurements_failed()` once at the end, so every +`test_pytest_fail_if_step_failed_at_end` takes every measurement first and +then calls `step.pytest_fail_if_step_failed()` once at the end, so every measurement still lands in the report even when one fails. 
The end-of-test call is the recommended pattern: it fails via `pytest.fail` (no assertion noise in `error_info`), and unlike asserting on an individual diff --git a/python/docs/guides/pytest_plugin/configuration.md b/python/docs/guides/pytest_plugin/configuration.md index a05897cd4..47427055b 100644 --- a/python/docs/guides/pytest_plugin/configuration.md +++ b/python/docs/guides/pytest_plugin/configuration.md @@ -117,7 +117,7 @@ def sift_client() -> SiftClient: | Name | Kind | Scope | Purpose | |---|---|---|---| | `report_context` | fixture (autouse) | session | The `ReportContext` backing the run's `TestReport`. Use it to attach metadata or open ad-hoc steps. | -| `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, `fail_if_measurements_failed`, and `current_step`. | +| `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, `pytest_fail_if_step_failed`, and `current_step`. | | `_hierarchy_parents` | internal fixture (autouse) | function | Opens a parent step for each `pytest.Package`, `pytest.Module`, and `pytest.Class` ancestor of the current test. Each layer is gated independently; see [settings reference](#settings-reference). | | `_parametrize_parents` | internal fixture (autouse) | function | Opens a parent step for each `@pytest.mark.parametrize` axis (and fixture parametrization), nested inside the hierarchy parents. | | `client_has_connection` | fixture | session | Calls `sift_client.ping.ping()`; consulted by `report_context` at session start in online mode (the default). Override to skip the ping or use a different reachability signal. 
| diff --git a/python/docs/guides/pytest_plugin/index.md b/python/docs/guides/pytest_plugin/index.md index a649204a4..93879692c 100644 --- a/python/docs/guides/pytest_plugin/index.md +++ b/python/docs/guides/pytest_plugin/index.md @@ -45,7 +45,7 @@ def test_battery_voltage(step): bounds={"min": 4.8, "max": 5.2}, unit="V", ) - step.fail_if_measurements_failed() + step.pytest_fail_if_step_failed() ``` Run it: @@ -59,7 +59,7 @@ A `TestReport` shows up in Sift once the session finishes. !!! tip "Fail at the end, not per measurement" `step.measure(...)` returns a pass/fail boolean and never raises, so a failing measurement marks the step failed without aborting the test. Take - every measurement first, then call `step.fail_if_measurements_failed()` once + every measurement first, then call `step.pytest_fail_if_step_failed()` once at the end, so every measurement still lands in the report even when one fails. It fails the test via `pytest.fail` (no assertion noise in `error_info`), and unlike asserting on an individual `step.measure(...)` call diff --git a/python/docs/guides/pytest_plugin/report_structure.md b/python/docs/guides/pytest_plugin/report_structure.md index 811fd7cf0..dd0d8ed54 100644 --- a/python/docs/guides/pytest_plugin/report_structure.md +++ b/python/docs/guides/pytest_plugin/report_structure.md @@ -29,7 +29,7 @@ def test_measure_a_single_value(step): ) # An out-of-bounds measurement already marks the step FAILED. Call this at # the end to also fail pytest, without an assertion message in error_info. - step.fail_if_measurements_failed() + step.pytest_fail_if_step_failed() def test_measure_strings_and_booleans(step): @@ -354,7 +354,7 @@ def test_only_outliers_recorded(step): ) # Returns False because 99.9 is out of bounds. The step is already # marked failed; call this only if you also want pytest to fail. - step.fail_if_measurements_failed() + step.pytest_fail_if_step_failed() ``` !!! 
note "`measure_all` requires at least one bound" diff --git a/python/examples/pytest_plugin/README.md b/python/examples/pytest_plugin/README.md index 0a94b7f97..fcc60fd5f 100644 --- a/python/examples/pytest_plugin/README.md +++ b/python/examples/pytest_plugin/README.md @@ -76,7 +76,7 @@ TestReport (FAILED, since failures propagate up from leaves) │ (test_excluded: @sift_exclude, runs in pytest, NOT in tree) ├── test_measure_series PASSED ├── test_failed_measurement_marks_sift_step_failed FAILED (pytest PASSED) - ├── test_fail_if_measurements_failed_at_end FAILED (pytest FAILED) + ├── test_pytest_fail_if_step_failed_at_end FAILED (pytest FAILED) ├── test_report_level_metadata PASSED └── TestClassStep ├── test_parametrize @@ -98,8 +98,8 @@ The `with_sift` module shows two patterns for handling measurement results: `test_failed_measurement_marks_sift_step_failed` lets the test keep passing in pytest while the Sift step is `FAILED` (useful when measurements are diagnostic data you want to collect regardless of outcome); and -`test_fail_if_measurements_failed_at_end` takes every measurement first and -then calls `step.fail_if_measurements_failed()` once at the end, so every +`test_pytest_fail_if_step_failed_at_end` takes every measurement first and +then calls `step.pytest_fail_if_step_failed()` once at the end, so every measurement still lands in the report even when one fails. The end-of-test call is the recommended pattern: it fails via `pytest.fail` (no assertion noise in `error_info`), and unlike asserting on an individual @@ -117,5 +117,5 @@ Toggle any of the `sift_*_step` / `sift_parametrize_nesting` flags in | `conftest.py` | Plugin registration via `pytest_plugins` (a single line) | | `pyproject.toml` | Pytest nesting/git-metadata knobs at their defaults; report `name`, `test_case`, and `metadata` under `[tool.sift.pytest.report]` | | `tests/pytest_only/test_pytest_only_demo.py` | Plain pytest tests with no Sift APIs. 
The plugin captures pass/fail automatically; covers functions, fixtures, parametrize, classes, plus one each of `AssertionError` (FAILED), `pytest.skip` (SKIPPED), and a raised `ValueError` (ERROR) | -| `tests/with_sift/test_with_sift_demo.py` | `step.measure` (numeric/string/bool bounds, units, description, metadata, `channel_names`), `step.measure_avg` and `step.measure_all` for series, an out-of-bounds measurement (pytest PASSED, Sift step FAILED), the recommended `step.fail_if_measurements_failed()` end-of-test call that fails pytest while still recording every measurement, nested `step.substep` (with step-level `metadata=...`), `@pytest.mark.sift_exclude`, class step + class docstring → description, nested classes, stacked `@pytest.mark.parametrize`, `step.report_outcome`, and session-level metadata via `report_context.report.update({...})` | +| `tests/with_sift/test_with_sift_demo.py` | `step.measure` (numeric/string/bool bounds, units, description, metadata, `channel_names`), `step.measure_avg` and `step.measure_all` for series, an out-of-bounds measurement (pytest PASSED, Sift step FAILED), the recommended `step.pytest_fail_if_step_failed()` end-of-test call that fails pytest while still recording every measurement, nested `step.substep` (with step-level `metadata=...`), `@pytest.mark.sift_exclude`, class step + class docstring → description, nested classes, stacked `@pytest.mark.parametrize`, `step.report_outcome`, and session-level metadata via `report_context.report.update({...})` | | `tests/{pytest_only,with_sift}/__init__.py` | Each Python package (directory with `__init__.py`) becomes a parent step in the report tree | diff --git a/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py b/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py index 7cbe8f8ce..c25c605c5 100644 --- a/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py +++ b/python/examples/pytest_plugin/tests/with_sift/test_with_sift_demo.py 
@@ -40,6 +40,11 @@ def test_substeps(step) -> None: Metadata can be attached at the step level by passing ``metadata=...`` to ``substep``; the same keyword is accepted by ``report_context.new_step`` and propagates to the resulting ``TestStep``. + + A failed substep marks this step FAILED in the report without raising, so + the end-of-test ``step.pytest_fail_if_step_failed()`` call is needed here + too: it folds substep failures (not just direct measurements) into the + pytest outcome. """ with step.substep(name="phase_1", metadata={"phase_index": 1}) as s1: s1.measure(name="value", value=1.0, bounds={"min": 0.0, "max": 2.0}) @@ -48,6 +53,9 @@ def test_substeps(step) -> None: with s2.substep(name="phase_2a") as s2a: s2a.measure(name="value", value=1.0, bounds={"min": 0.0, "max": 2.0}) + # Fails pytest if any substep above failed; no-op when they all passed. + step.pytest_fail_if_step_failed() + def test_measure_series(step) -> None: """``measure_avg`` and ``measure_all`` are the series variants of ``measure``. @@ -94,25 +102,28 @@ def test_failed_measurement_marks_sift_step_failed(step) -> None: ) -def test_fail_if_measurements_failed_at_end(step) -> None: - """Recommended pattern: take every measurement first, then call - ``step.fail_if_measurements_failed()`` once at the end. +def test_pytest_fail_if_step_failed_at_end(step) -> None: + """Recommended pattern: do every measurement and substep first, then call + ``step.pytest_fail_if_step_failed()`` once at the end. Asserting on individual ``step.measure(...)`` calls raises ``AssertionError`` on the first failure, so any measurements after the failing one never run and never land in the Sift report. The end-of-test - call is strictly better for diagnostic completeness: every measurement is - recorded, including the failures, and the aggregate result is then folded - into the pytest outcome. It fails via ``pytest.fail`` rather than an - assertion, so the failed step carries no assertion noise in ``error_info``. 
- - The ``b`` measurement below is deliberately out of bounds. ``c`` still - runs and is recorded; only the final call fails the test. + call is strictly better for diagnostic completeness: every measurement and + substep is recorded, including the failures, and the aggregate result is + then folded into the pytest outcome. It fails via ``pytest.fail`` rather + than an assertion, so the failed step carries no assertion noise in + ``error_info``. + + It fails on any failure the report would record: out-of-bounds + measurements, failed substeps, and ``report_outcome`` failures. The ``b`` + measurement below is deliberately out of bounds. ``c`` still runs and is + recorded; only the final call fails the test. """ step.measure(name="a", value=1.0, bounds={"min": 0.0, "max": 2.0}) step.measure(name="b", value=99.0, bounds={"min": 0.0, "max": 2.0}) # out of bounds step.measure(name="c", value=1.5, bounds={"min": 0.0, "max": 2.0}) # still recorded - step.fail_if_measurements_failed() + step.pytest_fail_if_step_failed() def test_report_level_metadata(step, report_context) -> None: diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py index d5f9674ce..52aa6f23c 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py @@ -137,8 +137,8 @@ def test_x(): assert capture.final_status("test_x") == TestStatus.FAILED -def test_fail_if_measurements_failed_fails_without_error_info(inner): - # An out-of-bounds measurement plus step.fail_if_measurements_failed() +def test_pytest_fail_if_step_failed_fails_without_error_info(inner): + # An out-of-bounds measurement plus step.pytest_fail_if_step_failed() # fails the test via pytest.fail, so the step is FAILED with no assertion # message in error_info (the reason this helper exists over `assert`). 
_run( @@ -146,20 +146,35 @@ def test_fail_if_measurements_failed_fails_without_error_info(inner): """ def test_x(step): step.measure(name="b", value=99.0, bounds={"min": 0.0, "max": 2.0}) - step.fail_if_measurements_failed() + step.pytest_fail_if_step_failed() """, ) assert capture.final_status("test_x") == TestStatus.FAILED assert capture.final_error_message("test_x") is None -def test_fail_if_measurements_failed_passes_when_in_bounds(inner): +def test_pytest_fail_if_step_failed_fails_on_failed_substep(inner): + # A failed substep (here via report_outcome) leaves no out-of-bounds + # measurement on the step, but the report still marks the step FAILED. + # pytest_fail_if_step_failed must fail the test so the verdict matches. + _run( + inner, + """ + def test_x(step): + step.report_outcome("check", False, "deliberately failing") + step.pytest_fail_if_step_failed() + """, + ) + assert capture.final_status("test_x") == TestStatus.FAILED + + +def test_pytest_fail_if_step_failed_passes_when_in_bounds(inner): _run( inner, """ def test_x(step): step.measure(name="a", value=1.0, bounds={"min": 0.0, "max": 2.0}) - step.fail_if_measurements_failed() + step.pytest_fail_if_step_failed() """, ) assert capture.final_status("test_x") == TestStatus.PASSED diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index 43f689894..a381d78cd 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -14,7 +14,7 @@ from __future__ import annotations from types import SimpleNamespace -from typing import TYPE_CHECKING, Any, Generator +from typing import Any, Generator import pytest @@ -61,15 +61,15 @@ ) from sift_client.errors import SiftWarning from sift_client.sift_types.test_report import TestStatus - -if TYPE_CHECKING: - from sift_client.util.test_results import ReportContext - from sift_client.util.test_results.context_manager import NewStep +from sift_client.util.test_results import ReportContext +from 
sift_client.util.test_results.context_manager import NewStep __all__ = [ "REPORT_CONTEXT", "SIFT_REPORT_ID_STASH_KEY", "SIFT_REPORT_URL_STASH_KEY", + "NewStep", + "ReportContext", "SiftPytestPluginWarning", "SiftPytestStepDrainError", "SiftPytestStepDrainWarning", diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py index 497404c45..5cd2c6729 100644 --- a/python/lib/sift_client/util/test_results/context_manager.py +++ b/python/lib/sift_client/util/test_results/context_manager.py @@ -521,10 +521,10 @@ def __init__( # Per-step measurement-failure count for ``measurements_passed``. # Tracks only direct ``measure*`` calls on this NewStep instance; # substep / ``report_outcome`` failures are intentionally not folded - # in here (see ``measurements_passed`` vs ``passed``). + # in here. ``pytest_fail_if_step_failed`` covers the broader case. self._failed_measurement_count = 0 # Out-of-bounds measurements recorded on this step, retained so - # ``fail_if_measurements_failed`` can name them in the failure message. + # ``pytest_fail_if_step_failed`` can name them in the failure message. self._failed_measurements: list[TestMeasurement] = [] def __enter__(self): @@ -539,34 +539,50 @@ def measurements_passed(self) -> bool: """True if every measurement recorded directly on this step has passed. Counts only ``step.measure``, ``step.measure_avg``, and - ``step.measure_all`` calls on this ``NewStep`` instance. Pair it with - ``fail_if_measurements_failed()`` at the end of a test to fail pytest on - any out-of-bounds measurement without short-circuiting on the first - failure (asserting on individual ``measure(...)`` return values skips - every measurement after the failing one). + ``step.measure_all`` calls on this ``NewStep`` instance; substep and + ``report_outcome`` failures are not folded in. 
For the end-of-test + failure that mirrors the report, use ``pytest_fail_if_step_failed()``, + which also covers failed substeps. """ return self._failed_measurement_count == 0 - def fail_if_measurements_failed(self, message: str = "measurements out of bounds") -> None: - """Fail the pytest test if any measurement on this step was out of bounds. + def pytest_fail_if_step_failed(self, message: str = "step failed") -> None: + """Fail the running pytest test if this step or any descendant failed. - Use instead of ``assert step.measurements_passed``: it fails via - ``pytest.fail`` so the step resolves to FAILED without attaching an - assertion message to ``error_info``. No-op when every measurement - passed. Call once at the end of the test so every measurement is still - recorded before the failure fires. + Covers every signal that resolves the step to FAILED in the report: + out-of-bounds measurements recorded directly on the step, failed + substeps, and ``report_outcome`` failures. Call it once at the end of a + test so the pytest verdict matches the report instead of passing green + while the report shows a failure. - The failure message names each out-of-bounds measurement with its - recorded value and bounds. ``message`` is used as the header line. + It fails via ``pytest.fail(pytrace=False)`` so the step resolves to + FAILED without an assertion traceback in ``error_info``. No-op when the + step and all of its descendants passed. Call after the work is done so + every measurement and substep is recorded before the failure fires. + + The failure message names each out-of-bounds measurement and each + failed substep. ``message`` is used as the header line. 
""" - if self.measurements_passed: + step = self.current_step + # ``open_step_results[step_path]`` is the same signal ``__exit__`` reads + # to resolve status: it is flipped False by a direct measurement failure + # (record_step_outcome) and by any failed child as it propagates upward + # (propagate_step_result). Default True covers a step that never opened. + if step is None or self.report_context.open_step_results.get(step.step_path, True): return import pytest - failed = self._failed_measurements - header = f"{message} ({len(failed)}):" if failed else message - body = [f" - {m}" for m in failed] - pytest.fail("\n".join([header, *body]), pytrace=False) + prefix = f"{step.step_path}." + failed_substeps = [ + s + for s in self.report_context.created_steps + if s.step_path.startswith(prefix) + and s.status not in (TestStatus.PASSED, TestStatus.SKIPPED, TestStatus.IN_PROGRESS) + ] + details = [f" - measurement {m}" for m in self._failed_measurements] + details += [f" - substep {s.step_path!r}: {s.status.name}" for s in failed_substeps] + header = f"{message} ({len(details)}):" if details else message + pytest.fail("\n".join([header, *details]), pytrace=False) def update_step_from_result( self, From 46ca0d409fd8f254569408ea19bfbd87c7c9808b Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Mon, 8 Jun 2026 09:52:20 -0700 Subject: [PATCH 19/19] =?UTF-8?q?Python(feat):=20order-independent=20pytes?= =?UTF-8?q?t=20report=20tree=20with=20early=20parent=20=E2=80=A6=20(#616)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../guides/pytest_plugin/configuration.md | 3 +- .../pytest_plugin/pass_fail_behavior.md | 34 +- .../guides/pytest_plugin/report_structure.md | 6 + .../_internal/pytest_plugin/report.py | 83 ++- .../_internal/pytest_plugin/steps.py | 597 ++++++++++------- .../pytest_plugin/_step_status_capture.py | 37 ++ .../pytest_plugin/step_status_states.md | 13 +- .../_tests/pytest_plugin/test_hierarchy.py | 605 
++++++++++++++++-- .../_tests/pytest_plugin/test_pass_fail.py | 18 +- python/lib/sift_client/pytest_plugin.py | 145 +++-- .../util/test_results/context_manager.py | 152 ++++- python/pyproject.toml | 7 + python/uv.lock | 22 + 13 files changed, 1278 insertions(+), 444 deletions(-) diff --git a/python/docs/guides/pytest_plugin/configuration.md b/python/docs/guides/pytest_plugin/configuration.md index 47427055b..a8e291006 100644 --- a/python/docs/guides/pytest_plugin/configuration.md +++ b/python/docs/guides/pytest_plugin/configuration.md @@ -118,8 +118,7 @@ def sift_client() -> SiftClient: |---|---|---|---| | `report_context` | fixture (autouse) | session | The `ReportContext` backing the run's `TestReport`. Use it to attach metadata or open ad-hoc steps. | | `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, `pytest_fail_if_step_failed`, and `current_step`. | -| `_hierarchy_parents` | internal fixture (autouse) | function | Opens a parent step for each `pytest.Package`, `pytest.Module`, and `pytest.Class` ancestor of the current test. Each layer is gated independently; see [settings reference](#settings-reference). | -| `_parametrize_parents` | internal fixture (autouse) | function | Opens a parent step for each `@pytest.mark.parametrize` axis (and fixture parametrization), nested inside the hierarchy parents. | +| `_sift_parents` | internal fixture (autouse) | function | Resolves the report-tree parents for the current test: a parent step for each `pytest.Package`, `pytest.Module`, and `pytest.Class` ancestor, then one per `@pytest.mark.parametrize` axis (and fixture parametrization) nested inside them. Parents are created once and reused across tests in any order, so test execution order is never changed. Each layer is gated independently; see [settings reference](#settings-reference). 
| | `client_has_connection` | fixture | session | Calls `sift_client.ping.ping()`; consulted by `report_context` at session start in online mode (the default). Override to skip the ping or use a different reachability signal. | ## Settings reference diff --git a/python/docs/guides/pytest_plugin/pass_fail_behavior.md b/python/docs/guides/pytest_plugin/pass_fail_behavior.md index 2ce3d0697..d0862778c 100644 --- a/python/docs/guides/pytest_plugin/pass_fail_behavior.md +++ b/python/docs/guides/pytest_plugin/pass_fail_behavior.md @@ -13,9 +13,9 @@ The statuses below come from `sift_client.sift_types.test_report.TestStatus`. | `PASSED` | The step completed and every check it owns succeeded. | | `FAILED` | An assertion, a `pytest.fail(...)`, a failed `report_outcome`, or a failing measurement marked it. | | `ERROR` | An unexpected exception escaped the test body or a fixture (setup or teardown). | -| `ABORTED` | A hard exit (`SystemExit`, observed `KeyboardInterrupt`) interrupted the test. | +| `ABORTED` | A hard exit (`SystemExit` or `KeyboardInterrupt`) cut the test off; resolved while pytest tears the session down. | | `SKIPPED` | The test was skipped at collection time, at runtime, or from a fixture. | -| `IN_PROGRESS` | Test in progress or the plugin never observed a final outcome (e.g. a session-aborting interrupt killed pytest first). | +| `IN_PROGRESS` | A transient creation state. It survives into the report only if the process is killed so abruptly that teardown never runs. | ## Normal test outcomes @@ -34,15 +34,27 @@ mapping to `FAILED`. A non-assertion exception gets its formatted traceback ## Hard exits -Hard exits the plugin can observe map to `ABORTED`. If pytest tears the -session down before the plugin sees the exit, the step stays at -`IN_PROGRESS` instead of resolving. 
- -| Scenario | Trigger | Outcome | -| ---------------------------------------------- | ------------------------- | -------------------------------------------------------------------- | -| `SystemExit` from the test body | `sys.exit(1)` | `ABORTED` | -| `KeyboardInterrupt` the plugin observes | `raise KeyboardInterrupt` | `ABORTED` | -| Session-aborting `KeyboardInterrupt` | Ctrl-C terminates pytest | `IN_PROGRESS` (session ends before the plugin's hooks fire) | +Hard exits map to `ABORTED`. The step is resolved during fixture teardown, not +at the instant of the exit: + +- When the exit produces a call-phase report (`sys.exit(1)`, `SystemExit`), the + plugin reads the status off that report. +- When a `KeyboardInterrupt` aborts the session before any call-phase report + (Ctrl-C, or `raise KeyboardInterrupt` in the body), pytest still runs fixture + finalizers as it unwinds. The plugin sees setup completed with no call outcome + and resolves the cut-off step to `ABORTED` there. + +The status only reaches the report if those finalizers run. If the process is +killed before they do (`SIGKILL`, the OOM killer, power loss), nothing is written +and the step keeps the `IN_PROGRESS` it was created with. That is the only path +that leaves a step `IN_PROGRESS` in a finalized report. 
+ +| Scenario | Trigger | Outcome | +| ---------------------------------------------- | ---------------------------------- | ------------------------------------------------ | +| `SystemExit` from the test body | `sys.exit(1)` | `ABORTED` (read from the call-phase report) | +| `KeyboardInterrupt` from the test body | `raise KeyboardInterrupt` | `ABORTED` (resolved during teardown) | +| Session-aborting `KeyboardInterrupt` | Ctrl-C terminates pytest | `ABORTED` (resolved during teardown) | +| Process killed before finalizers run | `SIGKILL` / OOM / power loss | `IN_PROGRESS` (nothing written after creation) | ### Abort propagation through nested substeps diff --git a/python/docs/guides/pytest_plugin/report_structure.md b/python/docs/guides/pytest_plugin/report_structure.md index dd0d8ed54..188bee4ca 100644 --- a/python/docs/guides/pytest_plugin/report_structure.md +++ b/python/docs/guides/pytest_plugin/report_structure.md @@ -98,6 +98,12 @@ individually opt-out via ini flags (`sift_package_step`, `sift_module_step`, `sift_class_step`, `sift_parametrize_nesting`). Class/module/package docstrings become the matching step's description. +A parent step is created `IN_PROGRESS` and resolves to its final status as soon +as the last test in its subtree finishes — independent of test execution order, +so with incremental upload the report tree fills in progressively rather than +all at once at the end. Its time window spans from its first test starting to its +last test finishing. 
+ ### Linking a Run to the report `report_context` is the session-scoped fixture; mutating it in one test affects diff --git a/python/lib/sift_client/_internal/pytest_plugin/report.py b/python/lib/sift_client/_internal/pytest_plugin/report.py index 5ce0590f1..e125c3e03 100644 --- a/python/lib/sift_client/_internal/pytest_plugin/report.py +++ b/python/lib/sift_client/_internal/pytest_plugin/report.py @@ -32,9 +32,9 @@ TEST_SYSTEM_NAME_OPTION, ) from sift_client._internal.pytest_plugin.steps import ( - drain_hierarchy_stack, - drain_parametrize_stack, + finalize_parents, parametrize_path_key, + strip_param, ) from sift_client.sift_types.test_report import ErrorInfo, TestStatus from sift_client.util.test_results import ReportContext @@ -124,7 +124,6 @@ def resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: status: TestStatus | None = None error_info: ErrorInfo | None = None - keep_managed = False if setup_phase is not None and setup_phase.report.outcome == "failed": status = TestStatus.ERROR @@ -136,11 +135,13 @@ def resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: elif setup_phase is not None and setup_phase.report.outcome == "skipped": status = TestStatus.SKIPPED elif call_phase is None: - # Setup completed but the call-phase report never fired; the inner - # pytester session was aborted (e.g. by KeyboardInterrupt) before the - # plugin could observe the outcome. Leave the step at IN_PROGRESS so - # the report does not lie about a clean pass. - keep_managed = True + # Setup completed but the call-phase report never fired; the session was + # aborted (e.g. by KeyboardInterrupt) before the plugin could observe the + # outcome. Resolve to ABORTED rather than leaving it IN_PROGRESS, since the + # test was cut off and a finalized report should not carry a step that + # still reads as in-progress. No call ``excinfo`` exists here, so there is + # no traceback to attach. 
+ status = TestStatus.ABORTED else: wasxfail = getattr(call_phase.report, "wasxfail", None) if wasxfail is not None: @@ -179,7 +180,7 @@ def resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: elif isinstance(excinfo.value, (KeyboardInterrupt, SystemExit)): # Hard exits the plugin can observe: pytest converted the # raise into a call-phase report. The session-aborting variant - # (call_phase is None) lands earlier and stays IN_PROGRESS. + # (call_phase is None) lands in the branch above, also ABORTED. status = TestStatus.ABORTED error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) elif xfail_marker is not None: @@ -194,14 +195,13 @@ def resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: status = TestStatus.ERROR error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) - if status is None and not keep_managed: + if status is None: return - if status is not None: - # BaseType is frozen; mutate via __dict__ the same way _apply_client_to_instance does. - current_step.__dict__["status"] = status - if error_info is not None: - current_step.__dict__["error_info"] = error_info + # BaseType is frozen; mutate via __dict__ the same way _apply_client_to_instance does. + current_step.__dict__["status"] = status + if error_info is not None: + current_step.__dict__["error_info"] = error_info new_step._sift_managed_externally = True @@ -232,19 +232,6 @@ def _relativize(path: Path, rootpath: Path) -> str: return "" if rel == "." else rel -def _strip_param(nodeid: str) -> str: - """Drop the trailing ``[param]`` from a nodeid, keeping ``file::Class::func``. - - The parametrize id is a variation of the test, not its identity; leaving it - in would make a re-parametrization silently shift the grouping key. Splits on - the last ``::`` segment and cuts at its first ``[``; class/function names - never contain ``[``, so nested brackets in a param value can't confuse it. 
- """ - head, sep, leaf = nodeid.rpartition("::") - leaf = leaf.split("[", 1)[0] - return f"{head}{sep}{leaf}" - - def derive_target(request: pytest.FixtureRequest, args: tuple[str, ...]) -> str: """Describe what was run, from the collected items rather than the command line. @@ -273,7 +260,7 @@ def _anchor(rel: str) -> str: if not items: return root if len(items) == 1: - return _anchor(_strip_param(items[0].nodeid)) + return _anchor(strip_param(items[0].nodeid)) paths = {p for p in (getattr(i, "path", None) for i in items) if p is not None} if not paths: return root @@ -434,16 +421,13 @@ def report_context_impl( try: yield context finally: - # Drain the hierarchy + parametrize stacks INSIDE the - # ReportContext's ``with`` block, so the final ``__exit__`` - # update calls for those parent steps are written to the log - # file BEFORE the import worker drains. Without this, the - # worker exits with a partial backlog and the parent steps - # are stuck IN_PROGRESS in the Sift report. - try: - drain_parametrize_stack() - finally: - drain_hierarchy_stack() + # Close any report-tree parents still open INSIDE the ReportContext's + # ``with`` block, so their final ``__exit__`` update calls are written + # to the log file BEFORE the import worker drains. Without this, the + # worker exits with a partial backlog and the parent steps are stuck + # IN_PROGRESS in the Sift report. Most parents already closed early as + # their subtrees finished; this is the backstop for the rest. + finalize_parents() # Placeholder credentials used in --sift-offline mode when env/ini values @@ -478,12 +462,12 @@ def step_impl( report_context: ReportContext, request: pytest.FixtureRequest ) -> Generator[NewStep, None, None]: node = request.node - # Items get a parametrize path stashed in ``pytest_collection_modifyitems``; + # Items get a parametrize path stashed in ``pytest_itemcollected``; # modules/other nodes fall back to their node name. 
The leaf frame - # (``path[-1]``) is the test-specific display name; parents are opened - # by ``_parametrize_parents``. When parametrize-nesting is disabled, fall - # back to the bracket-mangled pytest name (e.g. ``test_a[1]``) so the leaf - # remains uniquely identifiable. + # (``path[-1]``) is the test-specific display name; parents are opened by + # ``_sift_parents``. When parametrize-nesting is disabled, fall back to the + # bracket-mangled pytest name (e.g. ``test_a[1]``) so the leaf remains + # uniquely identifiable. if PARAMETRIZE_NESTING_OPTION.resolve(request.config): path = node.stash.get(parametrize_path_key, ()) name = path[-1] if path else str(node.name) @@ -498,8 +482,17 @@ def step_impl( existing_docstring = getattr(getattr(node, "obj", None), "__doc__", None) or None except Exception: existing_docstring = None + # Attach the leaf under the parent ``_sift_parents`` resolved for this item + # (None -> a report-root step). ``push=True`` keeps the leaf on the step stack + # so any in-test ``substep`` nests under it. + parent_ns: NewStep | None = getattr(node, "_sift_parent", None) + parent_step = parent_ns.current_step if parent_ns is not None else None with report_context.new_step( - name=name, description=existing_docstring, assertion_as_fail_not_error=False + name=name, + description=existing_docstring, + assertion_as_fail_not_error=False, + parent=parent_step, + push=True, ) as new_step: node._sift_step = new_step yield new_step diff --git a/python/lib/sift_client/_internal/pytest_plugin/steps.py b/python/lib/sift_client/_internal/pytest_plugin/steps.py index 9904ceecb..26779cb73 100644 --- a/python/lib/sift_client/_internal/pytest_plugin/steps.py +++ b/python/lib/sift_client/_internal/pytest_plugin/steps.py @@ -1,19 +1,22 @@ -"""Parent-step stacks: the parametrize and hierarchy frames shared across items. 
- -Holds the collection-phase stash keys and the two module-level frame stacks -(``parametrize_stack`` / ``hierarchy_stack``), the helpers that build a chain -for an item and drain the stacks, and the per-item reconcilers the autouse -fixtures delegate to. Frames are shared across sibling test items and drained -innermost-first at session end. +"""Report-tree parent steps: an identity-keyed registry built without reordering. + +Each test's package/module/class ancestors ("hierarchy" parents) and each +``@pytest.mark.parametrize`` axis ("parametrize" parents) become parent steps the +leaf nests under. Parents are kept in identity-keyed registries — created once and +reused by every descendant regardless of execution order — so the plugin never +reorders test items. A parent is closed as soon as the last leaf in its subtree +finishes (``release_finished_leaf``), with ``finalize_parents`` as the session-end +backstop for anything still open. """ from __future__ import annotations import warnings -from typing import Any, Tuple +from typing import TYPE_CHECKING, Any, List, Optional, Tuple import pytest +from sift_client._internal.pytest_plugin.modes import gate_enabled from sift_client._internal.pytest_plugin.options import ( CLASS_STEP_OPTION, MODULE_STEP_OPTION, @@ -21,110 +24,75 @@ PARAMETRIZE_NESTING_OPTION, ) -STASH_MISSING = object() - -parametrize_path_key = pytest.StashKey[Tuple[str, ...]]() -# Each frame: (path_key, open step). Frames are shared across sibling test items -# and drained at session end. -parametrize_stack: list[tuple[str, Any]] = [] - -hierarchy_key = pytest.StashKey[Tuple[Tuple[str, str, "str | None", bool], ...]]() -# Outer-to-inner frames for the item's collection-tree ancestors. Each chain -# entry is ``(identity, name, doc, rendered)``: -# - ``identity``: a globally-unique key (``node.nodeid``) used for diff -# comparison. 
Two ancestors at the same depth with the same display name -# but reached via different paths (e.g., ``proj_a/utils`` and -# ``proj_b/utils`` in a monorepo) get distinct identities, so they never -# silently merge in the diff. -# - ``name``: the human-readable step name used when ``rendered`` opens the -# Sift step. -# - ``doc``: docstring used for the step description if rendered. -# - ``rendered``: True iff the corresponding ``sift_*_step`` ini flag is on. -# Non-rendered frames participate in the diff but do not call -# ``rc.new_step(...)``; they appear with ``ns=None`` in the stack. -# -# Stack entries: ``(identity, name, open_step_or_None)``. Frames are shared -# across sibling test items and drained at session end. Drained AFTER -# parametrize_stack since parametrize parents nest inside hierarchy parents. -hierarchy_stack: list[tuple[str, str, Any]] = [] - - -def drain_step_stack(stack: list, *, swallow_errors: bool = True) -> None: - """Pop and close every frame. - - With ``swallow_errors=True`` (default, used at teardown / session end), - per-frame failures are surfaced as ``SiftPytestStepDrainWarning`` so a - single misbehaving ``__exit__`` can't block the rest of the stack from - cleaning up or cascade out of pytest's finalizer chain. +if TYPE_CHECKING: + from typing import Callable - With ``swallow_errors=False`` (mid-session, when a class transition forces - parametrize parents to close), the stack is still fully drained but the - first per-frame exception is re-raised at the end as a - ``SiftPytestStepDrainError`` so a real upstream invariant violation - surfaces as a test error instead of a silenceable warning. - """ - from sift_client.pytest_plugin import SiftPytestStepDrainError, SiftPytestStepDrainWarning - - errors: list[tuple[str, BaseException]] = [] - while stack: - entry = stack.pop() - # Tolerate either ``(name, ns)`` (parametrize stack) or - # ``(identity, name, ns)`` (hierarchy stack) entries. 
- name, ns = entry[-2], entry[-1] - if ns is None: - # Non-rendered diff-only frame (e.g. a Package frame when - # ``sift_package_step=false``); nothing to close. - continue - try: - ns.__exit__(None, None, None) - except Exception as exc: - if swallow_errors: - warnings.warn( - f"Sift plugin: closing step {name!r} during drain raised " - f"{type(exc).__name__}: {exc}", - SiftPytestStepDrainWarning, - stacklevel=2, - ) - else: - errors.append((name, exc)) - if errors: - first_name, first_exc = errors[0] - raise SiftPytestStepDrainError( - f"Sift plugin: {len(errors)} step(s) raised while draining mid-session; " - f"first failure on {first_name!r}: {type(first_exc).__name__}: {first_exc}" - ) from first_exc - - -def drain_parametrize_stack(*, swallow_errors: bool = True) -> None: - drain_step_stack(parametrize_stack, swallow_errors=swallow_errors) - - -def drain_hierarchy_stack(*, swallow_errors: bool = True) -> None: - drain_step_stack(hierarchy_stack, swallow_errors=swallow_errors) - - -def close_frame(name: str, ns: Any) -> None: - """Close a single frame, warning on per-frame failure. - - Used by the mid-session hierarchy-stack pop and the rollback paths so a - misbehaving ``__exit__`` neither shadows the original exception nor leaks - sibling frames. ``ns=None`` indicates a non-rendered diff-only frame; skip. 
- """ - from sift_client.pytest_plugin import SiftPytestStepDrainWarning + from sift_client.util.test_results import ReportContext + from sift_client.util.test_results.context_manager import NewStep - if ns is None: - return - try: - ns.__exit__(None, None, None) - except Exception as exc: - warnings.warn( - f"Sift plugin: closing step {name!r} raised {type(exc).__name__}: {exc}", - SiftPytestStepDrainWarning, - stacklevel=2, - ) - - -def build_parametrize_path(item: pytest.Item) -> tuple[str, ...]: +# --- Report-tree type aliases --------------------------------------------- +# The plugin juggles a few small tuple/dict shapes for the parent step tree; +# naming them keeps the signatures below readable. Defined with ``typing`` +# generics (not ``list``/``tuple``) because some are used in runtime +# ``StashKey[...]`` subscriptions, which must stay importable on Python 3.8. +# +# A hierarchy parent's identity is just a ``str`` (the ancestor node's +# ``nodeid``); a parametrize parent's identity is a ``ParametrizeKey``: the +# test's param-stripped node id followed by its outer-to-inner axis frames +# (e.g. ``("pkg/test_m.py::TestC::test_a", "v=1")``). +ParametrizeKey = Tuple[str, ...] +# Outer-to-inner display-name axis path stashed per parametrized item +# (``(originalname, "v=1", ...)``); the leaf is its last frame. +ParametrizePath = Tuple[str, ...] +# One collection-tree ancestor: ``(identity, display name, docstring, rendered)``. +# ``rendered`` is True iff that layer's ``sift_*_step`` ini flag opens a step. +HierarchyFrame = Tuple[str, str, Optional[str], bool] +# Outer-to-inner ancestor frames stashed per item. +HierarchyChain = Tuple[HierarchyFrame, ...] +# A rendered parent to open, as returned by ``resolved_parents``. +HierarchyParent = Tuple[str, str, Optional[str]] # (identity, name, docstring) +ParametrizeParent = Tuple[ParametrizeKey, str] # (registry key, frame name) +# A gated-in leaf's parents: its rendered hierarchy identities and parametrize keys. 
+LeafParents = Tuple[List[str], List[ParametrizeKey]] + +parametrize_path_key = pytest.StashKey[ParametrizePath]() + +hierarchy_key = pytest.StashKey[HierarchyChain]() +# See ``HierarchyFrame`` above for the chain entry shape. ``identity`` is the +# node's ``nodeid``: two ancestors at the same depth with the same display name +# but reached via different paths (e.g., ``proj_a/utils`` and ``proj_b/utils`` in +# a monorepo) get distinct identities, so they never silently merge. Non-rendered +# frames open no step; the next rendered descendant attaches to the nearest +# rendered ancestor instead. + +# Open report-tree parent steps, keyed by identity so they are created once and +# reused by every descendant regardless of test execution order. The leaf step +# for each test is created under its resolved parent (see ``report.step_impl``), +# so no global ordering of test items is required. Parents live OUTSIDE +# ``ReportContext.step_stack`` (created with ``push=False``) and are closed early +# by ``release_finished_leaf``, or at session end by ``finalize_parents``. +# +# Hierarchy parents (packages / modules / classes) keyed by the ancestor node's +# ``nodeid``: +hierarchy_parents: dict[str, NewStep] = {} +# Parametrize parents keyed by ``ParametrizeKey``, so sibling parametrizations of +# one test share a parent while parametrizations under different +# tests/classes/modules never collide: +parametrize_parents: dict[ParametrizeKey, NewStep] = {} + +# Remaining descendant leaves per open-able parent, keyed exactly like the +# registries above. Populated from the collected (and selected) items in +# ``tally_expected_parents`` and decremented as each test finishes; when a count +# reaches zero the parent's whole subtree is done and it is closed early (see +# ``release_finished_leaf``) instead of waiting for session end. 
+expected_hierarchy: dict[str, int] = {} +expected_parametrize: dict[ParametrizeKey, int] = {} +# Each gated-in leaf's parent identities, so ``release_finished_leaf`` — which +# only receives a nodeid — knows which counters to decrement. +leaf_parents: dict[str, LeafParents] = {} + + +def build_parametrize_path(item: pytest.Item) -> ParametrizePath: """Outer-to-inner step display names for a parametrized item. Pytest stores ``callspec.params`` with the BOTTOM decorator's axis first; @@ -143,23 +111,21 @@ def build_parametrize_path(item: pytest.Item) -> tuple[str, ...]: def build_hierarchy_chain( item: pytest.Item | pytest.Collector, config: pytest.Config, -) -> tuple[tuple[str, str, str | None, bool], ...]: +) -> HierarchyChain: """Outer-to-inner ``(identity, name, docstring, rendered)`` for collection ancestors. Walks ``item.parent`` upward and ALWAYS collects every ``pytest.Package``, - ``pytest.Module``, and ``pytest.Class`` ancestor; they all participate in - the diff that keeps the report tree coherent across tests, so two - same-named ancestors reached via different paths (e.g., ``proj_a/utils`` - and ``proj_b/utils`` in a monorepo where the ``proj_*`` dirs are - ``pytest.Dir`` nodes the walker skips) cannot silently merge. + ``pytest.Module``, and ``pytest.Class`` ancestor; they all carry the identity + that keeps the report tree coherent across tests, so two same-named ancestors + reached via different paths (e.g., ``proj_a/utils`` and ``proj_b/utils`` in a + monorepo where the ``proj_*`` dirs are ``pytest.Dir`` nodes the walker skips) + cannot silently merge. - The ``identity`` field is ``node.nodeid``, globally unique per collected - node. The diff compares on identity, not the display ``name``. + The ``identity`` field is ``node.nodeid``, globally unique per collected node. The ``rendered`` flag is True iff the layer's ini flag is on (``sift_package_step`` / ``sift_module_step`` / ``sift_class_step``). 
- Non-rendered frames participate in the diff for identity but don't open a - Sift step. + Non-rendered frames carry identity but don't open a Sift step. The ``node.obj`` access is a pytest property that imports the underlying Python object and can raise *any* exception (ImportError, custom @@ -171,7 +137,7 @@ def build_hierarchy_chain( include_module = bool(MODULE_STEP_OPTION.resolve(config)) include_class = bool(CLASS_STEP_OPTION.resolve(config)) - chain: list[tuple[str, str, str | None, bool]] = [] + chain: list[HierarchyFrame] = [] # ``node.parent`` is typed as the internal ``_pytest.nodes.Node`` which # isn't part of pytest's public API; widen to ``Any`` for the walk. node: Any = item @@ -196,115 +162,292 @@ def build_hierarchy_chain( return tuple(reversed(chain)) -def reconcile_hierarchy(request: pytest.FixtureRequest, config: pytest.Config) -> None: - """Open/close hierarchy parents so the open stack matches the item's chain. +def resolved_parents( + node: pytest.Item, + config: pytest.Config, +) -> tuple[list[HierarchyParent], list[ParametrizeParent]]: + """The rendered report-tree parents for ``node`` — the single source of truth. + + Shared by ``get_or_create_parent_chain`` (which opens these parents) and the + early-close counters in ``tally_expected_parents`` (which count them), so the + two can never key on different identities. Returns ``(hierarchy, parametrize)`` + outer-to-inner: + + * hierarchy: ``(identity, name, doc)`` for each rendered package/module/class + ancestor. ``identity`` is the node's ``nodeid`` (the registry key). + * parametrize: ``(registry key, frame name)`` for each parametrize axis except + the innermost (the leaf is the ``step`` fixture's job). Empty when + ``sift_parametrize_nesting`` is off or the item isn't parametrized. + + Reads the per-item stash written in ``pytest_itemcollected``; recomputes for + items a later hook injected without going through it. 
+ """ + if hierarchy_key in node.stash: + chain = node.stash[hierarchy_key] + else: + chain = build_hierarchy_chain(node, config) + # Non-rendered frames open no step; the next rendered descendant attaches to + # the nearest rendered ancestor, so they are simply dropped here. + hierarchy = [(identity, name, doc) for identity, name, doc, rendered in chain if rendered] + + parametrize: list[ParametrizeParent] = [] + if PARAMETRIZE_NESTING_OPTION.resolve(config): + if parametrize_path_key in node.stash: + path = node.stash[parametrize_path_key] + else: + path = build_parametrize_path(node) + if path: + # Key parametrize parents by the test's param-stripped identity plus + # the outer frame prefix, so sibling params share a parent but params + # under different tests never merge. + key: ParametrizeKey = (strip_param(node.nodeid),) + for frame in path[:-1]: + key = (*key, frame) + parametrize.append((key, frame)) + return hierarchy, parametrize + + +def strip_param(nodeid: str) -> str: + """Drop the trailing ``[param]`` from a nodeid, keeping ``file::Class::func``. + + The parametrize id is a variation of the test, not its identity — leaving it + in would make a re-parametrization silently shift the grouping key. Splits on + the last ``::`` segment and cuts at its first ``[``; class/function names + never contain ``[``, so nested brackets in a param value can't confuse it. + """ + head, sep, leaf = nodeid.rpartition("::") + leaf = leaf.split("[", 1)[0] + return f"{head}{sep}{leaf}" + + +def get_or_create_parent_chain( + node: pytest.Item, + config: pytest.Config, + request: pytest.FixtureRequest, +) -> NewStep | None: + """Resolve the innermost report-tree parent for ``node``, creating any missing ancestors. + + Walks the item's rendered hierarchy ancestors (outer-to-inner) and then its + parametrize axes (see ``resolved_parents``), get-or-creating one parent step + per identity in the registries. 
Each new parent is opened under the running + parent (``push=False``, so it stays off ``ReportContext.step_stack``) and + reused by every later descendant — no contiguity of sibling items is required, + so test execution order is irrelevant. + + Returns the innermost parent the leaf should attach to, or ``None`` when no + rendered parent applies (the leaf becomes a report-root step). ``report_context`` + is fetched lazily, only when a parent actually needs creating, so excluded + items never trigger eager context setup. + """ + rc_cache: list[ReportContext] = [] + + def rc() -> ReportContext: + if not rc_cache: + rc_cache.append(request.getfixturevalue("report_context")) + return rc_cache[0] + + return _resolve_parent_chain(node, config, rc) + + +def resolve_parent_chain_in_context( + node: pytest.Item, + config: pytest.Config, + context: ReportContext, +) -> NewStep | None: + """``get_or_create_parent_chain`` for callers holding a ``ReportContext`` directly. + + The collection-skip path runs from ``pytest_runtest_makereport`` (the autouse + fixtures never ran for a marker-skipped item), so it has no ``FixtureRequest`` + to resolve ``report_context`` from, only the session ``ReportContext``. It + must still nest the skipped item's step under the same registry parents a + running sibling uses, so it shares the create-once logic here. + """ + return _resolve_parent_chain(node, config, lambda: context) + + +def _resolve_parent_chain( + node: pytest.Item, + config: pytest.Config, + rc: Callable[[], ReportContext], +) -> NewStep | None: + """Shared body of the two parent-chain resolvers; ``rc`` supplies the context. + + ``rc`` is called only when a parent actually needs creating, so a caller that + passes a lazy getter keeps the "no eager context setup" guarantee. + """ + hierarchy, parametrize = resolved_parents(node, config) + parent_step: Any = None # TestStep of the running innermost parent, or None (root). 
+ innermost: NewStep | None = None + + for identity, name, doc in hierarchy: + ns = hierarchy_parents.get(identity) + if ns is None: + ns = rc().new_step( + name=name, + description=doc, + assertion_as_fail_not_error=False, + parent=parent_step, + push=False, + ) + ns.__enter__() + hierarchy_parents[identity] = ns + parent_step = ns.current_step + innermost = ns + + for key, frame in parametrize: + ns = parametrize_parents.get(key) + if ns is None: + ns = rc().new_step( + name=frame, + assertion_as_fail_not_error=False, + parent=parent_step, + push=False, + ) + ns.__enter__() + parametrize_parents[key] = ns + parent_step = ns.current_step + innermost = ns + + return innermost + - Diffs the item's desired ``(package, module, class)`` chain against - ``hierarchy_stack`` on identity (nodeid), pops the stale tail, and pushes - new rendered frames. Which node types render is decided at build time by - ``sift_package_step`` / ``sift_module_step`` / ``sift_class_step``; when the - chain changes, the parametrize stack is drained first since parametrize - parents nest INSIDE these. +def close_parent(ns: NewStep) -> None: + """Close one open report-tree parent, stamping its last-descendant finish time. + + Shared by mid-session early close (``release_finished_leaf``) and the + session-end drain (``finalize_parents``). The ``end_time`` override comes from + ``ReportContext.parent_end_times`` so the parent's window ends at its latest + descendant rather than wall-clock at close. A misbehaving ``__exit__`` is + surfaced as a warning so it never blocks the remaining parents or cascades out + of pytest's finalizer chain. """ - # Fall back to computing the chain on-demand for items that bypassed - # ``pytest_collection_modifyitems`` (e.g., dynamically inserted by another - # plugin's later hook). Defaulting to ``()`` would incorrectly drain the - # entire open hierarchy stack for those items. 
- desired = request.node.stash.get(hierarchy_key, STASH_MISSING) - if desired is STASH_MISSING: - desired = build_hierarchy_chain(request.node, config) - common = 0 - # Compare on identity (nodeid); same-named ancestors at different paths - # MUST stay distinct. - while ( - common < len(hierarchy_stack) - and common < len(desired) - and hierarchy_stack[common][0] == desired[common][0] - ): - common += 1 - # Any change to the hierarchy chain orphans parametrize parents from the - # previous test. Drain them before mutating the hierarchy stack so - # ReportContext's top-of-stack invariant holds. Strict mode: a per-frame - # ``__exit__`` failure here signals a real upstream drift between the - # plugin stacks and ReportContext; raise it as a test error instead of a - # silenceable warning. - if common < len(hierarchy_stack) or common < len(desired): - drain_parametrize_stack(swallow_errors=False) - # Symmetric per-frame guard for the hierarchy pop so one bad ``__exit__`` - # doesn't leave hierarchy_stack partially drained for every subsequent test. - while len(hierarchy_stack) > common: - _identity, name, ns = hierarchy_stack.pop() - close_frame(name, ns) - if not desired[common:]: + from sift_client.pytest_plugin import REPORT_CONTEXT, SiftPytestStepDrainWarning + + step = ns.current_step + if step is None: return - # Fetch ``report_context`` lazily, but only when there's at least one - # rendered frame to push. Pure diff-only frames (e.g. a Package frame when - # ``sift_package_step=false``) just update hierarchy_stack with ns=None. - rc = None - # Roll back any partial push so a mid-loop exception doesn't leave half - # the chain orphaned on the stack. Per-frame guard inside the rollback so - # a failing ``__exit__`` doesn't shadow the original exception or leak - # the remaining opened frames. 
def close_parents_innermost_first(parents: list[NewStep]) -> None:
    """Close every parent in ``parents``, deepest ``step_path`` first.

    Depth is the number of ``.`` separators in a parent's ``step_path``; a
    parent without a ``current_step`` ranks at depth ``-1`` and is therefore
    closed last. Closing children before their ancestors lets a child's result
    and finish time reach the ancestor's bookkeeping before that ancestor
    resolves, so a failing or late subtree rolls up correctly whether the close
    happens mid-session or at session end.

    Args:
        parents: Open parent steps to close. The list is reordered in place.
    """

    def nesting_depth(parent: NewStep) -> int:
        # Parents that never opened a step sort after every real depth.
        current = parent.current_step
        if not current:
            return -1
        return current.step_path.count(".")

    # Stable sort: parents at equal depth keep their incoming relative order.
    parents.sort(key=nesting_depth, reverse=True)
    for parent in parents:
        close_parent(parent)
def _decrement_parent_counts(
    keys: list[Any],
    expected: dict[Any, int],
    registry: dict[Any, NewStep],
    ready: list[NewStep],
) -> None:
    """Record one finished descendant against each parent named in ``keys``.

    Args:
        keys: Parent identifiers the finished leaf sits under.
        expected: Remaining-descendant count per parent; mutated in place.
        registry: Currently open parent step per identifier; mutated in place.
        ready: Output list; parents whose subtree just completed are appended
            here so the caller can close them.

    A key absent from ``expected`` is ignored. When a count is at (or below)
    one, this leaf was the last outstanding descendant: the key is removed from
    both maps and its open step, if one was registered, is queued on ``ready``.
    Otherwise the count simply drops by one.
    """
    for parent_key in keys:
        outstanding = expected.get(parent_key)
        if outstanding is None:
            # Not tracked (or already released): nothing to account for.
            continue
        if outstanding > 1:
            expected[parent_key] = outstanding - 1
            continue
        # Last descendant finished: retire the parent from both maps.
        expected.pop(parent_key, None)
        finished = registry.pop(parent_key, None)
        if finished is not None:
            ready.append(finished)
When a parent's remaining-leaf count + reaches zero its whole subtree has finished, so it is closed now rather than + at session end — giving incremental uploads a progressively-resolving report + under any execution order. Closes innermost-first so a child parent rolls its + result and finish time up before its own parent resolves; several levels can + complete on the same leaf (e.g. the last param variant closes its parametrize + parent, class, and module at once). Items not in ``leaf_parents`` (gated-off, + or injected after collection) are ignored; anything left open is handled by + ``finalize_parents``. + """ + entry = leaf_parents.pop(nodeid, None) + if entry is None: return - rc = request.getfixturevalue("report_context") - opened: list[tuple[str, Any]] = [] - try: - for display in parents[common:]: - ns = rc.new_step(name=display, assertion_as_fail_not_error=False) - ns.__enter__() - opened.append((display, ns)) - except BaseException: - while opened: - name, ns = opened.pop() - close_frame(name, ns) - raise - parametrize_stack.extend(opened) + h_ids, p_keys = entry + ready: list[NewStep] = [] + _decrement_parent_counts(h_ids, expected_hierarchy, hierarchy_parents, ready) + _decrement_parent_counts(p_keys, expected_parametrize, parametrize_parents, ready) + if ready: + close_parents_innermost_first(ready) diff --git a/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py index 77e09bdf5..74c498fd1 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py +++ b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py @@ -28,6 +28,10 @@ class CapturedStep: parent_step_id: str | None statuses: list[TestStatus] = field(default_factory=list) error_messages: list[str] = field(default_factory=list) + # ``startTime`` from the create entry; ``endTime`` is the latest seen across + # create/update entries. Both are RFC3339 strings. 
def log_events(log_path: Path) -> list[tuple[str, str, TestStatus]]:
    """Return the log's step events in write order.

    Each event is a ``(request_type, step_name, status)`` tuple. Write order is
    preserved so callers can assert *when* a step resolved relative to other
    entries, e.g. that a container's terminal ``UpdateTestStep`` comes before a
    later sibling's ``CreateTestStep``.

    ``UpdateTestStep`` entries are matched to a name through the id recorded by
    an earlier ``CreateTestStep``; an update whose id was never seen gets an
    empty name. A ``CreateTestStep`` line without a response id is skipped, as
    is any other request type. A missing log file yields an empty list.
    """
    timeline: list[tuple[str, str, TestStatus]] = []
    if not log_path.exists():
        return timeline

    names_by_id: dict[str, str] = {}
    for request_type, response_id, payload in iter_log_data_lines(log_path):
        step_fields = json.loads(payload).get("testStep", {})
        step_status = _status(step_fields.get("status"))
        if request_type == "UpdateTestStep":
            step_name = names_by_id.get(step_fields.get("testStepId"), "")
            timeline.append((request_type, step_name, step_status))
        elif request_type == "CreateTestStep" and response_id:
            step_name = step_fields.get("name", "")
            # Remember the id so later updates can be attributed to this step.
            names_by_id[response_id] = step_name
            timeline.append((request_type, step_name, step_status))
    return timeline
Hard exits map +to `ABORTED`, resolved during fixture teardown: from the call-phase report when +there is one (`SystemExit`), or, when a `KeyboardInterrupt` aborts the session +before that report, from setup having completed with no call outcome. The status +reaches the report only because pytest runs finalizers as it unwinds; a step +keeps the `IN_PROGRESS` it was created with only if the process is killed before +those finalizers run. ## Case ID scheme @@ -36,7 +39,7 @@ be traced back to its row here without rereading the scenario: | `CALL-03` | Generic exception in call phase | `raise ValueError("boom")` | `ERROR` | | `CALL-04` | `pytest.fail("...")` from body | `pytest.fail("intentional failure")` | `FAILED` | | `CALL-05` | `SystemExit` from the test body | `sys.exit(1)` | `ABORTED` | -| `CALL-06` | `KeyboardInterrupt` in body | `raise KeyboardInterrupt` | `IN_PROGRESS` — session aborts before the plugin sees the interrupt; `ABORTED` if the plugin does see it | +| `CALL-06` | `KeyboardInterrupt` in body | `raise KeyboardInterrupt` | `ABORTED` — the session aborts before a call-phase report, but fixture teardown still runs, so the cut-off step resolves to `ABORTED` rather than staying `IN_PROGRESS` | | `CALL-07` | Substep raises non-Assertion exception | `with step.substep(...): raise ValueError("boom")` | Substep `ERROR`, test step `FAILED` (child-failed signal outranks the propagating exception) | ## Skip paths diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py index 39ee0ccf6..18b03c194 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py @@ -12,17 +12,36 @@ from __future__ import annotations +from datetime import datetime, timezone from textwrap import dedent +from types import SimpleNamespace from typing import TYPE_CHECKING import pytest from sift_client._tests.pytest_plugin import 
def _parse_ts(ts: str) -> datetime:
    """Parse an RFC3339 timestamp into a UTC-aware ``datetime`` (Python 3.8+).

    ``datetime.fromisoformat`` only accepts ``Z`` / arbitrary fractional digits
    on 3.11+, so the second-precision base is parsed with ``strptime`` and the
    fractional part is applied by hand (padded or truncated to microseconds).

    Args:
        ts: Timestamp such as ``2026-05-19T06:25:14.123Z``. A trailing ``Z``,
            no suffix at all, or an explicit ``+hh:mm`` / ``-hh:mm`` offset are
            accepted.

    Returns:
        The instant as a timezone-aware ``datetime`` in UTC. Explicit offsets
        are converted to UTC rather than dropped, so values stay comparable.

    Raises:
        ValueError: If the date/time portion or the offset is malformed.
    """
    from datetime import timedelta

    body = ts
    offset = timedelta(0)
    if body.endswith(("Z", "z")):
        body = body[:-1]
    else:
        # Only look for an offset sign after the "T": the date part contains
        # hyphens that must not be mistaken for a negative offset.
        t_pos = body.find("T")
        if t_pos != -1:
            sign_pos = max(body.rfind("+", t_pos + 1), body.rfind("-", t_pos + 1))
            if sign_pos != -1:
                digits = body[sign_pos + 1 :].replace(":", "")
                offset = timedelta(hours=int(digits[:2]), minutes=int(digits[2:4] or 0))
                if body[sign_pos] == "-":
                    offset = -offset
                body = body[:sign_pos]

    base, _, frac = body.partition(".")
    # Tag as UTC so comparisons stay unambiguous; the offset is applied below.
    parsed = datetime.strptime(base, "%Y-%m-%dT%H:%M:%S").replace(tzinfo=timezone.utc)
    if frac:
        parsed = parsed.replace(microsecond=int(frac.ljust(6, "0")[:6]))
    # Local wall-clock time minus its UTC offset is the UTC instant.
    return parsed - offset
+ """ + pytester.makepyfile( + test_skip_nest=dedent( + """ + import pytest + + class TestFoo: + def test_run(self): + pass + + @pytest.mark.skip(reason="x") + def test_skipped(self): + pass + """ + ) + ) + result = pytester.runpytest_inprocess("-v", "-p", "no:randomly") + result.assert_outcomes(passed=1, skipped=1) + by_name = _by_name(capture.load_steps(log_file)) + assert len(by_name["TestFoo"]) == 1 + class_id = by_name["TestFoo"][0]["id"] + assert by_name["test_run"][0]["parent_step_id"] == class_id + assert by_name["test_skipped"][0]["parent_step_id"] == class_id + assert by_name["test_skipped"][0]["statuses"][-1] == TestStatus.SKIPPED + + def test_nested_classes_produce_nested_steps(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_nested=dedent( @@ -264,7 +319,7 @@ def test_a(self): assert _ancestor_names(steps, leaf)[:3] == ["test_a", "TestFoo", "test_doc.py"] -def test_transition_between_class_chains_drains_parametrize( +def test_two_class_chains_keep_parametrize_isolated( pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( @@ -309,62 +364,67 @@ def test_y(self, w): # --------------------------------------------------------------------------- -def test_drain_step_stack_continues_past_failing_exit() -> None: - """Lenient mode: a misbehaving ``__exit__`` must not block the rest of the stack.""" - from sift_client._internal.pytest_plugin.steps import drain_step_stack - from sift_client.pytest_plugin import SiftPytestStepDrainWarning +class _FakeParent: + """Minimal stand-in for an open ``NewStep`` parent in the plugin registries.""" - class _Good: - def __init__(self) -> None: - self.closed = False + def __init__(self, name: str, step_path: str, *, raises: str | None = None) -> None: + self.current_step = SimpleNamespace(name=name, step_path=step_path) + self._raises = raises + self.closed = False - def __exit__(self, *_: object) -> None: - self.closed = True + def __exit__(self, *_: object) -> None: + if 
@pytest.fixture
def clean_parent_registries():
    """Isolate the plugin's module-level parent state for the duration of a test.

    The ``finalize_parents`` resilience test pokes the globals directly, so
    they are snapshotted, emptied for the test, and restored afterwards. The
    registries and ``finalize_parents`` live in
    ``_internal.pytest_plugin.steps``; ``REPORT_CONTEXT`` is the public session
    global on ``sift_client.pytest_plugin``.

    ``finalize_parents`` clears the expected-count maps and ``leaf_parents`` as
    well as the two parent registries, so all five maps are saved and restored
    here; otherwise a test using this fixture would permanently wipe that
    bookkeeping for the enclosing session.

    Yields:
        The ``steps`` module, with every registry empty and
        ``REPORT_CONTEXT`` set to ``None``.
    """
    from sift_client import pytest_plugin
    from sift_client._internal.pytest_plugin import steps

    registries = (
        steps.hierarchy_parents,
        steps.parametrize_parents,
        steps.expected_hierarchy,
        steps.expected_parametrize,
        steps.leaf_parents,
    )
    snapshots = [dict(registry) for registry in registries]
    saved_context = pytest_plugin.REPORT_CONTEXT

    for registry in registries:
        registry.clear()
    pytest_plugin.REPORT_CONTEXT = None  # skip the end_time override lookup
    try:
        yield steps
    finally:
        # Mutate in place: other modules hold references to these same dicts.
        for registry, snapshot in zip(registries, snapshots):
            registry.clear()
            registry.update(snapshot)
        pytest_plugin.REPORT_CONTEXT = saved_context
def test_interleaved_execution_does_not_duplicate_parents(
    pytester: pytest.Pytester, log_file: Path
) -> None:
    """Sibling methods need not run contiguously to share one class parent.

    A conftest hook interleaves the two classes' methods so that ``TestA`` and
    ``TestB`` items alternate, the order pytest's own fixture-scope reordering
    can produce. Each class must still open exactly once and every method must
    parent to the right class. The test also checks that the run really was
    interleaved, so it cannot pass without exercising the scenario.
    """
    # Overwrite the conftest with one that registers the plugin AND reorders
    # items so the two classes interleave. The log_file fixture's pytest.ini
    # (offline + log path) still applies.
    pytester.makeconftest(
        dedent(
            """
            pytest_plugins = ["sift_client.pytest_plugin"]
            import pytest

            @pytest.hookimpl(trylast=True)
            def pytest_collection_modifyitems(config, items):
                # trylast so this runs after any reordering plugin and wins.
                a = [i for i in items if "TestA::" in i.nodeid]
                b = [i for i in items if "TestB::" in i.nodeid]
                interleaved = []
                for x, y in zip(a, b):
                    interleaved.append(x)
                    interleaved.append(y)
                items[:] = interleaved
            """
        )
    )
    pytester.makepyfile(
        test_inter=dedent(
            """
            class TestA:
                def test_a1(self):
                    pass

                def test_a2(self):
                    pass

            class TestB:
                def test_b1(self):
                    pass

                def test_b2(self):
                    pass
            """
        )
    )
    result = pytester.runpytest_inprocess("-v")
    result.assert_outcomes(passed=4)

    # Guard the premise: leaf steps must have been created alternating between
    # the two classes. Only the owning class is compared (the letter after
    # "test_"), so a shuffle *within* a class does not matter.
    leaf_names = {"test_a1", "test_a2", "test_b1", "test_b2"}
    created = [
        name
        for request_type, name, _ in capture.log_events(log_file)
        if request_type == "CreateTestStep" and name in leaf_names
    ]
    assert [name[len("test_")] for name in created] == ["a", "b", "a", "b"]

    steps = capture.load_steps(log_file)
    by_name = _by_name(steps)
    # Each class opens exactly once despite the interleaved run order.
    assert len(by_name["TestA"]) == 1
    assert len(by_name["TestB"]) == 1
    a_id = by_name["TestA"][0]["id"]
    b_id = by_name["TestB"][0]["id"]
    assert by_name["test_a1"][0]["parent_step_id"] == a_id
    assert by_name["test_a2"][0]["parent_step_id"] == a_id
    assert by_name["test_b1"][0]["parent_step_id"] == b_id
    assert by_name["test_b2"][0]["parent_step_id"] == b_id
def test_parent_opens_in_progress_and_resolves_exactly_once(
    pytester: pytest.Pytester, log_file: Path
) -> None:
    """A parent's status timeline is exactly ``IN_PROGRESS`` then one terminal value.

    The class and module parents are each written once on creation
    (``IN_PROGRESS``) and once on resolution (``PASSED`` here). Running a
    second child under an already-open parent must not add intermediate
    entries or reopen it.
    """
    inner_source = dedent(
        """
        class TestFoo:
            def test_a(self):
                pass

            def test_b(self):
                pass
        """
    )
    pytester.makepyfile(test_once=inner_source)
    outcome = pytester.runpytest_inprocess("-v")
    outcome.assert_outcomes(passed=2)

    recorded = _by_name(capture.load_steps(log_file))
    # Created in-progress, resolved once: no intermediate churn, no reopen.
    expected_timeline = [TestStatus.IN_PROGRESS, TestStatus.PASSED]
    for parent_name in ("TestFoo", "test_once.py"):
        assert recorded[parent_name][0]["statuses"] == expected_timeline
+ assert _parse_ts(module["start_time"]) <= _parse_ts(klass["start_time"]) + assert _parse_ts(module["end_time"]) == max(leaf_ends) + + +def test_parent_end_time_reflects_a_later_child_under_interleaving( + pytester: pytest.Pytester, log_file: Path +) -> None: + """When a parent's children run non-contiguously, its end_time tracks the + LAST child to finish — even one that runs after a different parent's child. + + Execution order is pinned to ``a1, b1, a2`` via a conftest hook, so + ``TestA``'s second child (``a2``) closes after ``TestB``'s child. ``TestA`` + must end at ``a2``'s finish, not ``a1``'s. + """ + pytester.makeconftest( + dedent( + """ + pytest_plugins = ["sift_client.pytest_plugin"] + import pytest + + _ORDER = ["test_a1", "test_b1", "test_a2"] + + @pytest.hookimpl(trylast=True) + def pytest_collection_modifyitems(config, items): + # trylast so this runs after any reordering plugin and wins. + items.sort(key=lambda i: _ORDER.index(i.name) if i.name in _ORDER else 99) + """ + ) + ) + pytester.makepyfile( + test_il=dedent( + """ + import time + + class TestA: + def test_a1(self): + pass + + def test_a2(self): + time.sleep(0.02) + + class TestB: + def test_b1(self): + pass + """ + ) + ) + result = pytester.runpytest_inprocess("-v") + result.assert_outcomes(passed=3) + by_name = _by_name(capture.load_steps(log_file)) + a_end = by_name["TestA"][0]["end_time"] + a1_end = by_name["test_a1"][0]["end_time"] + a2_end = by_name["test_a2"][0]["end_time"] + # TestA ends at its later child (a2), not the one that happened to run first. 
def _index(
    events: list[tuple],
    request_type: str,
    name: str,
    *,
    terminal: bool = False,
    status: TestStatus | None = None,
) -> int:
    """Return the index of the first log event matching the given criteria.

    Args:
        events: ``(request_type, step_name, status)`` tuples in write order.
        request_type: Request type the event must have.
        name: Step name the event must have.
        terminal: When true (and ``status`` is not given), only a resolved,
            non-``IN_PROGRESS`` status matches.
        status: When given, only this exact status matches; it takes
            precedence over ``terminal``.

    Raises:
        AssertionError: If no event matches. The message names what was
            searched for and lists the events, so a failing test explains
            itself instead of surfacing a bare ``StopIteration``.
    """
    for position, (event_type, event_name, event_status) in enumerate(events):
        if event_type != request_type or event_name != name:
            continue
        if status is not None:
            if event_status == status:
                return position
            continue
        if not terminal or event_status != TestStatus.IN_PROGRESS:
            return position

    if status is not None:
        wanted = f"status {status!r}"
    elif terminal:
        wanted = "a terminal status"
    else:
        wanted = "any status"
    raise AssertionError(
        f"no {request_type} event for step {name!r} with {wanted}; events were: {events!r}"
    )
def test_failing_parent_resolves_failed_mid_session(
    pytester: pytest.Pytester, log_file: Path
) -> None:
    """Early close carries the rolled-up status, not just the timing.

    ``TestFoo`` contains a failing test, so its ``FAILED`` update must be
    written before the next class (``TestBar``) is created.
    """
    inner_source = dedent(
        """
        class TestFoo:
            def test_a(self):
                raise AssertionError("boom")

        class TestBar:
            def test_c(self):
                pass
        """
    )
    pytester.makepyfile(test_midfail=inner_source)
    outcome = pytester.runpytest_inprocess("-v", "-p", "no:randomly")
    outcome.assert_outcomes(passed=1, failed=1)

    timeline = capture.log_events(log_file)
    foo_resolved_at = _index(timeline, "UpdateTestStep", "TestFoo", status=TestStatus.FAILED)
    bar_created_at = _index(timeline, "CreateTestStep", "TestBar")
    assert foo_resolved_at < bar_created_at
def test_excluded_sibling_does_not_stall_parent_close(
    pytester: pytest.Pytester, log_file: Path
) -> None:
    """A ``sift_exclude``-d method must not count toward its class's descendants.

    Were the excluded test counted, ``TestFoo``'s remaining-leaf tally could
    never reach zero and the class would resolve only at the session-end
    drain, i.e. after ``TestBar`` is created. Seeing ``TestFoo`` resolve
    *before* ``TestBar`` is created shows the gate filter is applied.
    """
    inner_source = dedent(
        """
        import pytest

        class TestFoo:
            @pytest.mark.sift_exclude
            def test_a(self):
                pass

            def test_b(self):
                pass

        class TestBar:
            def test_c(self):
                pass
        """
    )
    pytester.makepyfile(test_excl_close=inner_source)
    outcome = pytester.runpytest_inprocess("-v", "-p", "no:randomly")
    outcome.assert_outcomes(passed=3)

    timeline = capture.log_events(log_file)
    foo_resolved_at = _index(timeline, "UpdateTestStep", "TestFoo", terminal=True)
    bar_created_at = _index(timeline, "CreateTestStep", "TestBar")
    assert foo_resolved_at < bar_created_at
+ "-p", + "no:randomly", ) @@ -180,13 +186,13 @@ def test_x(step): assert capture.final_status("test_x") == TestStatus.PASSED -def test_keyboard_interrupt_leaves_step_in_progress(inner): +def test_keyboard_interrupt_resolves_step_to_aborted(inner): # Case: CALL-06 # KeyboardInterrupt aborts the session before the call-phase makereport - # fires; the plugin can't observe the interrupt. The contract is that - # the step is left in IN_PROGRESS rather than being silently resolved - # to PASSED — a session-aborting interrupt should not look like a clean - # pass in the report. + # fires; the plugin can't observe the interrupt directly. Setup completed + # but no call outcome was seen, so the step resolves to ABORTED rather than + # being left IN_PROGRESS (a finalized report should not carry a step that + # still reads as in-progress) or coerced to PASSED. try: _run( inner, @@ -199,7 +205,7 @@ def test_x(): pass outer = capture.test_step("test_x") assert outer is not None - assert outer.statuses[-1] == TestStatus.IN_PROGRESS + assert outer.statuses[-1] == TestStatus.ABORTED def test_substep_exception_records_error_with_failed_parent(inner): diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index a381d78cd..7e4c3c120 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -47,12 +47,13 @@ from sift_client._internal.pytest_plugin.steps import ( build_hierarchy_chain, build_parametrize_path, - drain_hierarchy_stack, - drain_parametrize_stack, + finalize_parents, + get_or_create_parent_chain, hierarchy_key, parametrize_path_key, - reconcile_hierarchy, - reconcile_parametrize, + release_finished_leaf, + resolve_parent_chain_in_context, + tally_expected_parents, ) from sift_client._internal.pytest_plugin.terminal import ( maybe_open_report, @@ -71,7 +72,6 @@ "NewStep", "ReportContext", "SiftPytestPluginWarning", - "SiftPytestStepDrainError", "SiftPytestStepDrainWarning", 
"client_has_connection", "report_context", @@ -90,18 +90,14 @@ class SiftPytestPluginWarning(SiftWarning): class SiftPytestStepDrainWarning(SiftPytestPluginWarning): - """A step's ``__exit__`` raised while the plugin was draining its stack. + """A parent step's ``__exit__`` raised while the plugin was closing it. - Surfaced at module-teardown or session-end so the drain can continue and - pytest test outcomes stay unaffected; the underlying exception is included - in the message for debugging. + Surfaced when a parent step is closed (early as its subtree finishes, or at + session end) so the close can continue and pytest test outcomes stay + unaffected; the underlying exception is included in the message for debugging. """ -class SiftPytestStepDrainError(RuntimeError): - """Raised when mid-session drain fails, signaling a likely upstream invariant break.""" - - # --------------------------------------------------------------------------- # Public session state and stash keys. # --------------------------------------------------------------------------- @@ -220,10 +216,9 @@ def report_context( The fixture is no longer autouse; it's instantiated on the first call to ``request.getfixturevalue("report_context")``, which today happens - inside the gated ``step``, ``_hierarchy_parents``, and - ``_parametrize_parents`` fixtures. If every test in the session is - excluded via the marker gate, this fixture is never resolved and no - ReportContext (or teardown subprocess) is created. + inside the gated ``step`` and ``_sift_parents`` fixtures. If every test in + the session is excluded via the marker gate, this fixture is never resolved + and no ReportContext (or teardown subprocess) is created. 
What gets yielded depends on the mode: @@ -274,7 +269,7 @@ def report_context( def step( request: pytest.FixtureRequest, pytestconfig: pytest.Config, - _parametrize_parents: None, + _sift_parents: None, ) -> Generator[NewStep | None, None, None]: """Create an outer step for the function when the Sift gate is on. @@ -294,39 +289,24 @@ def step( @pytest.fixture(autouse=True) -def _hierarchy_parents( +def _sift_parents( request: pytest.FixtureRequest, pytestconfig: pytest.Config, ) -> None: - """Open/close hierarchy parent steps (packages, modules, classes) for the current item. - - Gated off when the item is excluded (avoids eager ``report_context`` setup); - otherwise delegates to ``reconcile_hierarchy``, which diffs the item's - ancestor chain against the open stack and opens/closes parents to match. - """ - if not gate_enabled(request.node, pytestconfig): - return - reconcile_hierarchy(request, pytestconfig) + """Resolve (get-or-create) the report-tree parent for the current item. + Builds the item's hierarchy (packages / modules / classes) and parametrize + parents via ``get_or_create_parent_chain`` and stashes the innermost one on + the node as ``_sift_parent`` for the ``step`` fixture to nest the leaf under. + Parents are keyed by identity and reused across sibling items in any order, so + no reordering of test items is needed. -@pytest.fixture(autouse=True) -def _parametrize_parents( - request: pytest.FixtureRequest, - pytestconfig: pytest.Config, - _hierarchy_parents: None, -) -> None: - """Open/close shared parametrize parent steps for the current item. - - Ordered after ``_hierarchy_parents`` so parametrize parents nest inside the - hierarchy ones. Gated off when the item is excluded (so excluded items don't - eagerly request ``report_context``); otherwise delegates to - ``reconcile_parametrize``, which also no-ops when - ``sift_parametrize_nesting=false``. 
Parents persist until a later test's - chain pops them, or until ``pytest_sessionfinish`` drains the rest. + Gated off when the item is excluded so excluded items never eagerly create + ``report_context`` (preserving its lazy, first-gated-test creation). """ if not gate_enabled(request.node, pytestconfig): return - reconcile_parametrize(request, pytestconfig) + request.node._sift_parent = get_or_create_parent_chain(request.node, pytestconfig, request) # --------------------------------------------------------------------------- @@ -359,28 +339,32 @@ def pytest_configure(config: pytest.Config) -> None: warn_on_unknown_toml_keys(config) -def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None: - """Stash each item's class chain + parametrize path and cluster siblings. +def pytest_itemcollected(item: pytest.Item) -> None: + """Cache each test item's hierarchy chain and parametrize path at collection. + + This is a per-item hook, not ``pytest_collection_modifyitems`` — the plugin + never touches the ``items`` list or its order, so it cannot conflict with a + user's (or another plugin's) collection-ordering hook. The report tree is + built from an identity-keyed registry (see ``get_or_create_parent_chain``), + so item order is irrelevant to nesting; ``pytest-randomly``, + ``pytest-ordering``, and pytest's own fixture-scope reordering are all + preserved untouched. - Sorts by ``(file_path, hierarchy_chain, parametrize_path)`` so sibling - items under a shared parent (package, module, class, or parametrize axis) - stay contiguous; otherwise a free function sorting between two class - methods would tear down + re-open the class step, producing duplicate - parents in the report tree. + The stash is a cache the autouse fixtures read back; both keys have an + on-demand recompute fallback, so an item a later hook injects without going + through this hook still resolves correctly. 
""" - for item in items: - item.stash[hierarchy_key] = build_hierarchy_chain(item, config) - item.stash[parametrize_path_key] = build_parametrize_path(item) - # Use ``.get(...)`` defensively: a third-party hook may inject items after - # our stashing loop runs, and we'd rather sort them at the tail than - # KeyError out of collection. - items.sort( - key=lambda i: ( - str(i.path), - tuple(identity for identity, _, _, _ in i.stash.get(hierarchy_key, ())), - i.stash.get(parametrize_path_key, ()), - ) - ) + item.stash[hierarchy_key] = build_hierarchy_chain(item, item.config) + item.stash[parametrize_path_key] = build_parametrize_path(item) + + +def pytest_collection_finish(session: pytest.Session) -> None: + """Tally each parent's descendant leaves so parents can close mid-session. + + Delegates to ``tally_expected_parents``; runs after deselection so the counts + reflect only the selected, gated-in items. See ``release_finished_leaf``. + """ + tally_expected_parents(session) @pytest.hookimpl(tryfirst=True, hookwrapper=True) @@ -407,25 +391,40 @@ def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo[Any]): and report.outcome == "skipped" and getattr(item, "_sift_step", None) is None ): - with REPORT_CONTEXT.new_step(name=item.name) as inline_step: + # Nest the inline step under the same registry parents a running sibling + # would use. The autouse ``_sift_parents`` fixture never ran for a + # marker-skipped item, and the report-tree parents live off the step + # stack, so without resolving the parent here the step lands at the + # report root instead of under its module/class. 
+ parent_ns = resolve_parent_chain_in_context(item, item.config, REPORT_CONTEXT) + parent_step = parent_ns.current_step if parent_ns is not None else None + with REPORT_CONTEXT.new_step(name=item.name, parent=parent_step) as inline_step: inline_step.current_step.update({"status": TestStatus.SKIPPED}) if report.when == "teardown": finalize_after_teardown(item, report) +def pytest_runtest_logfinish(nodeid: str, location: tuple[str, int | None, str]) -> None: + """Close report-tree parents whose subtree finished with this item. + + Fires once per item (pass / fail / skip / error); delegates to + ``release_finished_leaf``, which decrements the item's parents' remaining-leaf + counts and closes any that reach zero — so containers resolve progressively + rather than all at session end. + """ + release_finished_leaf(nodeid) + + def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None: - """Drain any parent steps still open at session end (innermost first). + """Close any report-tree parents still open at session end (innermost first). - Wrapped so a failure in the inner drain does not prevent the outer one - from running. With ``module_substep`` removed, this is the sole place - where hierarchy parents close; they persist across all tests and only - drain when the session ends. + Normally a no-op: ``report_context_impl`` finalizes the parents inside the + ``ReportContext`` block so their updates reach the log before the import + worker drains, and most parents already closed early as their subtrees + finished. This is the idempotent backstop for anything still open. 
""" - try: - drain_parametrize_stack() - finally: - drain_hierarchy_stack() + finalize_parents() def pytest_report_header(config: pytest.Config) -> str | None: diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py index 5cd2c6729..84b97dab8 100644 --- a/python/lib/sift_client/util/test_results/context_manager.py +++ b/python/lib/sift_client/util/test_results/context_manager.py @@ -44,6 +44,13 @@ logger = logging.getLogger(__name__) +# Sentinel for ``create_step``/``new_step``'s ``parent`` argument. Distinguishes +# "parent omitted -> use the top of the step stack" (the default, linear +# behavior) from an explicit ``parent=None`` (create at the report root). The +# pytest plugin passes an explicit parent to build its report tree out of +# execution order; everyday ``new_step``/``substep`` callers omit it. +_USE_STACK_TOP = object() + def format_truncated_traceback( exc: type[BaseException] | None, @@ -139,8 +146,18 @@ class ReportContext(AbstractContextManager): log_file: Path | None step_is_open: bool step_stack: list[TestStep] - step_number_at_depth: dict[int, int] + # Per-parent child counter keyed by the parent's ``step_path`` (``""`` is the + # root bucket). Drives parent-relative path numbering so two parents at the + # same depth never collide and a step's path is stable regardless of the + # order siblings are created in. + child_counts: dict[str, int] open_step_results: dict[str, bool] + # Latest child ``end_time`` seen for each parent, keyed by the parent's + # ``step_path``. A parent that stays open across the whole run (e.g. a + # hierarchy/parametrize parent the pytest plugin holds in its registry) is + # closed with this time, so its duration spans first-child-start to + # last-descendant-finish rather than wall-clock at session end. 
+ parent_end_times: dict[str, datetime] any_failures: bool # Every step created in this report (including hierarchy/parametrize # parents), retained after close so end-of-run summaries can tally final @@ -204,8 +221,9 @@ def __init__( self.replay_log_file = replay_log_file self.step_is_open = False self.step_stack = [] - self.step_number_at_depth = {} + self.child_counts = {} self.open_step_results = {} + self.parent_end_times = {} self.any_failures = False self.created_steps = [] self.created_measurements = [] @@ -368,29 +386,53 @@ def new_step( description: str | None = None, assertion_as_fail_not_error: bool = True, metadata: dict[str, str | float | bool] | None = None, + *, + parent: TestStep | None | object = _USE_STACK_TOP, + push: bool = True, ) -> NewStep: - """Alias to return a new step context manager from this report context. Use create_step for actually creating a TestStep in the current context.""" + """Alias to return a new step context manager from this report context. Use create_step for actually creating a TestStep in the current context. + + ``parent`` and ``push`` default to the linear, stack-based behavior used + by everyday callers. The pytest plugin passes an explicit ``parent`` with + ``push=False`` to open report-tree parents that persist outside the stack; + see :meth:`create_step`. + """ return NewStep( self, name=name, description=description, assertion_as_fail_not_error=assertion_as_fail_not_error, metadata=metadata, + parent=parent, + push=push, ) - def get_next_step_path(self) -> str: - """Get the next step path for the current depth.""" - top_step = self.step_stack[-1] if self.step_stack else None - step_path = top_step.step_path if top_step else "" - next_step_number = self.step_number_at_depth.get(len(self.step_stack), 0) + 1 - prefix = f"{step_path}." 
if step_path else "" - return f"{prefix}{next_step_number}" + def _resolve_parent(self, parent: TestStep | None | object) -> TestStep | None: + """Resolve a ``parent`` argument to a concrete parent step (or None for root).""" + if parent is _USE_STACK_TOP: + return self.step_stack[-1] if self.step_stack else None + return parent # type: ignore[return-value] + + def get_next_step_path(self, parent: TestStep | None | object = _USE_STACK_TOP) -> str: + """Preview the path the next step under ``parent`` would get (no side effects). + + Parent-relative: a child's path is ``<parent_path>.<n>``, or + ``<n>`` at the root. Defaults to the top of the step stack so existing + callers see the same value the next stacked ``create_step`` will assign. + """ + parent_step = self._resolve_parent(parent) + parent_path = parent_step.step_path if parent_step else "" + next_number = self.child_counts.get(parent_path, 0) + 1 + return f"{parent_path}.{next_number}" if parent_path else str(next_number) def create_step( self, name: str, description: str | None = None, metadata: dict[str, str | float | bool] | None = None, + *, + parent: TestStep | None | object = _USE_STACK_TOP, + push: bool = True, ) -> TestStep: """Create a new step in the report context. @@ -400,12 +442,23 @@ def create_step( metadata: [Optional] Structured key/value metadata to attach to the step. For metadata shared across every step in a report, prefer the `metadata` attribute of the enclosing `TestReport`. + parent: The parent step to nest under. ``_USE_STACK_TOP`` (the + default) parents to the current top of the step stack — the + linear behavior. An explicit ``TestStep`` parents under that step + regardless of stack state; explicit ``None`` creates a root step. + push: Whether to push the new step onto the step stack. True (the + default) for leaf/in-test steps so their substeps nest under + them.
The pytest plugin passes False for hierarchy/parametrize + parents, which live in its own registry and would otherwise + trap unrelated steps beneath them. Returns: The created step. """ - step_path = self.get_next_step_path() - parent_step = self.step_stack[-1] if self.step_stack else None + parent_step = self._resolve_parent(parent) + parent_path = parent_step.step_path if parent_step else "" + next_number = self.child_counts.get(parent_path, 0) + 1 + step_path = f"{parent_path}.{next_number}" if parent_path else str(next_number) step = self.client.test_results.create_step( TestStepCreate( @@ -424,10 +477,9 @@ def create_step( ) # Update the step tracking structures. - self.step_number_at_depth[len(self.step_stack)] = ( - self.step_number_at_depth.get(len(self.step_stack), 0) + 1 - ) - self.step_stack.append(step) + self.child_counts[parent_path] = next_number + if push: + self.step_stack.append(step) self.open_step_results[step.step_path] = True # Retained for end-of-run tallies; never popped (unlike step_stack). self.created_steps.append(step) @@ -473,15 +525,41 @@ def propagate_step_result(self, step: TestStep, status: TestStatus) -> bool: self.open_step_results[".".join(path_parts[:-1])] = False return succeeded - def exit_step(self, step: TestStep): - """Exit a step and update the report context.""" - self.step_number_at_depth[len(self.step_stack)] = 0 - stack_top = self.step_stack.pop() - self.open_step_results.pop(step.step_path) + def note_close(self, step: TestStep) -> None: + """Record a just-closed step's ``end_time`` against its parent. - if stack_top.id_ != step.id_: + Lets a long-lived parent (one closed later, out of band) adopt the finish + time of its latest child instead of wall-clock at its own close. Keyed by + the parent's ``step_path`` (the child path minus its last segment). 
+ """ + end_time = step.end_time + if end_time is None: + return + path_parts = step.step_path.split(".") + if len(path_parts) <= 1: + return + parent_path = ".".join(path_parts[:-1]) + previous = self.parent_end_times.get(parent_path) + if previous is None or end_time > previous: + self.parent_end_times[parent_path] = end_time + + def exit_step(self, step: TestStep): + """Exit a step and update the report context. + + Stacked steps (leaves and their in-test substeps) close in strict LIFO + order, so a step that isn't the current top of the stack is a real + invariant break. Steps created with an explicit parent and ``push=False`` + (the pytest plugin's hierarchy/parametrize parents) never sit on the + stack and may close in any order — clearing ``open_step_results`` is all + that's needed; their result was already propagated to their own parent. + """ + self.open_step_results.pop(step.step_path, None) + if self.step_stack and self.step_stack[-1].id_ == step.id_: + self.step_stack.pop() + return + if any(s.id_ == step.id_ for s in self.step_stack): raise ValueError( - "The popped step was not the top of the stack. This should never happen." + "exit_step called out of LIFO order for a stacked step. This should never happen." ) @@ -496,6 +574,9 @@ class NewStep(AbstractContextManager): # status was already resolved upstream and ``__exit__`` should skip # re-classifying. Read via ``getattr`` so unset is treated as False. _sift_managed_externally: bool = False + # Set by the pytest plugin when finalizing a long-lived parent so ``__exit__`` + # stamps its last-descendant finish time instead of wall-clock at close. + _sift_end_time_override: datetime | None = None def __init__( self, @@ -504,6 +585,9 @@ def __init__( description: str | None = None, assertion_as_fail_not_error: bool = True, metadata: dict[str, str | float | bool] | None = None, + *, + parent: TestStep | None | object = _USE_STACK_TOP, + push: bool = True, ): """Initialize a new step context. 
@@ -513,10 +597,14 @@ def __init__( description: The description of the step. assertion_as_fail_not_error: Mark steps with assertion errors as failed instead of error+traceback (some users want assertions to work as simple failures especially when using pytest). metadata: [Optional] Structured key/value metadata to attach to the step. + parent: Parent step to nest under; see :meth:`ReportContext.create_step`. + push: Whether the step joins the step stack; see :meth:`ReportContext.create_step`. """ self.report_context = report_context self.client = report_context.client - self.current_step = self.report_context.create_step(name, description, metadata=metadata) + self.current_step = self.report_context.create_step( + name, description, metadata=metadata, parent=parent, push=push + ) self.assertion_as_fail_not_error = assertion_as_fail_not_error # Per-step measurement-failure count for ``measurements_passed``. # Tracks only direct ``measure*`` calls on this NewStep instance; @@ -589,6 +677,7 @@ def update_step_from_result( exc: type[Exception] | None, exc_value: Exception | None, tb: traceback.TracebackException | None, + end_time: datetime | None = None, ) -> bool: """Update the step based on its substeps and if there was an exception while executing the step. @@ -596,6 +685,10 @@ def update_step_from_result( exc: The class of Exception that was raised. exc_value: The exception value. tb: The traceback object. + end_time: Explicit end_time to stamp. Defaults to now(); the pytest + plugin passes the last-child finish time when closing a long-lived + parent so its duration reflects its subtree rather than its own + late close. returns: The false if step failed or errored, true otherwise. 
""" @@ -653,10 +746,11 @@ def update_step_from_result( current_step.update( { "status": status, - "end_time": datetime.now(timezone.utc), + "end_time": end_time if end_time is not None else datetime.now(timezone.utc), "error_info": error_info, }, ) + self.report_context.note_close(current_step) return result @@ -670,20 +764,24 @@ def __exit__(self, exc, exc_value, tb): if current_step is None: # The step was never opened; nothing to propagate. return True + override = getattr(self, "_sift_end_time_override", None) result = self.report_context.propagate_step_result(current_step, current_step.status) current_step.update( { "status": current_step.status, - "end_time": datetime.now(timezone.utc), + "end_time": override if override is not None else datetime.now(timezone.utc), "error_info": current_step.error_info, }, ) + self.report_context.note_close(current_step) self.report_context.exit_step(current_step) if hasattr(self, "force_result"): result = self.force_result return result - result = self.update_step_from_result(exc, exc_value, tb) + result = self.update_step_from_result( + exc, exc_value, tb, end_time=getattr(self, "_sift_end_time_override", None) + ) # Now that the step is updated. Let the report context handle removing it from the stack and updating the report context. 
self.report_context.exit_step(self.current_step) diff --git a/python/pyproject.toml b/python/pyproject.toml index b04bce6d3..61b2b03d2 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -85,6 +85,7 @@ dev = [ 'pytest-benchmark==4.0.0', 'pytest-dotenv==0.5.2', 'pytest-mock==3.14.0', + 'pytest-randomly==3.15.0', 'pytest==8.2.2', 'ruff~=0.12.10', 'tomlkit~=0.13.3', @@ -105,6 +106,7 @@ dev-all = [ 'pytest-benchmark==4.0.0', 'pytest-dotenv==0.5.2', 'pytest-mock==3.14.0', + 'pytest-randomly==3.15.0', 'pytest==8.2.2', "rosbags~=0.0 ; python_full_version >= '3.8.2'", 'ruff~=0.12.10', @@ -120,6 +122,7 @@ development = [ 'pytest-benchmark==4.0.0', 'pytest-dotenv==0.5.2', 'pytest-mock==3.14.0', + 'pytest-randomly==3.15.0', 'pytest==8.2.2', 'ruff~=0.12.10', 'tomlkit~=0.13.3', @@ -158,6 +161,7 @@ docs-build = [ 'pytest-benchmark==4.0.0', 'pytest-dotenv==0.5.2', 'pytest-mock==3.14.0', + 'pytest-randomly==3.15.0', 'pytest==8.2.2', "rosbags~=0.0 ; python_full_version >= '3.8.2'", 'ruff~=0.12.10', @@ -206,6 +210,9 @@ development = [ "pytest-benchmark==4.0.0", "pytest-mock==3.14.0", "pytest-dotenv==0.5.2", + # 3.15.0 is the last line supporting Python 3.8; pinned (rather than 4.x, + # which needs 3.10+) so randomization is active on the 3.8 CI test job too. 
+ "pytest-randomly==3.15.0", "ruff~=0.12.10", "tomlkit~=0.13.3" ] diff --git a/python/uv.lock b/python/uv.lock index d6391b311..dc463b99b 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -3615,6 +3615,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f2/3b/b26f90f74e2986a82df6e7ac7e319b8ea7ccece1caec9f8ab6104dc70603/pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f", size = 9863, upload-time = "2024-03-21T22:14:02.694Z" }, ] +[[package]] +name = "pytest-randomly" +version = "3.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata", version = "8.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, + { name = "importlib-metadata", version = "8.7.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/d4/6e924a0b2855736d942703dec88dfc98b4fe0881c8fa849b6b0fbb9182fa/pytest_randomly-3.15.0.tar.gz", hash = "sha256:b908529648667ba5e54723088edd6f82252f540cc340d748d1fa985539687047", size = 21743, upload-time = "2023-08-15T18:04:59.857Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/24/d3/00e575657422055c4ea220b2f80e8cc6026ab7130372b7067444d1b0ac10/pytest_randomly-3.15.0-py3-none-any.whl", hash = "sha256:0516f4344b29f4e9cdae8bce31c4aeebf59d0b9ef05927c33354ff3859eeeca6", size = 8685, upload-time = "2023-08-15T18:04:57.913Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -4400,6 +4414,7 @@ dev = [ { name = "pytest-benchmark" }, { name = "pytest-dotenv" }, { name = "pytest-mock" }, + { name = "pytest-randomly" }, { name = "ruff" }, { name = "tomlkit" }, ] @@ -4427,6 +4442,7 @@ dev-all = [ { name = "pytest-benchmark" }, { name = "pytest-dotenv" }, { name = "pytest-mock" }, + { name = "pytest-randomly" }, { name = "rosbags", 
version = "0.9.23", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.8.2' and python_full_version < '3.10'" }, { name = "rosbags", version = "0.11.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "ruff" }, @@ -4444,6 +4460,7 @@ development = [ { name = "pytest-benchmark" }, { name = "pytest-dotenv" }, { name = "pytest-mock" }, + { name = "pytest-randomly" }, { name = "ruff" }, { name = "tomlkit" }, ] @@ -4489,6 +4506,7 @@ docs-build = [ { name = "pytest-benchmark" }, { name = "pytest-dotenv" }, { name = "pytest-mock" }, + { name = "pytest-randomly" }, { name = "rosbags", version = "0.9.23", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.8.2' and python_full_version < '3.10'" }, { name = "rosbags", version = "0.11.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "ruff" }, @@ -4628,6 +4646,10 @@ requires-dist = [ { name = "pytest-mock", marker = "extra == 'dev-all'", specifier = "==3.14.0" }, { name = "pytest-mock", marker = "extra == 'development'", specifier = "==3.14.0" }, { name = "pytest-mock", marker = "extra == 'docs-build'", specifier = "==3.14.0" }, + { name = "pytest-randomly", marker = "extra == 'dev'", specifier = "==3.15.0" }, + { name = "pytest-randomly", marker = "extra == 'dev-all'", specifier = "==3.15.0" }, + { name = "pytest-randomly", marker = "extra == 'development'", specifier = "==3.15.0" }, + { name = "pytest-randomly", marker = "extra == 'docs-build'", specifier = "==3.15.0" }, { name = "pyyaml", specifier = "~=6.0" }, { name = "rapidyaml", specifier = "~=0.11" }, { name = "requests", specifier = "~=2.25" },