From dbe2af07a4d2927049de25077125be817d356ff9 Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Tue, 19 May 2026 07:50:30 -0700
Subject: [PATCH 01/18] Python(refactor): extract pure bounds helpers and add
 ReportContext.offline

Adds ``value_passes_bounds``, ``to_numpy_array``, ``out_of_bounds_mask``,
and ``all_within_bounds`` to ``bounds`` so consumers that need pass/fail
semantics without recording a measurement (e.g. a stub plugin mode) can
share the same evaluation logic. ``evaluate_measurement_bounds`` now
delegates to ``value_passes_bounds`` to keep the two paths in sync.

Adds an ``offline`` flag to ``ReportContext``: when set, the context
auto-creates a temp log file if none is provided and skips spawning the
import-test-result-log replay subprocess at session end. The log file is
the sole sink in offline mode.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../sift_client/util/test_results/bounds.py   | 116 ++++++++++++++----
 .../util/test_results/context_manager.py      |  49 ++------
 2 files changed, 105 insertions(+), 60 deletions(-)

diff --git a/python/lib/sift_client/util/test_results/bounds.py b/python/lib/sift_client/util/test_results/bounds.py
index ef5c67ce5..49fbfa0bf 100644
--- a/python/lib/sift_client/util/test_results/bounds.py
+++ b/python/lib/sift_client/util/test_results/bounds.py
@@ -1,5 +1,10 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING
+
+import numpy as np
+import pandas as pd
+
 from sift_client.sift_types.test_report import (
     NumericBounds,
     TestMeasurement,
@@ -8,6 +13,55 @@
     TestMeasurementUpdate,
 )
 
+if TYPE_CHECKING:
+    from numpy.typing import NDArray
+
+
+def to_numpy_array(
+    values: list[float | int] | NDArray[np.float64] | pd.Series,
+) -> NDArray[np.float64]:
+    """Normalize a list / ndarray / pandas Series into a numpy array.
+
+    Shared by ``measure_avg`` and ``measure_all`` in both the real plugin and
+    the no-op sibling so the accepted input types stay in sync.
+    """
+    if isinstance(values, list):
+        return np.array(values)
+    if isinstance(values, np.ndarray):
+        return values
+    if isinstance(values, pd.Series):
+        return values.to_numpy()
+    raise ValueError(f"Invalid value type: {type(values)}")
+
+
+def out_of_bounds_mask(
+    arr: NDArray[np.float64],
+    bounds: dict[str, float] | NumericBounds,
+) -> NDArray[np.bool_]:
+    """Return a boolean mask selecting elements of ``arr`` that violate ``bounds``.
+
+    Raises ``ValueError`` when ``bounds`` has neither ``min`` nor ``max`` set.
+    """
+    if isinstance(bounds, dict):
+        bounds = NumericBounds(min=bounds.get("min"), max=bounds.get("max"))
+    mask: NDArray[np.bool_] | None = None
+    if bounds.min is not None:
+        mask = arr < bounds.min
+    if bounds.max is not None:
+        above = arr > bounds.max
+        mask = mask | above if mask is not None else above
+    if mask is None:
+        raise ValueError("No bounds provided")
+    return mask
+
+
+def all_within_bounds(
+    arr: NDArray[np.float64],
+    bounds: dict[str, float] | NumericBounds,
+) -> bool:
+    """Return True when every element of ``arr`` is within ``bounds``."""
+    return bool(arr[out_of_bounds_mask(arr, bounds)].size == 0)
+
 
 def assign_value_to_measurement(
     measurement: TestMeasurement | TestMeasurementCreate | TestMeasurementUpdate,
@@ -32,6 +86,43 @@ def assign_value_to_measurement(
         raise ValueError(f"Invalid value type: {type(value)}")
 
 
+def value_passes_bounds(
+    value: float | str | bool,
+    bounds: dict[str, float] | NumericBounds | str | bool | None,
+) -> bool:
+    """Evaluate a value against bounds without recording a measurement.
+
+    Used by consumers that need pass/fail semantics matching the real plugin but
+    do not transmit a measurement (e.g. ``--sift-disabled`` mode in the pytest
+    plugin).
+    """
+    if bounds is None:
+        return True
+    if isinstance(bounds, dict):
+        bounds = NumericBounds(min=bounds.get("min"), max=bounds.get("max"))
+    if isinstance(bounds, bool):
+        if isinstance(value, str):
+            return str(value).lower() == str(bounds).lower()
+        return bool(value) == bounds
+    if isinstance(bounds, str):
+        if not (isinstance(value, str) or isinstance(value, bool)):
+            raise ValueError("Value must be a string if bounds provided is a string")
+        if isinstance(value, bool):
+            return str(value).lower() == str(bounds).lower()
+        return value == bounds
+    # NumericBounds
+    try:
+        if bounds.min is not None and bounds.min > value:  # type: ignore[operator]
+            return False
+        if bounds.max is not None and bounds.max < value:  # type: ignore[operator]
+            return False
+    except TypeError:
+        raise TypeError(
+            f"Value must be a float or int to evaluate numeric bounds but gave {type(value)}"
+        ) from None
+    return True
+
+
 def evaluate_measurement_bounds(
     measurement: TestMeasurement | TestMeasurementCreate | TestMeasurementUpdate,
     value: float | str | bool,
@@ -53,31 +144,10 @@ def evaluate_measurement_bounds(
 
     if isinstance(bounds, dict):
         bounds = NumericBounds(min=bounds.get("min"), max=bounds.get("max"))
-    if isinstance(bounds, bool):
-        if isinstance(value, str):
-            measurement.passed = str(value).lower() == str(bounds).lower()
-        else:
-            measurement.passed = bool(value) == bounds
-        return bool(measurement.passed)
-    elif isinstance(bounds, str):
-        if not (isinstance(value, str) or isinstance(value, bool)):
-            raise ValueError("Value must be a string if bounds provided is a string")
+    if isinstance(bounds, str) and not isinstance(bounds, bool):
         measurement.string_expected_value = bounds
-        if isinstance(value, bool):
-            measurement.passed = str(value).lower() == str(bounds).lower()
-        else:
-            measurement.passed = value == bounds
     elif isinstance(bounds, NumericBounds):
         measurement.numeric_bounds = bounds
-        measurement.passed = True
-        try:
-            if measurement.numeric_bounds.min is not None:
-                measurement.passed = measurement.passed and measurement.numeric_bounds.min <= value  # type: ignore
-            if measurement.numeric_bounds.max is not None:
-                measurement.passed = measurement.passed and measurement.numeric_bounds.max >= value  # type: ignore
-        except TypeError:
-            raise TypeError(
-                f"Value must be a float or int to evaluate numeric bounds but gave {type(value)}"
-            ) from None
 
+    measurement.passed = value_passes_bounds(value, bounds)
     return bool(measurement.passed)
diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py
index 354f8564d..fe8579834 100644
--- a/python/lib/sift_client/util/test_results/context_manager.py
+++ b/python/lib/sift_client/util/test_results/context_manager.py
@@ -13,7 +13,6 @@
 from typing import TYPE_CHECKING
 
 import numpy as np
-import pandas as pd
 
 from sift_client.sift_types.test_report import (
     ErrorInfo,
@@ -28,9 +27,12 @@
 )
 from sift_client.util.test_results.bounds import (
     evaluate_measurement_bounds,
+    out_of_bounds_mask,
+    to_numpy_array,
 )
 
 if TYPE_CHECKING:
+    import pandas as pd
     from numpy.typing import NDArray
 
     from sift_client.client import SiftClient
@@ -118,6 +120,7 @@ def __init__(
         test_case: str | None = None,
         log_file: str | Path | bool | None = None,
         include_git_metadata: bool = False,
+        offline: bool = False,
     ):
         """Initialize a new report context.
 
@@ -130,15 +133,18 @@ def __init__(
             log_file: If True, create a temp log file. If a path, use that path.
                 All create/update operations will be logged to this file.
             include_git_metadata: If True, include git metadata in the report.
+            offline: If True, do not spawn the import-test-result-log replay subprocess.
+                A log file is required and will be created as a temp file if not provided.
         """
         self.client = client
+        self.offline = offline
         self.step_is_open = False
         self.step_stack = []
         self.step_number_at_depth = {}
         self.open_step_results = {}
         self.any_failures = False
 
-        if log_file is True:
+        if log_file is True or (offline and not log_file):
             tmp = tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False)
             self.log_file = Path(tmp.name)
             logger.info(f"Created temporary log file: {self.log_file}")
@@ -184,7 +190,7 @@ def _open_import_proc(self):
             )
 
     def __enter__(self):
-        if self.log_file:
+        if self.log_file and not self.offline:
             self._open_import_proc()
         return self
 
@@ -505,15 +511,7 @@ def measure_avg(
         returns: The true if the average of the values is within the bounds, false otherwise.
         """
         timestamp = timestamp if timestamp else datetime.now(timezone.utc)
-        np_array = None
-        if isinstance(values, list):
-            np_array = np.array(values)
-        elif isinstance(values, np.ndarray):
-            np_array = values
-        elif isinstance(values, pd.Series):
-            np_array = values.to_numpy()
-        else:
-            raise ValueError(f"Invalid value type: {type(values)}")
+        np_array = to_numpy_array(values)
         avg = float(np.mean(np_array))
         result = self.measure(
             name=name,
@@ -561,31 +559,8 @@ def measure_all(
         returns: The true if all values are within the bounds, false otherwise.
         """
         timestamp = timestamp if timestamp else datetime.now(timezone.utc)
-        np_array = None
-        if isinstance(values, list):
-            np_array = np.array(values)
-        elif isinstance(values, np.ndarray):
-            np_array = values
-        elif isinstance(values, pd.Series):
-            np_array = values.to_numpy()
-        else:
-            raise ValueError(f"Invalid value type: {type(values)}")
-
-        numeric_bounds = bounds
-        if isinstance(numeric_bounds, dict):
-            numeric_bounds = NumericBounds(min=bounds.get("min"), max=bounds.get("max"))  # type: ignore
-
-        # Construct a mask of the values that are outside the bounds.
-        mask = None
-        if numeric_bounds.min is not None:
-            mask = np_array < numeric_bounds.min
-        if numeric_bounds.max is not None:
-            val_above_max = np_array > numeric_bounds.max
-            mask = mask | val_above_max if mask is not None else val_above_max
-        if mask is None:
-            raise ValueError("No bounds provided")
-
-        rows_outside_bounds = np_array[mask]
+        np_array = to_numpy_array(values)
+        rows_outside_bounds = np_array[out_of_bounds_mask(np_array, bounds)]
         for row in rows_outside_bounds:
             self.measure(
                 name=name,

From 296e396a5459bab76958e6dd4ef3247333ae57ce Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Tue, 19 May 2026 07:57:01 -0700
Subject: [PATCH 02/18] Python(feat): replace
 --sift-test-results-check-connection with --sift-offline and --sift-disabled

Replaces the per-fixture connection gate from #567 (which made
`report_context` / `step` / `module_substep` yield None on a failed
ping, breaking user code that calls `step.measure(...)`) with two
explicit modes:

* `--sift-offline` writes every create/update to the JSONL log file
  for later replay via `import-test-result-log`. The session-start
  ping is skipped and the replay subprocess is not spawned at session
  end. Combining with `--sift-log-file=none` is rejected as a usage
  error since the log file is the sole sink.

* `--sift-disabled` skips Sift entirely. Autouse fixtures yield stub
  objects (`_NoopStep`, `_NoopReportContext`) that share the bounds
  helpers from `bounds`, so `step.measure(...)` still returns real
  pass/fail booleans without any Sift configuration. Also honored via
  the `SIFT_DISABLED` env var. The two modes are mutually exclusive.

Online mode now fail-fasts at session start: a failed ping aborts the
session with `pytest.UsageError` naming both flags as escape hatches,
instead of letting fixtures silently no-op.

Preserves #567's marker system (`sift_include` / `sift_exclude` +
`sift_test_results_autouse` ini key), lazy `report_context`, per-module
gate, URI ini fallbacks, and the `_Option` CLI+ini plumbing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../_tests/pytest_plugin/conftest.py          |   9 +
 .../pytest_plugin/test_configuration.py       |  68 +++-
 .../_tests/pytest_plugin/test_credentials.py  |   8 +-
 .../_tests/pytest_plugin/test_disabled.py     |  93 ++++++
 .../_tests/pytest_plugin/test_offline.py      |  52 +++
 .../_tests/pytest_plugin/test_online.py       |  75 +++++
 python/lib/sift_client/pytest_plugin.py       | 304 +++++++++++++++---
 7 files changed, 540 insertions(+), 69 deletions(-)
 create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
 create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_offline.py
 create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_online.py

diff --git a/python/lib/sift_client/_tests/pytest_plugin/conftest.py b/python/lib/sift_client/_tests/pytest_plugin/conftest.py
index 1fbd61e46..783a12bf4 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/conftest.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/conftest.py
@@ -25,6 +25,15 @@
 
 import pytest
 
+_SIFT_ENV_VARS = ("SIFT_API_KEY", "SIFT_GRPC_URI", "SIFT_REST_URI", "SIFT_DISABLED")
+
+
+@pytest.fixture
+def clear_sift_env(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Unset all ``SIFT_*`` environment variables for the duration of the test."""
+    for name in _SIFT_ENV_VARS:
+        monkeypatch.delenv(name, raising=False)
+
 
 @pytest.fixture
 def write_plugin_conftest(pytester: pytest.Pytester) -> Callable[[], None]:
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py b/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py
index 9b9be2d63..1f02dfbb2 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py
@@ -87,26 +87,47 @@ def test_ini_log_file_path(
         result = pytester.runpytest_subprocess("-s", "--co")
         result.stdout.fnmatch_lines([f"RESOLVED: {log_path}"])
 
-    def test_ini_check_connection_true(
+    def test_ini_offline_true(
         self,
         pytester: pytest.Pytester,
         write_probe_conftest: Callable[[str], None],
     ) -> None:
         write_probe_conftest(
             """
-            from sift_client.pytest_plugin import _check_connection_enabled
-            print("CHECK:", _check_connection_enabled(config))
+            from sift_client.pytest_plugin import _is_offline
+            print("OFFLINE:", _is_offline(config))
             """,
         )
         pytester.makepyprojecttoml(
             """
             [tool.pytest.ini_options]
-            sift_test_results_check_connection = true
+            sift_offline = true
             """
         )
         pytester.makepyfile("def test_noop(): pass")
         result = pytester.runpytest_subprocess("-s", "--co")
-        result.stdout.fnmatch_lines(["CHECK: True"])
+        result.stdout.fnmatch_lines(["OFFLINE: True"])
+
+    def test_ini_disabled_true(
+        self,
+        pytester: pytest.Pytester,
+        write_probe_conftest: Callable[[str], None],
+    ) -> None:
+        write_probe_conftest(
+            """
+            from sift_client.pytest_plugin import _is_disabled
+            print("DISABLED:", _is_disabled(config))
+            """,
+        )
+        pytester.makepyprojecttoml(
+            """
+            [tool.pytest.ini_options]
+            sift_disabled = true
+            """
+        )
+        pytester.makepyfile("def test_noop(): pass")
+        result = pytester.runpytest_subprocess("-s", "--co")
+        result.stdout.fnmatch_lines(["DISABLED: True"])
 
     def test_ini_git_metadata_false(
         self,
@@ -154,21 +175,37 @@ def test_cli_overrides_ini(
         )
         result.stdout.fnmatch_lines([f"RESOLVED: {cli_path}"])
 
-    def test_cli_check_connection_flag(
+    def test_cli_offline_flag(
+        self,
+        pytester: pytest.Pytester,
+        write_probe_conftest: Callable[[str], None],
+    ) -> None:
+        """The ``--sift-offline`` CLI flag flips the resolver to True."""
+        write_probe_conftest(
+            """
+            from sift_client.pytest_plugin import _is_offline
+            print("OFFLINE:", _is_offline(config))
+            """,
+        )
+        pytester.makepyfile("def test_noop(): pass")
+        result = pytester.runpytest_subprocess("-s", "--co", "--sift-offline")
+        result.stdout.fnmatch_lines(["OFFLINE: True"])
+
+    def test_cli_disabled_flag(
         self,
         pytester: pytest.Pytester,
         write_probe_conftest: Callable[[str], None],
     ) -> None:
-        """The ``--sift-test-results-check-connection`` CLI flag flips the resolver to True."""
+        """The ``--sift-disabled`` CLI flag flips the resolver to True."""
         write_probe_conftest(
             """
-            from sift_client.pytest_plugin import _check_connection_enabled
-            print("CHECK:", _check_connection_enabled(config))
+            from sift_client.pytest_plugin import _is_disabled
+            print("DISABLED:", _is_disabled(config))
             """,
         )
         pytester.makepyfile("def test_noop(): pass")
-        result = pytester.runpytest_subprocess("-s", "--co", "--sift-test-results-check-connection")
-        result.stdout.fnmatch_lines(["CHECK: True"])
+        result = pytester.runpytest_subprocess("-s", "--co", "--sift-disabled")
+        result.stdout.fnmatch_lines(["DISABLED: True"])
 
     def test_cli_no_git_metadata_flag(
         self,
@@ -198,11 +235,13 @@ def test_defaults_when_neither_set(
         write_probe_conftest(
             """
             from sift_client.pytest_plugin import (
-                _check_connection_enabled,
+                _is_disabled,
+                _is_offline,
                 _resolve_log_file,
             )
             print("RESOLVED:", _resolve_log_file(config))
-            print("CHECK:", _check_connection_enabled(config))
+            print("OFFLINE:", _is_offline(config))
+            print("DISABLED:", _is_disabled(config))
             print("INI_GIT:", config.getini("sift_test_results_git_metadata"))
             """,
         )
@@ -211,7 +250,8 @@ def test_defaults_when_neither_set(
         result.stdout.fnmatch_lines(
             [
                 "RESOLVED: True",
-                "CHECK: False",
+                "OFFLINE: False",
+                "DISABLED: False",
                 "INI_GIT: True",
             ]
         )
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_credentials.py b/python/lib/sift_client/_tests/pytest_plugin/test_credentials.py
index 9ee628e69..3f6d22a6e 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_credentials.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_credentials.py
@@ -31,8 +31,8 @@ def test_uris_from_ini(
             [tool.pytest.ini_options]
             sift_grpc_uri = "ini-grpc:1234"
             sift_rest_uri = "https://ini-rest"
-            sift_test_results_check_connection = true
-            sift_test_results_log_file = "false"
+            sift_offline = true
+
             """
         )
         pytester.makepyfile(
@@ -62,8 +62,8 @@ def test_env_var_overrides_ini_uri(
             [tool.pytest.ini_options]
             sift_grpc_uri = "ini-grpc:1234"
             sift_rest_uri = "https://ini-rest"
-            sift_test_results_check_connection = true
-            sift_test_results_log_file = "false"
+            sift_offline = true
+
             """
         )
         pytester.makepyfile(
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
new file mode 100644
index 000000000..b876e3fb5
--- /dev/null
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
@@ -0,0 +1,93 @@
+"""Tests for ``--sift-disabled`` mode.
+
+Disabled mode skips Sift entirely. Autouse fixtures yield stub objects so
+test code that calls ``step.measure(...)`` keeps working without any Sift
+configuration; ``measure*`` evaluates bounds locally and returns the real
+pass/fail boolean. Nothing reaches Sift and no log file is written.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable
+
+if TYPE_CHECKING:
+    import pytest
+
+
+class TestDisabledMode:
+    def test_in_bounds_passes_out_of_bounds_fails(
+        self,
+        pytester: pytest.Pytester,
+        clear_sift_env: None,
+        write_plugin_conftest: Callable[[], None],
+    ) -> None:
+        """Stub measure* evaluates bounds locally; pass/fail matches the real plugin."""
+        write_plugin_conftest()
+        pytester.makepyfile(
+            """
+            def test_passes_in_bounds(step):
+                assert step.measure(name="v", value=5.0, bounds={"min": 4.8, "max": 5.2})
+
+            def test_fails_out_of_bounds(step):
+                assert step.measure(name="v", value=99.0, bounds={"max": 5.2}) is False
+
+            def test_substep_and_report_outcome(step):
+                with step.substep(name="inner") as inner:
+                    assert inner.report_outcome(name="ok", result=True) is True
+
+            def test_string_bounds(step):
+                assert step.measure(name="fw", value="1.0", bounds="1.0") is True
+                assert step.measure(name="fw", value="1.0", bounds="2.0") is False
+
+            def test_measure_avg(step):
+                assert step.measure_avg(
+                    name="bus", values=[4.97, 5.01, 5.03], bounds={"min": 4.9, "max": 5.1}
+                ) is True
+
+            def test_measure_all_outlier(step):
+                assert step.measure_all(
+                    name="p", values=[10.1, 10.2, 99.9], bounds={"max": 11.0}
+                ) is False
+            """
+        )
+        result = pytester.runpytest_subprocess("--sift-disabled")
+        result.assert_outcomes(passed=6)
+
+    def test_disabled_does_not_require_credentials(
+        self,
+        pytester: pytest.Pytester,
+        clear_sift_env: None,
+        write_plugin_conftest: Callable[[], None],
+    ) -> None:
+        """Disabled mode never reads SIFT_* env vars; runs cleanly without them."""
+        write_plugin_conftest()
+        pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)")
+        result = pytester.runpytest_subprocess("--sift-disabled")
+        result.assert_outcomes(passed=1)
+
+    def test_disabled_via_env_var(
+        self,
+        pytester: pytest.Pytester,
+        clear_sift_env: None,
+        write_plugin_conftest: Callable[[], None],
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """``SIFT_DISABLED=1`` triggers disabled mode without the CLI flag."""
+        write_plugin_conftest()
+        pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)")
+        monkeypatch.setenv("SIFT_DISABLED", "1")
+        result = pytester.runpytest_subprocess()
+        result.assert_outcomes(passed=1)
+
+    def test_disabled_incompatible_with_offline(
+        self,
+        pytester: pytest.Pytester,
+        write_plugin_conftest: Callable[[], None],
+    ) -> None:
+        """``--sift-disabled`` + ``--sift-offline`` is a usage error."""
+        write_plugin_conftest()
+        pytester.makepyfile("def test_should_not_run(): pass")
+        result = pytester.runpytest_subprocess("--sift-disabled", "--sift-offline")
+        assert result.ret != 0
+        combined = "\n".join(result.outlines + result.errlines)
+        assert "incompatible with --sift-offline" in combined, combined
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_offline.py b/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
new file mode 100644
index 000000000..9654a4061
--- /dev/null
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
@@ -0,0 +1,52 @@
+"""Tests for ``--sift-offline`` mode.
+
+Offline mode routes every create/update through the JSONL log file without
+contacting Sift. The session-start ping is skipped, the import worker is not
+spawned, and missing ``SIFT_*`` env vars are tolerated (placeholders are
+filled). Offline + ``--sift-test-results-log-file=none`` is rejected as a
+usage error since the log file is the sole sink in this mode.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable
+
+if TYPE_CHECKING:
+    import pytest
+
+
+class TestOfflineMode:
+    def test_offline_runs_without_network(
+        self,
+        pytester: pytest.Pytester,
+        clear_sift_env: None,
+        write_plugin_conftest: Callable[[], None],
+    ) -> None:
+        """Offline mode constructs the client locally and never pings."""
+        write_plugin_conftest()
+        pytester.makepyfile(
+            """
+            def test_in_bounds(step):
+                assert step.measure(name="v", value=5.0, bounds={"min": 4.8, "max": 5.2})
+
+            def test_out_of_bounds(step):
+                assert step.measure(name="v", value=10.0, bounds={"max": 5.2}) is False
+            """
+        )
+        result = pytester.runpytest_subprocess("--sift-offline")
+        result.assert_outcomes(passed=2)
+
+    def test_log_file_none_incompatible_with_offline(
+        self,
+        pytester: pytest.Pytester,
+        write_plugin_conftest: Callable[[], None],
+    ) -> None:
+        """``--sift-test-results-log-file=none`` + ``--sift-offline`` is a usage error."""
+        write_plugin_conftest()
+        pytester.makepyfile("def test_should_not_run(): pass")
+        result = pytester.runpytest_subprocess(
+            "--sift-offline", "--sift-test-results-log-file=none"
+        )
+        assert result.ret != 0
+        combined = "\n".join(result.outlines + result.errlines)
+        assert "incompatible with --sift-offline" in combined, combined
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_online.py b/python/lib/sift_client/_tests/pytest_plugin/test_online.py
new file mode 100644
index 000000000..6cc16573e
--- /dev/null
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_online.py
@@ -0,0 +1,75 @@
+"""Tests for online mode (the default).
+
+Online mode requires connectivity to Sift. The plugin pings via
+``client_has_connection`` at session start and aborts with
+``pytest.UsageError`` on failure. Missing ``SIFT_API_KEY`` /
+``SIFT_GRPC_URI`` / ``SIFT_REST_URI`` env vars are reported as a usage error
+so the failure is actionable.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable
+
+if TYPE_CHECKING:
+    import pytest
+
+
+class TestOnlineMode:
+    def test_ping_failure_aborts(
+        self,
+        pytester: pytest.Pytester,
+        clear_sift_env: None,
+    ) -> None:
+        """Online mode with an unreachable ping aborts the session via UsageError."""
+        pytester.makeconftest(
+            """
+            import pytest
+            from unittest.mock import MagicMock
+
+            pytest_plugins = ["sift_client.pytest_plugin"]
+
+
+            @pytest.fixture(scope="session")
+            def sift_client():
+                client = MagicMock()
+                client.ping.ping.side_effect = ConnectionError("unreachable")
+                return client
+            """
+        )
+        pytester.makepyfile(
+            """
+            import pytest
+
+            @pytest.mark.sift_include
+            def test_should_not_run():
+                assert True
+            """
+        )
+        result = pytester.runpytest_subprocess()
+        assert result.ret != 0
+        combined = "\n".join(result.outlines + result.errlines)
+        assert "Sift ping failed" in combined, combined
+
+    def test_missing_env_vars_named_in_error(
+        self,
+        pytester: pytest.Pytester,
+        clear_sift_env: None,
+        write_plugin_conftest: Callable[[], None],
+    ) -> None:
+        """The default ``sift_client`` fixture names missing env vars in its error."""
+        write_plugin_conftest()
+        pytester.makepyfile(
+            """
+            import pytest
+
+            @pytest.mark.sift_include
+            def test_should_not_run():
+                pass
+            """
+        )
+        result = pytester.runpytest_subprocess()
+        assert result.ret != 0
+        combined = "\n".join(result.outlines + result.errlines)
+        for var in ("SIFT_API_KEY", "SIFT_GRPC_URI", "SIFT_REST_URI"):
+            assert var in combined, combined
diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py
index f2699a954..071b03a6e 100644
--- a/python/lib/sift_client/pytest_plugin.py
+++ b/python/lib/sift_client/pytest_plugin.py
@@ -1,21 +1,33 @@
 from __future__ import annotations
 
 import os
+from contextlib import AbstractContextManager
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator
 
+import numpy as np
 import pytest
 
 from sift_client import SiftClient, SiftConnectionConfig
 from sift_client.sift_types.test_report import TestStatus
 from sift_client.util.test_results import ReportContext
+from sift_client.util.test_results.bounds import (
+    all_within_bounds,
+    to_numpy_array,
+    value_passes_bounds,
+)
 
 if TYPE_CHECKING:
+    import pandas as pd
+    from numpy.typing import NDArray
+
+    from sift_client.sift_types.channel import Channel
+    from sift_client.sift_types.test_report import NumericBounds
     from sift_client.util.test_results.context_manager import NewStep
 
-REPORT_CONTEXT: ReportContext | None = None
+REPORT_CONTEXT: Any = None
 
 
 @dataclass(frozen=True)
@@ -60,16 +72,28 @@ class _Option:
     ini_default=True,
 )
 
-_CHECK_CONNECTION = _Option(
-    cli_flag="--sift-test-results-check-connection",
-    ini_name="sift_test_results_check_connection",
+_OFFLINE = _Option(
+    cli_flag="--sift-offline",
+    ini_name="sift_offline",
+    action="store_true",
+    cli_help="Run without contacting Sift. All create/update calls are written "
+    "to a JSONL log file for later replay via `import-test-result-log`. "
+    "No session-start ping is attempted.",
+    ini_help="When true, run in offline mode (same effect as --sift-offline). Defaults to false.",
+    ini_type="bool",
+    ini_default=False,
+)
+
+_DISABLED = _Option(
+    cli_flag="--sift-disabled",
+    ini_name="sift_disabled",
     action="store_true",
-    cli_help="Skip the sift test-result fixtures (report_context, step, module_substep) "
-    "when the Sift client has no connection to the server. Requires a "
-    "`client_has_connection` fixture to be available in the test session.",
-    ini_help="When true, skip the sift test-result fixtures if the client has "
-    "no connection (same effect as --sift-test-results-check-connection). "
-    "Defaults to false.",
+    cli_help="Disable Sift integration entirely. Autouse fixtures yield stub "
+    "objects; `step.measure(...)` still returns real pass/fail booleans by "
+    "evaluating bounds locally, but nothing is sent to Sift and no log file "
+    "is written. Also honored via the `SIFT_DISABLED` env var.",
+    ini_help="When true, run in disabled mode (same effect as --sift-disabled). "
+    "Also honored via the SIFT_DISABLED env var. Defaults to false.",
     ini_type="bool",
     ini_default=False,
 )
@@ -102,7 +126,8 @@ class _Option:
 _OPTIONS: tuple[_Option, ...] = (
     _LOG_FILE,
     _GIT_METADATA,
-    _CHECK_CONNECTION,
+    _OFFLINE,
+    _DISABLED,
     _GRPC_URI,
     _REST_URI,
     _AUTOUSE,
@@ -135,7 +160,7 @@ def pytest_addoption(parser: pytest.Parser) -> None:
 
 
 def pytest_configure(config: pytest.Config) -> None:
-    """Register the Sift gate markers so they show up in `pytest --markers`."""
+    """Register markers and reject mutually exclusive mode flags."""
     config.addinivalue_line(
         "markers",
         "sift_include: force the Sift autouse fixtures to activate for this test "
@@ -146,6 +171,22 @@ def pytest_configure(config: pytest.Config) -> None:
         "sift_exclude: force the Sift autouse fixtures to skip this test "
         "regardless of the `sift_test_results_autouse` ini default.",
     )
+    if _is_disabled(config) and _is_offline(config):
+        raise pytest.UsageError(
+            "--sift-disabled is incompatible with --sift-offline. Disabled mode "
+            "skips Sift entirely (no log file, no network); offline mode writes "
+            "to a log file for later replay. Pick one."
+        )
+
+
+def _is_offline(pytestconfig: pytest.Config | None) -> bool:
+    return bool(_option_or_ini(pytestconfig, _OFFLINE))
+
+
+def _is_disabled(pytestconfig: pytest.Config | None) -> bool:
+    if bool(_option_or_ini(pytestconfig, _DISABLED)):
+        return True
+    return os.getenv("SIFT_DISABLED", "").lower() in ("1", "true", "yes")
 
 
 def _sift_enabled_for(node: pytest.Item | pytest.Collector, default: bool) -> bool:
@@ -208,8 +249,18 @@ def _resolve_log_file(pytestconfig: pytest.Config | None) -> str | Path | bool |
     * A string (from CLI or ini) — interpret ``"true"`` / ``"1"`` as the temp
       file default, ``"false"`` / ``"none"`` as disable, anything else as a
       file path.
+
+    Rejects ``--sift-log-file=none`` combined with ``--sift-offline`` since
+    offline mode needs the log file as its sole sink.
     """
     raw = _option_or_ini(pytestconfig, _LOG_FILE)
+    disabled = raw is False or (isinstance(raw, str) and raw.lower() in ("false", "none"))
+    if disabled and _is_offline(pytestconfig):
+        raise pytest.UsageError(
+            "--sift-log-file=none is incompatible with --sift-offline; offline "
+            "mode requires a log file. Pin one with --sift-log-file=<path>, or "
+            "drop --sift-log-file=none to use a temp file."
+        )
     if raw is False:
         return None
     if not raw:
@@ -239,7 +290,7 @@ def _report_context_impl(
     sift_client: SiftClient,
     request: pytest.FixtureRequest,
     pytestconfig: pytest.Config | None = None,
-) -> Generator[ReportContext | None, None, None]:
+) -> Generator[ReportContext, None, None]:
     args = request.config.invocation_params.args
     test_path = Path(args[0]) if args else None
     if test_path is not None and test_path.exists():
@@ -251,26 +302,128 @@ def _report_context_impl(
     log_file = _resolve_log_file(pytestconfig)
     git_metadata = _option_or_ini(pytestconfig, _GIT_METADATA)
     include_git_metadata = True if git_metadata is None else bool(git_metadata)
+    offline = _is_offline(pytestconfig)
     with ReportContext(
         sift_client,
         name=f"{base_name} {datetime.now(timezone.utc).isoformat()}",
         test_case=str(test_case),
         log_file=log_file,
         include_git_metadata=include_git_metadata,
+        offline=offline,
     ) as context:
         global REPORT_CONTEXT
         REPORT_CONTEXT = context
         yield context
 
 
-def _check_connection_enabled(pytestconfig: pytest.Config | None) -> bool:
-    """Return True when the caller opted into the check-connection mode via CLI or ini."""
-    return bool(_option_or_ini(pytestconfig, _CHECK_CONNECTION))
+class _NoopTestStep:
+    """Stub for ``TestStep`` exposing only the attributes user code touches."""
+
+    description: str | None = None
+    status: Any = None
+
+    def update(self, *_args: Any, **_kwargs: Any) -> None:
+        return None
+
+
+class _NoopReport:
+    """Stub for ``TestReport`` exposing only ``.update(...)``."""
+
+    def update(self, *_args: Any, **_kwargs: Any) -> None:
+        return None
 
 
-def _has_sift_connection(request: pytest.FixtureRequest) -> bool:
-    """Resolve the `client_has_connection` fixture lazily; only called when the check is enabled."""
-    return bool(request.getfixturevalue("client_has_connection"))
+class _NoopStep(AbstractContextManager["_NoopStep"]):
+    """Step shim mirroring ``NewStep``'s public surface without any I/O.
+
+    Used by ``--sift-disabled`` mode so test code that calls
+    ``step.measure(...)`` / ``step.substep(...)`` keeps working without any
+    Sift configuration. ``measure*`` calls evaluate bounds locally so pass/fail
+    outcomes match the real plugin exactly.
+    """
+
+    def __init__(self, name: str, description: str | None = None) -> None:
+        self.name = name
+        self.current_step = _NoopTestStep()
+        self.current_step.description = description
+
+    def __enter__(self) -> _NoopStep:
+        return self
+
+    def __exit__(self, exc_type, exc_value, tb) -> None:
+        return None
+
+    def measure(
+        self,
+        *,
+        name: str,
+        value: float | str | bool | int,
+        bounds: dict[str, float] | NumericBounds | str | bool | None = None,
+        timestamp: datetime | None = None,
+        unit: str | None = None,
+        description: str | None = None,
+        metadata: dict[str, str | float | bool] | None = None,
+        channel_names: list[str] | list[Channel] | None = None,
+    ) -> bool:
+        return value_passes_bounds(value, bounds)
+
+    def measure_avg(
+        self,
+        *,
+        name: str,
+        values: list[float | int] | NDArray[np.float64] | pd.Series,
+        bounds: dict[str, float] | NumericBounds,
+        timestamp: datetime | None = None,
+        unit: str | None = None,
+        description: str | None = None,
+        metadata: dict[str, str | float | bool] | None = None,
+        channel_names: list[str] | list[Channel] | None = None,
+    ) -> bool:
+        return value_passes_bounds(float(np.mean(to_numpy_array(values))), bounds)
+
+    def measure_all(
+        self,
+        *,
+        name: str,
+        values: list[float | int] | NDArray[np.float64] | pd.Series,
+        bounds: dict[str, float] | NumericBounds,
+        timestamp: datetime | None = None,
+        unit: str | None = None,
+        description: str | None = None,
+        metadata: dict[str, str | float | bool] | None = None,
+        channel_names: list[str] | list[Channel] | None = None,
+    ) -> bool:
+        return all_within_bounds(to_numpy_array(values), bounds)
+
+    def report_outcome(self, name: str, result: bool, reason: str | None = None) -> bool:
+        return result
+
+    def substep(
+        self,
+        name: str,
+        description: str | None = None,
+        metadata: dict[str, str | float | bool] | None = None,
+    ) -> _NoopStep:
+        return _NoopStep(name=name, description=description)
+
+    def update_step_from_result(self, *_args: Any, **_kwargs: Any) -> None:
+        return None
+
+
+class _NoopReportContext:
+    """Report context shim exposing only what user code or autouse hooks touch."""
+
+    def __init__(self) -> None:
+        self.report = _NoopReport()
+
+    def new_step(
+        self,
+        name: str,
+        description: str | None = None,
+        assertion_as_fail_not_error: bool = True,
+        metadata: dict[str, str | float | bool] | None = None,
+    ) -> _NoopStep:
+        return _NoopStep(name=name, description=description)
 
 
 _CREDENTIAL_KEYS: tuple[tuple[str, _Option | None], ...] = (
@@ -308,10 +461,25 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient:
     etc.) can override this fixture by defining their own ``sift_client``
     in their ``conftest.py``; pytest fixture resolution prefers the local
     definition.
+
+    In ``--sift-offline`` mode the missing-credential check is relaxed:
+    real env vars and ini values still win when set (so the client is
+    constructible against a real backend even though no calls are made), but
+    anything still missing is filled with a placeholder. In ``--sift-disabled``
+    mode the credential resolution is skipped entirely and placeholders are
+    always used.
     """
+    if _is_disabled(pytestconfig):
+        return SiftClient(
+            connection_config=SiftConnectionConfig(
+                api_key=os.getenv("SIFT_API_KEY") or "disabled",
+                grpc_url=os.getenv("SIFT_GRPC_URI") or "disabled.invalid:0",
+                rest_url=os.getenv("SIFT_REST_URI") or "http://disabled.invalid",
+            )
+        )
     resolved = {env: _resolve_credential(pytestconfig, env, opt) for env, opt in _CREDENTIAL_KEYS}
     missing = [env for env, value in resolved.items() if not value]
-    if missing:
+    if missing and not _is_offline(pytestconfig):
         raise pytest.UsageError(
             "Sift credentials missing: "
             + ", ".join(missing)
@@ -319,8 +487,16 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient:
             "from a `.env` file automatically — or set the URIs via "
             "`sift_grpc_uri` / `sift_rest_uri` under `[tool.pytest.ini_options]` "
             "in pyproject.toml, or override the sift_client fixture in your "
-            "conftest.py."
+            "conftest.py, or pass --sift-offline / --sift-disabled to run "
+            "without contacting Sift."
         )
+    _OFFLINE_DEFAULTS = {
+        "SIFT_API_KEY": "offline",
+        "SIFT_GRPC_URI": "offline.invalid:0",
+        "SIFT_REST_URI": "http://offline.invalid",
+    }
+    for env in missing:
+        resolved[env] = _OFFLINE_DEFAULTS[env]
     # `or ""` is unreachable in practice since the `missing` check above guarantees
     # non-None values
     return SiftClient(
@@ -334,8 +510,8 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient:
 
 @pytest.fixture(scope="session")
 def report_context(
-    sift_client: SiftClient, request: pytest.FixtureRequest, pytestconfig: pytest.Config
-) -> Generator[ReportContext | None, None, None]:
+    request: pytest.FixtureRequest, pytestconfig: pytest.Config
+) -> Generator[ReportContext | _NoopReportContext, None, None]:
     """Lazy session-scoped Sift ReportContext.
 
     The fixture is no longer autouse; it's instantiated on the first call to
@@ -344,22 +520,48 @@ def report_context(
     session is excluded via the marker gate, this fixture is never resolved
     and no ReportContext (and no teardown subprocess) is created.
 
-    The log file destination is controlled by ``--sift-test-results-log-file``.
-    Defaults to a temp file when not set.
-
-    When ``--sift-test-results-check-connection`` is passed, this fixture will
-    yield ``None`` if the Sift client has no connection to the server. That mode
-    requires a ``client_has_connection`` fixture to be available in the session.
+    Mode selection:
+
+    * ``--sift-disabled`` — yield a stub context (no client, no network, no
+      log file). Test code calling ``step.measure(...)`` keeps working;
+      bounds are evaluated locally so pass/fail outcomes match the real
+      plugin exactly.
+    * ``--sift-offline`` — skip the session-start ping, route all
+      create/update calls through the JSONL log file, and don't spawn the
+      import-test-result-log replay subprocess at session end.
+    * default (online) — verify connectivity via the ``client_has_connection``
+      fixture. A failed ping aborts the session with ``pytest.UsageError``,
+      naming both ``--sift-offline`` and ``--sift-disabled`` as escape hatches.
+
+    The log file destination is controlled by ``--sift-test-results-log-file``;
+    defaults to a temp file when not set.
     """
-    if _check_connection_enabled(pytestconfig) and not _has_sift_connection(request):
-        yield None
+    if _is_disabled(pytestconfig):
+        global REPORT_CONTEXT
+        ctx = _NoopReportContext()
+        REPORT_CONTEXT = ctx
+        yield ctx
         return
+    sift_client = request.getfixturevalue("sift_client")
+    if not _is_offline(pytestconfig):
+        try:
+            request.getfixturevalue("client_has_connection")
+        except pytest.UsageError:
+            raise
+        except Exception as exc:
+            grpc_config = getattr(getattr(sift_client, "grpc_client", None), "_config", None)
+            grpc_url = getattr(grpc_config, "uri", "<unknown>")
+            raise pytest.UsageError(
+                f"Sift ping failed against {grpc_url}: {exc}. "
+                "Pass --sift-offline to run without contacting Sift, or "
+                "--sift-disabled to skip Sift entirely."
+            ) from exc
     yield from _report_context_impl(sift_client, request, pytestconfig=pytestconfig)
 
 
 def _step_impl(
-    report_context: ReportContext, request: pytest.FixtureRequest
-) -> Generator[NewStep | None, None, None]:
+    report_context: ReportContext | _NoopReportContext, request: pytest.FixtureRequest
+) -> Generator[NewStep | _NoopStep, None, None]:
     name = str(request.node.name)
     existing_docstring = request.node.obj.__doc__ or None
     with report_context.new_step(
@@ -378,23 +580,22 @@ def _step_impl(
 def step(
     request: pytest.FixtureRequest,
     pytestconfig: pytest.Config,
-) -> Generator[NewStep | None, None, None]:
+) -> Generator[NewStep | _NoopStep | None, None, None]:
     """Create an outer step for the function when the Sift gate is on.
 
     Resolves the gate via `_sift_enabled_for(request.node, ini_default)`:
     `sift_exclude` marker forces off, `sift_include` forces on, otherwise the
     `sift_test_results_autouse` ini default applies. When on, requests the
     session `report_context` lazily — the first gated test in the session
-    triggers its creation, subsequent gated tests reuse it.
+    triggers its creation, subsequent gated tests reuse it. In
+    ``--sift-disabled`` mode the report context is a stub, so the gate still
+    runs but the resulting step is a no-op shim.
     """
     default = bool(_option_or_ini(pytestconfig, _AUTOUSE))
     if not _sift_enabled_for(request.node, default):
         yield None
         return
     rc = request.getfixturevalue("report_context")
-    if rc is None:
-        yield None
-        return
     yield from _step_impl(rc, request)
 
 
@@ -402,7 +603,7 @@ def step(
 def module_substep(
     request: pytest.FixtureRequest,
     pytestconfig: pytest.Config,
-) -> Generator[NewStep | None, None, None]:
+) -> Generator[NewStep | _NoopStep | None, None, None]:
     """Create a per-module step when at least one test in the module is gated on.
 
     Inspects the module's collected items rather than gating on a single marker,
@@ -416,21 +617,22 @@ def module_substep(
         yield None
         return
     rc = request.getfixturevalue("report_context")
-    if rc is None:
-        yield None
-        return
     yield from _step_impl(rc, request)
 
 
 @pytest.fixture(scope="session")
-def client_has_connection(sift_client):
-    """Check if the SiftClient has a connection to the Sift server.
-
-    Can be used to skip tests that require a connection to the Sift server, and is
-    consulted by the Sift fixtures when ``--sift-test-results-check-connection`` is set.
+def client_has_connection(pytestconfig: pytest.Config, request: pytest.FixtureRequest) -> bool:
+    """Verify the ``SiftClient`` can reach Sift via ``/ping``.
+
+    Consulted at session start by ``report_context`` in online mode. A failed
+    ping raises through ``report_context`` and aborts the session with
+    ``pytest.UsageError``. Override this fixture in your conftest to use a
+    different reachability signal (e.g. a cached auth token) for environments
+    where pinging is the wrong check. Returns ``False`` in ``--sift-disabled``
+    mode without constructing a client.
     """
-    try:
-        sift_client.ping.ping()
-        return True
-    except Exception:
+    if _is_disabled(pytestconfig):
         return False
+    sift_client = request.getfixturevalue("sift_client")
+    sift_client.ping.ping()
+    return True

From c4037402fac22ad84ca4c6d0108e43f05ef95d32 Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Tue, 19 May 2026 08:14:43 -0700
Subject: [PATCH 03/18] Python(docs): document --sift-offline and
 --sift-disabled modes

Rewrites the "Running offline" section into a unified three-mode table
(online / offline / disabled) and replaces the old check-connection +
None-handling guidance with the new fail-fast and stub-yielding designs.

Repo conftest defaults to --sift-disabled for unit-test runs and stays
online (log-file inline) for integration runs against a real backend.
addopts now loads the plugin globally so the conftest can flip modes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 python/docs/examples/pytest_plugin.md         | 186 +++++++-----------
 python/lib/sift_client/_tests/conftest.py     |  16 +-
 .../sift_client/util/test_results/__init__.py |  13 +-
 python/pyproject.toml                         |   5 +-
 4 files changed, 101 insertions(+), 119 deletions(-)

diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md
index 3557dd9c7..834c5af59 100644
--- a/python/docs/examples/pytest_plugin.md
+++ b/python/docs/examples/pytest_plugin.md
@@ -88,21 +88,22 @@ def sift_client() -> SiftClient:
 | `report_context` | fixture (autouse) | session | The `ReportContext` backing the run's `TestReport`. Use it to attach metadata or open ad-hoc steps. |
 | `step` | fixture (autouse) | function | A `NewStep` created for the current test function. Exposes `measure*`, `substep`, `report_outcome`, and `current_step`. |
 | `module_substep` | fixture (autouse) | module | One step per test file with each function nested as a substep. |
-| `client_has_connection` | fixture | session | Calls `sift_client.ping.ping()`; consulted only when `--sift-test-results-check-connection` is set. |
+| `client_has_connection` | fixture | session | Calls `sift_client.ping.ping()`; consulted by `report_context` at session start in online mode (the default). Override to skip the ping or use a different reachability signal. |
 
 ### CLI options
 
 | Flag | Default | Effect |
 |---|---|---|
-| `--sift-test-results-log-file=<path\|true\|false>` | temp file | Where the JSONL log of create/update calls goes. With a log file set, the plugin spawns an `import-test-result-log --incremental` worker that polls the file and replays entries against Sift while the run is in flight. Pass `false` to disable the file entirely; create/update calls then go straight to the API synchronously during tests. |
+| `--sift-offline` | off (online) | Skip the session-start ping and don't contact Sift. All create/update calls go to the JSONL log file for later replay via `import-test-result-log`. Missing `SIFT_*` env vars are tolerated; placeholders are filled. |
+| `--sift-disabled` | off | Skip Sift entirely. Autouse fixtures yield stub objects; `step.measure(...)` still returns real pass/fail booleans by evaluating bounds locally. Nothing is sent to Sift and no log file is written. Also honored via `SIFT_DISABLED=1`. Incompatible with `--sift-offline`. |
+| `--sift-test-results-log-file=<path\|true\|false>` | temp file | Where the JSONL log of create/update calls goes. With a log file set, the plugin spawns an `import-test-result-log --incremental` worker that polls the file and replays entries against Sift while the run is in flight. Pass `false` to disable the file entirely; create/update calls then go straight to the API synchronously during tests. Incompatible with `--sift-offline` since offline mode needs the log file as its sole sink. |
 | `--no-sift-test-results-git-metadata` | git metadata on | Skip capturing git repo/branch/commit on the report's metadata. |
-| `--sift-test-results-check-connection` | off | Make `report_context`, `step`, and `module_substep` no-op (yield `None`) when `client_has_connection` is `False`. Lets the same suite run locally without a Sift backend. |
 
 These can be passed permanently via `addopts`:
 
 ```ini title="pytest.ini"
 [pytest]
-addopts = --sift-test-results-check-connection
+addopts = --sift-offline
 ```
 
 Or set the matching ini key directly (recommended for stable per-project
@@ -114,7 +115,8 @@ CLI flags, when passed, override the ini values.
 |---|---|---|
 | `sift_test_results_log_file` | string (`true` / `false` / `none` / path) | `--sift-test-results-log-file=<value>` |
 | `sift_test_results_git_metadata` | bool (default `true`) | `--no-sift-test-results-git-metadata` (sets to `false`) |
-| `sift_test_results_check_connection` | bool (default `false`) | `--sift-test-results-check-connection` |
+| `sift_offline` | bool (default `false`) | `--sift-offline` |
+| `sift_disabled` | bool (default `false`) | `--sift-disabled` (also honors `SIFT_DISABLED` env var) |
 | `sift_test_results_autouse` | bool (default `true`) | _(no CLI flag; controls the marker gate below)_ |
 
 The default `sift_client` fixture reads its two URIs from environment first
@@ -133,8 +135,7 @@ flags for credentials.
 
 ```toml title="pyproject.toml"
 [tool.pytest.ini_options]
-sift_test_results_check_connection = true
-sift_test_results_log_file = "false"
+sift_offline = true
 sift_test_results_git_metadata = false
 sift_grpc_uri = "your-org.sift.example:443"
 sift_rest_uri = "https://your-org.sift.example"
@@ -142,8 +143,7 @@ sift_rest_uri = "https://your-org.sift.example"
 
 ```ini title="pytest.ini"
 [pytest]
-sift_test_results_check_connection = true
-sift_test_results_log_file = false
+sift_offline = true
 sift_test_results_git_metadata = false
 sift_grpc_uri = your-org.sift.example:443
 sift_rest_uri = https://your-org.sift.example
@@ -660,92 +660,36 @@ pytest
 pytest --sift-test-results-log-file=./sift-results.jsonl
 ```
 
-See [Running offline](#running-offline) for the same suite running with or
-without a reachable Sift server.
+See [Running modes](#running-modes) for the offline and disabled flags
+that let the same suite run without (or without contacting) Sift.
 
-## Running offline
+## Running modes
 
-The plugin supports two offline workflows, depending on whether you want a
-Sift report at all when the test environment can't reach Sift. The first
-turns the plugin into a no-op when the server is unreachable. The second
-keeps the plugin running normally and writes every create/update to a local
-JSONL file that you upload from a connected machine afterward.
+The plugin runs in one of three modes, selected at invocation:
 
-| Pattern | Flag | Runtime behavior | Follow-up |
-|---|---|---|---|
-| Skip when offline | `--sift-test-results-check-connection` | Fixtures yield `None`, no log file, no report. Pytest still reports pass/fail. | None. |
-| Capture locally, upload later | `--sift-test-results-log-file=<path>` | Plugin writes every create/update to the JSONL file. | `import-test-result-log <path>` from a connected machine. |
-
-Pattern 1 suits laptop dev and CI without Sift secrets. Pattern 2 suits
-field tests, vehicles on remote sites, and air-gapped labs.
-
-### Pattern 1: skip when offline
-
-`--sift-test-results-check-connection` makes the plugin ping Sift once at
-session start through the `client_has_connection` fixture (which by default
-calls `sift_client.ping.ping()`). On a failed ping, `report_context`,
-`step`, and `module_substep` yield `None` for the rest of the session.
-Pytest still runs the tests and still reports pass/fail.
-
-```bash
-pytest --sift-test-results-check-connection
-```
-
-```ini title="pytest.ini"
-[pytest]
-addopts = --sift-test-results-check-connection
-```
-
-#### Handling `None` in tests
-
-Calls on `step` raise `AttributeError` when it's `None`, so tests that take
-`step` as a parameter need a guard. The cleanest fix is to shadow the
-plugin's `step` fixture in your conftest and turn the `None` case into an
-automatic skip.
-
-```python title="conftest.py"
-import pytest
-
-pytest_plugins = ["sift_client.pytest_plugin"]
+| Mode | Flag | Network | Log file | `step.measure(...)` | Use case |
+|---|---|---|---|---|---|
+| Online (default) | _(none)_ | yes (pings at session start, fail-fast) | optional write-through backup | real measurement against Sift | CI with Sift credentials, local dev hitting your tenant |
+| Offline | `--sift-offline` | none | required (the sole sink) | real measurement queued to log | field tests, vehicles, air-gapped labs, CI without network |
+| Disabled | `--sift-disabled` | none | none | local bounds eval returning a real bool | local dev / CI without Sift credentials or where reports aren't wanted |
 
+`--sift-offline` and `--sift-disabled` are mutually exclusive — disabled
+writes nothing, offline writes everything to a log file.
 
-@pytest.fixture(autouse=True)
-def step(step):
-    if step is None:
-        pytest.skip("Sift unavailable")
-    yield step
-```
-
-The `step` parameter on the override resolves to the plugin's fixture, not
-to the override itself. `autouse=True` is required so the skip applies to
-tests that don't request `step` directly. The same shadowing trick works
-for `module_substep` and `report_context`.
+### Online mode (default)
 
-For one-off tests that don't share a conftest, an inline guard works just
-as well:
-
-```python
-def test_battery_voltage(step):
-    if step is None:
-        pytest.skip("Sift unavailable")
-    step.measure(name="battery_voltage", value=4.97, bounds={"min": 4.8, "max": 5.2})
-```
-
-If you'd rather have tests pass through silently than skip them, wrap the
-calls in a helper that no-ops on `None`:
-
-```python
-def safe_measure(step, **kwargs):
-    if step is None:
-        return True
-    return step.measure(**kwargs)
-```
+`report_context` resolves the `client_has_connection` fixture at session
+start. The default implementation calls `sift_client.ping.ping()`. A failed
+ping aborts the whole session with `pytest.UsageError`, naming both
+`--sift-offline` and `--sift-disabled` as escape hatches. This is fail-fast
+on purpose: a CI run that silently no-ops because the network was flaky
+won't be noticed until someone goes looking for the report.
 
 #### Overriding the connection check
 
-The default `client_has_connection` fixture calls `sift_client.ping.ping()`.
-Override it in your conftest if pinging is the wrong signal for your
-environment, for example a token cache that's only warm when authenticated:
+Override `client_has_connection` in your conftest when pinging is the wrong
+signal for your environment, for example a token cache that's only warm
+when authenticated:
 
 ```python title="conftest.py"
 from pathlib import Path
@@ -758,35 +702,22 @@ def client_has_connection(sift_client) -> bool:
     return Path("~/.sift-token-cache").expanduser().is_file()
 ```
 
-The plugin only consults this fixture when `--sift-test-results-check-connection`
-is set, so an unused override has no effect on a normal run.
+The override has no effect under `--sift-offline` or `--sift-disabled`,
+both of which skip the check entirely.
 
-### Pattern 2: capture locally, upload later
+### Offline mode (`--sift-offline`)
 
-This pattern keeps the plugin running normally even when Sift is
-unreachable. The plugin writes to the log file, the worker dies on connect,
-and the file is left on disk for you to upload later. Pin the log file path
-so you can find it afterward, and don't pass
-`--sift-test-results-check-connection`, which would suppress the logging
-this pattern relies on.
+Offline mode keeps the plugin running normally — same fixtures, same
+`step.measure(...)` semantics — but routes every create/update through the
+JSONL log file instead of the Sift API. The session-start ping is skipped,
+missing `SIFT_*` env vars are tolerated (placeholders are filled), and the
+`import-test-result-log --incremental` worker subprocess is not spawned at
+session end. The log file is the sole sink.
 
 ```bash
-pytest --sift-test-results-log-file=./run.jsonl
+pytest --sift-offline --sift-test-results-log-file=./run.jsonl
 ```
 
-What happens during the run:
-
-- Every report, step, and measurement create/update is written to
-  `run.jsonl`. The plugin doesn't contact the Sift API for any of these
-  calls; they return simulated responses keyed by UUIDs that the replay
-  later maps to real IDs.
-- The `import-test-result-log --incremental` worker subprocess starts and
-  exits early when it can't reach Sift. The session does not fail when the
-  worker exits before the run ends.
-- Tests run against a real `step` fixture, so `step.measure(...)`,
-  substeps, parametrize, fixtures, and `module_substep` behave exactly as
-  they do online. No conftest changes are needed.
-
 Once you have connectivity, replay the file:
 
 ```bash
@@ -797,12 +728,45 @@ The replay creates the report, steps, and measurements against Sift in one
 batch. See [Replaying a saved log file](#replaying-a-saved-log-file) for
 details on cleanup and the incremental flag.
 
-!!! warning "Pin the log path for Pattern 2"
-    Without `--sift-test-results-log-file=<path>`, the plugin writes to a
+`--sift-test-results-log-file=none` is rejected as a usage error under
+`--sift-offline` since the log file is the sole sink — without it, results
+are lost.
+
+!!! warning "Pin the log path"
+    Without `--sift-test-results-log-file=<path>`, offline mode writes to a
     `tempfile.NamedTemporaryFile` and only surfaces the path via a
     `logger.info` line. Always pin a known path when you intend to replay
     the file later.
 
+### Disabled mode (`--sift-disabled`)
+
+Disabled mode skips Sift entirely. The plugin stays loaded so the autouse
+fixtures and markers still exist, but `report_context`, `step`, and
+`module_substep` yield stub objects. `step.measure(...)`, `step.measure_avg(...)`,
+`step.measure_all(...)`, and `step.substep(...)` all keep working — bounds
+are evaluated locally and return the real pass/fail boolean — but nothing
+is sent to Sift and no log file is written. No `SIFT_*` env vars or ini
+keys are read.
+
+Three ways to opt in, in order of how a project typically reaches for them:
+
+```bash
+# Per-environment: set once in .envrc, devcontainer, or CI job config
+export SIFT_DISABLED=1
+
+# Per-invocation: explicit kill-switch on the command line
+pytest --sift-disabled
+
+# Per-project as the default mode (rare; usually online is the default)
+# pyproject.toml:
+#   [tool.pytest.ini_options]
+#   sift_disabled = true
+```
+
+Use this for local dev without Sift credentials, for open-source consumers
+of a library that uses Sift internally, and for CI jobs (e.g. PR runs)
+that shouldn't pollute the report stream with intermediate results.
+
 ## Replaying a saved log file
 
 When the worker doesn't finish cleanly the plugin will print a hint mentioning
diff --git a/python/lib/sift_client/_tests/conftest.py b/python/lib/sift_client/_tests/conftest.py
index 79b079d39..5997a7265 100644
--- a/python/lib/sift_client/_tests/conftest.py
+++ b/python/lib/sift_client/_tests/conftest.py
@@ -79,5 +79,17 @@ def ci_pytest_tag(sift_client):
 
 
 def pytest_configure(config: pytest.Config) -> None:
-    """Enable the Sift connection-check mode for the fixtures used in this test suite since we run w/ mock client in non-integration tests."""
-    config.option.sift_test_results_check_connection = True
+    """Pick a Sift plugin mode based on whether integration tests are running.
+
+    Integration runs targeting a real backend stay online with the log file
+    disabled (writes go inline). Other runs default to ``--sift-disabled`` so
+    unit tests don't need credentials or a log file.
+    """
+    is_integration_run = "integration" in (config.option.markexpr or "")
+    have_real_backend = all(
+        os.getenv(name) for name in ("SIFT_API_KEY", "SIFT_GRPC_URI", "SIFT_REST_URI")
+    )
+    if is_integration_run and have_real_backend:
+        config.option.sift_test_results_log_file = False
+    else:
+        config.option.sift_disabled = True
diff --git a/python/lib/sift_client/util/test_results/__init__.py b/python/lib/sift_client/util/test_results/__init__.py
index ea213056e..ebd39bc08 100644
--- a/python/lib/sift_client/util/test_results/__init__.py
+++ b/python/lib/sift_client/util/test_results/__init__.py
@@ -105,14 +105,18 @@ def pytest_collection_modifyitems(config, items):
 
 CLI options registered by the plugin:
 
+- `--sift-offline`: Run without contacting Sift. All create/update calls are
+  written to the JSONL log file for later replay via `import-test-result-log`.
+  No session-start ping is attempted.
+- `--sift-disabled`: Skip Sift entirely. Autouse fixtures yield stub objects;
+  `step.measure(...)` still returns real pass/fail booleans by evaluating
+  bounds locally, but nothing is sent to Sift and no log file is written.
+  Also honored via the `SIFT_DISABLED` env var. Incompatible with `--sift-offline`.
 - `--sift-test-results-log-file`: Path to write the JSONL log file. `true`
   (default) auto-creates a temp file. `false` or `none` disables logging.
   Any other value is treated as a file path.
 - `--no-sift-test-results-git-metadata`: Exclude git metadata (repo, branch,
   commit) from the test report. Included by default.
-- `--sift-test-results-check-connection`: Make `report_context`, `step`, and
-  `module_substep` no-op when the client has no connection. Requires a
-  `client_has_connection` fixture (the plugin ships a default).
 
 Each option has a matching ini key for per-project configuration under
 ``[tool.pytest.ini_options]`` in ``pyproject.toml`` (or ``[pytest]`` in
@@ -127,8 +131,7 @@ def pytest_collection_modifyitems(config, items):
 ```toml
 [tool.pytest.ini_options]
 sift_test_results_autouse = false
-sift_test_results_log_file = "false"
-sift_test_results_check_connection = true
+sift_offline = true
 sift_test_results_git_metadata = false
 sift_grpc_uri = "your-org.sift.example:443"
 sift_rest_uri = "https://your-org.sift.example"
diff --git a/python/pyproject.toml b/python/pyproject.toml
index c0156f679..ffaa84b34 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -391,7 +391,10 @@ env_files = [
 # `pytester` is registered globally because pytest 8+ disallows `pytest_plugins`
 # in non-top-level conftests. Only the plugin test suite uses it; activating it
 # globally is harmless since the fixture is opt-in.
-addopts = "-p pytester"
+# The Sift pytest plugin is loaded so the project's own integration tests can
+# use its fixtures. Unit-test runs are flipped to `--sift-disabled` mode by
+# `lib/sift_client/_tests/conftest.py`.
+addopts = "-p pytester -p sift_client.pytest_plugin"
 # The Sift plugin is loaded for the whole project via `python/conftest.py`.
 # The autouse gate defaults to off here so unit tests don't use it. The
 # integration subtree (lib/sift_client/_tests/util/) opts back in via

From 4e38f32cac97c012cc3a0d5d76f6a363e335c73d Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Tue, 19 May 2026 08:33:28 -0700
Subject: [PATCH 04/18] Python(refactor): let --sift-disabled supersede
 --sift-offline instead of erroring

Disabled mode is the "skip Sift entirely" hammer; combining it with
offline (or anything else) shouldn't be a usage error since disabled
already skips the work the other flag describes. Drops the mutex check
in `pytest_configure`. The existing fixture ordering already makes
disabled win: `report_context` and `sift_client` short-circuit on
`_is_disabled` before consulting `_is_offline`. Updates the help text,
docs, and the test that previously asserted the error.

The only remaining incompatibility is `--sift-offline` +
`--sift-test-results-log-file=none`, which still errors because offline
mode genuinely needs the log file as its sole sink.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 python/docs/examples/pytest_plugin.md         |  7 ++++---
 .../_tests/pytest_plugin/test_disabled.py     | 21 +++++++++++++------
 python/lib/sift_client/pytest_plugin.py       | 14 +++++--------
 3 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md
index 834c5af59..9cac2b929 100644
--- a/python/docs/examples/pytest_plugin.md
+++ b/python/docs/examples/pytest_plugin.md
@@ -95,7 +95,7 @@ def sift_client() -> SiftClient:
 | Flag | Default | Effect |
 |---|---|---|
 | `--sift-offline` | off (online) | Skip the session-start ping and don't contact Sift. All create/update calls go to the JSONL log file for later replay via `import-test-result-log`. Missing `SIFT_*` env vars are tolerated; placeholders are filled. |
-| `--sift-disabled` | off | Skip Sift entirely. Autouse fixtures yield stub objects; `step.measure(...)` still returns real pass/fail booleans by evaluating bounds locally. Nothing is sent to Sift and no log file is written. Also honored via `SIFT_DISABLED=1`. Incompatible with `--sift-offline`. |
+| `--sift-disabled` | off | Skip Sift entirely. Autouse fixtures yield stub objects; `step.measure(...)` still returns real pass/fail booleans by evaluating bounds locally. Nothing is sent to Sift and no log file is written. Also honored via `SIFT_DISABLED=1`. Supersedes every other flag — if both `--sift-disabled` and `--sift-offline` are passed, disabled wins. |
 | `--sift-test-results-log-file=<path\|true\|false>` | temp file | Where the JSONL log of create/update calls goes. With a log file set, the plugin spawns an `import-test-result-log --incremental` worker that polls the file and replays entries against Sift while the run is in flight. Pass `false` to disable the file entirely; create/update calls then go straight to the API synchronously during tests. Incompatible with `--sift-offline` since offline mode needs the log file as its sole sink. |
 | `--no-sift-test-results-git-metadata` | git metadata on | Skip capturing git repo/branch/commit on the report's metadata. |
 
@@ -673,8 +673,9 @@ The plugin runs in one of three modes, selected at invocation:
 | Offline | `--sift-offline` | none | required (the sole sink) | real measurement queued to log | field tests, vehicles, air-gapped labs, CI without network |
 | Disabled | `--sift-disabled` | none | none | local bounds eval returning a real bool | local dev / CI without Sift credentials or where reports aren't wanted |
 
-`--sift-offline` and `--sift-disabled` are mutually exclusive — disabled
-writes nothing, offline writes everything to a log file.
+When both `--sift-offline` and `--sift-disabled` are passed, disabled wins.
+Disabled mode supersedes every other flag — it's the "skip Sift entirely"
+hammer.
 
 ### Online mode (default)
 
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
index b876e3fb5..30d17ec81 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
@@ -79,15 +79,24 @@ def test_disabled_via_env_var(
         result = pytester.runpytest_subprocess()
         result.assert_outcomes(passed=1)
 
-    def test_disabled_incompatible_with_offline(
+    def test_disabled_supersedes_offline(
         self,
         pytester: pytest.Pytester,
+        clear_sift_env: None,
         write_plugin_conftest: Callable[[], None],
     ) -> None:
-        """``--sift-disabled`` + ``--sift-offline`` is a usage error."""
+        """``--sift-disabled`` wins when combined with ``--sift-offline``.
+
+        Disabled is the "skip Sift entirely" hammer; passing it alongside
+        offline shouldn't error. The session runs without credentials, without
+        a log file, and without the offline-mode replay machinery.
+        """
         write_plugin_conftest()
-        pytester.makepyfile("def test_should_not_run(): pass")
+        pytester.makepyfile(
+            """
+            def test_runs(step):
+                assert step.measure(name="v", value=5.0, bounds={"max": 10.0}) is True
+            """
+        )
         result = pytester.runpytest_subprocess("--sift-disabled", "--sift-offline")
-        assert result.ret != 0
-        combined = "\n".join(result.outlines + result.errlines)
-        assert "incompatible with --sift-offline" in combined, combined
+        result.assert_outcomes(passed=1)
diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py
index 071b03a6e..dec3b3625 100644
--- a/python/lib/sift_client/pytest_plugin.py
+++ b/python/lib/sift_client/pytest_plugin.py
@@ -91,9 +91,11 @@ class _Option:
     cli_help="Disable Sift integration entirely. Autouse fixtures yield stub "
     "objects; `step.measure(...)` still returns real pass/fail booleans by "
     "evaluating bounds locally, but nothing is sent to Sift and no log file "
-    "is written. Also honored via the `SIFT_DISABLED` env var.",
+    "is written. Also honored via the `SIFT_DISABLED` env var. Supersedes "
+    "every other flag.",
     ini_help="When true, run in disabled mode (same effect as --sift-disabled). "
-    "Also honored via the SIFT_DISABLED env var. Defaults to false.",
+    "Also honored via the SIFT_DISABLED env var. Supersedes every other "
+    "setting. Defaults to false.",
     ini_type="bool",
     ini_default=False,
 )
@@ -160,7 +162,7 @@ def pytest_addoption(parser: pytest.Parser) -> None:
 
 
 def pytest_configure(config: pytest.Config) -> None:
-    """Register markers and reject mutually exclusive mode flags."""
+    """Register the Sift gate markers so they show up in `pytest --markers`."""
     config.addinivalue_line(
         "markers",
         "sift_include: force the Sift autouse fixtures to activate for this test "
@@ -171,12 +173,6 @@ def pytest_configure(config: pytest.Config) -> None:
         "sift_exclude: force the Sift autouse fixtures to skip this test "
         "regardless of the `sift_test_results_autouse` ini default.",
     )
-    if _is_disabled(config) and _is_offline(config):
-        raise pytest.UsageError(
-            "--sift-disabled is incompatible with --sift-offline. Disabled mode "
-            "skips Sift entirely (no log file, no network); offline mode writes "
-            "to a log file for later replay. Pick one."
-        )
 
 
 def _is_offline(pytestconfig: pytest.Config | None) -> bool:

From 41c2ed7c86e6c239e040ffa5b9b8ec10bd26f875 Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Tue, 19 May 2026 08:36:59 -0700
Subject: [PATCH 05/18] Python(fix): drop credential check from
 integration-mode gate in conftest

The previous conditional ``is_integration_run and have_real_backend``
silently fell back to ``--sift-disabled`` when someone ran ``-m
integration`` without ``SIFT_*`` env vars set, which both swallowed the
missing-credentials usage error and ran integration tests against the
disabled-mode stubs instead of the real backend.

The credential check belongs to the plugin's ``sift_client`` fixture,
which already raises a ``pytest.UsageError`` naming the missing env
vars. ``-m integration`` is a commitment; let that error fire.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 python/lib/sift_client/_tests/conftest.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/python/lib/sift_client/_tests/conftest.py b/python/lib/sift_client/_tests/conftest.py
index 5997a7265..45ad404fa 100644
--- a/python/lib/sift_client/_tests/conftest.py
+++ b/python/lib/sift_client/_tests/conftest.py
@@ -81,15 +81,14 @@ def ci_pytest_tag(sift_client):
 def pytest_configure(config: pytest.Config) -> None:
     """Pick a Sift plugin mode based on whether integration tests are running.
 
-    Integration runs targeting a real backend stay online with the log file
-    disabled (writes go inline). Other runs default to ``--sift-disabled`` so
-    unit tests don't need credentials or a log file.
+    Integration runs (``-m integration``) stay online with the log file
+    disabled so writes go inline against the real backend; missing
+    ``SIFT_*`` env vars surface as an actionable usage error from the
+    plugin's ``sift_client`` fixture. Every other run defaults to
+    ``--sift-disabled`` so unit tests don't need credentials.
     """
     is_integration_run = "integration" in (config.option.markexpr or "")
-    have_real_backend = all(
-        os.getenv(name) for name in ("SIFT_API_KEY", "SIFT_GRPC_URI", "SIFT_REST_URI")
-    )
-    if is_integration_run and have_real_backend:
+    if is_integration_run:
         config.option.sift_test_results_log_file = False
     else:
         config.option.sift_disabled = True

From 9ad358ef82f2361281757cf6cf9139666110d76b Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Tue, 19 May 2026 08:48:48 -0700
Subject: [PATCH 06/18] Python(test): assert mode-specific fixture behavior
 beyond pass/fail

Adds tests that probe what each mode actually does, not just whether
the session exits 0:

* Disabled: ``step`` / ``report_context`` / ``module_substep`` are
  ``_NoopStep`` / ``_NoopReportContext`` instances; no log file is
  written even when a path is pinned; ``sift_client`` and
  ``client_has_connection`` are never resolved (raising overrides stay
  un-triggered).
* Offline: ``step`` / ``report_context`` are the real ``NewStep`` /
  ``ReportContext``; the pinned JSONL log file contains
  ``[CreateTestReport:...]`` and ``[CreateTestStep:...]`` lines after
  the run; ``client_has_connection`` is never resolved.
* Online: ``client_has_connection`` is resolved exactly once at session
  start (counter file written by the override).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../_tests/pytest_plugin/test_disabled.py     | 79 +++++++++++++++++
 .../_tests/pytest_plugin/test_offline.py      | 86 +++++++++++++++++++
 .../_tests/pytest_plugin/test_online.py       | 58 +++++++++++++
 3 files changed, 223 insertions(+)

diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
index 30d17ec81..11f6f6b81 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
@@ -11,6 +11,8 @@
 from typing import TYPE_CHECKING, Callable
 
 if TYPE_CHECKING:
+    from pathlib import Path
+
     import pytest
 
 
@@ -100,3 +102,80 @@ def test_runs(step):
         )
         result = pytester.runpytest_subprocess("--sift-disabled", "--sift-offline")
         result.assert_outcomes(passed=1)
+
+    def test_disabled_yields_stub_fixtures(
+        self,
+        pytester: pytest.Pytester,
+        clear_sift_env: None,
+        write_plugin_conftest: Callable[[], None],
+    ) -> None:
+        """``step`` / ``report_context`` / ``module_substep`` are stub instances in disabled mode."""
+        write_plugin_conftest()
+        pytester.makepyfile(
+            """
+            from sift_client.pytest_plugin import _NoopReportContext, _NoopStep
+
+            def test_types(step, report_context, module_substep):
+                assert isinstance(step, _NoopStep)
+                assert isinstance(module_substep, _NoopStep)
+                assert isinstance(report_context, _NoopReportContext)
+            """
+        )
+        result = pytester.runpytest_subprocess("--sift-disabled")
+        result.assert_outcomes(passed=1)
+
+    def test_disabled_writes_no_log_file_even_when_path_pinned(
+        self,
+        pytester: pytest.Pytester,
+        tmp_path: Path,
+        clear_sift_env: None,
+        write_plugin_conftest: Callable[[], None],
+    ) -> None:
+        """Disabled mode skips the log-file pipeline even when a path is pinned."""
+        log_path = tmp_path / "should-not-exist.jsonl"
+        write_plugin_conftest()
+        pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)")
+        result = pytester.runpytest_subprocess(
+            "--sift-disabled", f"--sift-test-results-log-file={log_path}"
+        )
+        result.assert_outcomes(passed=1)
+        assert not log_path.exists(), f"log file unexpectedly created at {log_path}"
+
+    def test_disabled_skips_client_has_connection_and_sift_client(
+        self,
+        pytester: pytest.Pytester,
+        clear_sift_env: None,
+    ) -> None:
+        """Disabled mode never resolves ``client_has_connection`` or ``sift_client``.
+
+        The plugin's ``report_context`` short-circuits to the stub before
+        consulting either fixture. Overrides that raise on resolution stay
+        un-triggered, so the inner test passes cleanly.
+        """
+        pytester.makeconftest(
+            """
+            import pytest
+
+            pytest_plugins = ["sift_client.pytest_plugin"]
+
+
+            @pytest.fixture(scope="session")
+            def sift_client():
+                raise AssertionError("sift_client should not resolve in disabled mode")
+
+
+            @pytest.fixture(scope="session")
+            def client_has_connection():
+                raise AssertionError(
+                    "client_has_connection should not resolve in disabled mode"
+                )
+            """
+        )
+        pytester.makepyfile(
+            """
+            def test_runs(step):
+                assert step.measure(name="v", value=5.0, bounds={"max": 10.0}) is True
+            """
+        )
+        result = pytester.runpytest_subprocess("--sift-disabled")
+        result.assert_outcomes(passed=1)
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_offline.py b/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
index 9654a4061..8b1114172 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
@@ -12,6 +12,8 @@
 from typing import TYPE_CHECKING, Callable
 
 if TYPE_CHECKING:
+    from pathlib import Path
+
     import pytest
 
 
@@ -50,3 +52,87 @@ def test_log_file_none_incompatible_with_offline(
         assert result.ret != 0
         combined = "\n".join(result.outlines + result.errlines)
         assert "incompatible with --sift-offline" in combined, combined
+
+    def test_offline_yields_real_fixtures(
+        self,
+        pytester: pytest.Pytester,
+        clear_sift_env: None,
+        write_plugin_conftest: Callable[[], None],
+    ) -> None:
+        """Offline mode keeps the real ``ReportContext`` / ``NewStep``, not the stubs."""
+        write_plugin_conftest()
+        pytester.makepyfile(
+            """
+            from sift_client.pytest_plugin import _NoopReportContext, _NoopStep
+            from sift_client.util.test_results import ReportContext
+            from sift_client.util.test_results.context_manager import NewStep
+
+            def test_types(step, report_context):
+                assert isinstance(report_context, ReportContext)
+                assert isinstance(step, NewStep)
+                assert not isinstance(report_context, _NoopReportContext)
+                assert not isinstance(step, _NoopStep)
+            """
+        )
+        result = pytester.runpytest_subprocess("--sift-offline")
+        result.assert_outcomes(passed=1)
+
+    def test_offline_writes_jsonl_to_pinned_log_file(
+        self,
+        pytester: pytest.Pytester,
+        tmp_path: Path,
+        clear_sift_env: None,
+        write_plugin_conftest: Callable[[], None],
+    ) -> None:
+        """Offline mode populates the pinned JSONL file with create/update entries."""
+        log_path = tmp_path / "run.jsonl"
+        write_plugin_conftest()
+        pytester.makepyfile(
+            """
+            def test_one(step):
+                assert step.measure(
+                    name="v", value=5.0, bounds={"min": 4.8, "max": 5.2}
+                ) is True
+            """
+        )
+        result = pytester.runpytest_subprocess(
+            "--sift-offline", f"--sift-test-results-log-file={log_path}"
+        )
+        result.assert_outcomes(passed=1)
+        assert log_path.exists(), f"offline mode did not create {log_path}"
+        content = log_path.read_text()
+        assert content.strip(), "log file is empty"
+        # Each non-empty line is ``[Operation:uuid] {json}``. A successful
+        # session produces at least the report create + step create lines.
+        lines = [line for line in content.splitlines() if line.strip()]
+        assert any(line.startswith("[CreateTestReport:") for line in lines), content
+        assert any(line.startswith("[CreateTestStep:") for line in lines), content
+
+    def test_offline_skips_client_has_connection(
+        self,
+        pytester: pytest.Pytester,
+        clear_sift_env: None,
+    ) -> None:
+        """Offline mode never resolves ``client_has_connection``.
+
+        Override the fixture to raise on resolution. If the override is
+        invoked, the session aborts; if it isn't, the inner test passes
+        cleanly — confirming the offline path skips the ping check.
+        """
+        pytester.makeconftest(
+            """
+            import pytest
+
+            pytest_plugins = ["sift_client.pytest_plugin"]
+
+
+            @pytest.fixture(scope="session")
+            def client_has_connection():
+                raise AssertionError(
+                    "client_has_connection should not resolve in offline mode"
+                )
+            """
+        )
+        pytester.makepyfile("def test_runs(step): pass")
+        result = pytester.runpytest_subprocess("--sift-offline")
+        result.assert_outcomes(passed=1)
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_online.py b/python/lib/sift_client/_tests/pytest_plugin/test_online.py
index 6cc16573e..876fffb0e 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_online.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_online.py
@@ -12,6 +12,8 @@
 from typing import TYPE_CHECKING, Callable
 
 if TYPE_CHECKING:
+    from pathlib import Path
+
     import pytest
 
 
@@ -73,3 +75,59 @@ def test_should_not_run():
         combined = "\n".join(result.outlines + result.errlines)
         for var in ("SIFT_API_KEY", "SIFT_GRPC_URI", "SIFT_REST_URI"):
             assert var in combined, combined
+
+    def test_online_resolves_client_has_connection_once(
+        self,
+        pytester: pytest.Pytester,
+        tmp_path: Path,
+        clear_sift_env: None,
+    ) -> None:
+        """Online mode resolves ``client_has_connection`` exactly once at session start.
+
+        Overrides the fixture to bump a counter persisted to a file the outer
+        test reads after the inner session finishes. Outcomes aren't asserted
+        because the real ``ReportContext`` constructed against a ``MagicMock``
+        client crashes downstream when Pydantic sees mock IDs; what we're
+        verifying is the ping path itself, which runs before construction.
+        """
+        counter_file = tmp_path / "ping_calls.txt"
+        pytester.makeconftest(
+            f"""
+            from pathlib import Path
+            from unittest.mock import MagicMock
+
+            import pytest
+
+            pytest_plugins = ["sift_client.pytest_plugin"]
+
+            _COUNTER = Path({str(counter_file)!r})
+
+
+            @pytest.fixture(scope="session")
+            def sift_client():
+                return MagicMock()
+
+
+            @pytest.fixture(scope="session")
+            def client_has_connection():
+                prior = int(_COUNTER.read_text()) if _COUNTER.exists() else 0
+                _COUNTER.write_text(str(prior + 1))
+                return True
+            """
+        )
+        pytester.makepyfile(
+            """
+            import pytest
+
+            @pytest.mark.sift_include
+            def test_a(): pass
+
+            @pytest.mark.sift_include
+            def test_b(): pass
+            """
+        )
+        pytester.runpytest_subprocess()
+        assert counter_file.exists(), "client_has_connection was not resolved"
+        assert counter_file.read_text() == "1", (
+            f"expected session-scoped fixture to resolve once, got {counter_file.read_text()}"
+        )

From f0f222a3e771e4c0e6605035bdeca5de7b49f69d Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Tue, 19 May 2026 09:49:25 -0700
Subject: [PATCH 07/18] some clean up

---
 python/docs/examples/pytest_plugin.md         | 127 +++++++++---------
 python/lib/sift_client/_tests/conftest.py     |   2 +-
 .../pytest_plugin/test_configuration.py       |  36 ++---
 .../_tests/pytest_plugin/test_disabled.py     |   2 +-
 .../_tests/pytest_plugin/test_offline.py      |  12 +-
 .../lib/sift_client/_tests/util/conftest.py   |   4 +-
 python/lib/sift_client/pytest_plugin.py       |  75 ++++++-----
 .../sift_client/util/test_results/__init__.py |  16 +--
 .../sift_client/util/test_results/bounds.py   |   4 +-
 python/pyproject.toml                         |   9 +-
 10 files changed, 143 insertions(+), 144 deletions(-)

diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md
index 9cac2b929..f2fef6519 100644
--- a/python/docs/examples/pytest_plugin.md
+++ b/python/docs/examples/pytest_plugin.md
@@ -95,9 +95,9 @@ def sift_client() -> SiftClient:
 | Flag | Default | Effect |
 |---|---|---|
 | `--sift-offline` | off (online) | Skip the session-start ping and don't contact Sift. All create/update calls go to the JSONL log file for later replay via `import-test-result-log`. Missing `SIFT_*` env vars are tolerated; placeholders are filled. |
-| `--sift-disabled` | off | Skip Sift entirely. Autouse fixtures yield stub objects; `step.measure(...)` still returns real pass/fail booleans by evaluating bounds locally. Nothing is sent to Sift and no log file is written. Also honored via `SIFT_DISABLED=1`. Supersedes every other flag — if both `--sift-disabled` and `--sift-offline` are passed, disabled wins. |
-| `--sift-test-results-log-file=<path\|true\|false>` | temp file | Where the JSONL log of create/update calls goes. With a log file set, the plugin spawns an `import-test-result-log --incremental` worker that polls the file and replays entries against Sift while the run is in flight. Pass `false` to disable the file entirely; create/update calls then go straight to the API synchronously during tests. Incompatible with `--sift-offline` since offline mode needs the log file as its sole sink. |
-| `--no-sift-test-results-git-metadata` | git metadata on | Skip capturing git repo/branch/commit on the report's metadata. |
+| `--sift-disabled` | off | Skip Sift entirely. Autouse fixtures yield stub objects; `step.measure(...)` still returns real pass/fail booleans by evaluating bounds locally. Nothing is sent to Sift and no log file is written. Also honored via `SIFT_DISABLED=1`. Supersedes every other flag (if both `--sift-disabled` and `--sift-offline` are passed, disabled wins). |
+| `--sift-log-file=<path\|true\|false>` | temp file | Where the JSONL log of create/update calls goes. With a log file set, the plugin spawns an `import-test-result-log --incremental` worker that polls the file and replays entries against Sift while the run is in flight. Pass `false` to disable the file entirely; create/update calls then go straight to the API synchronously during tests. Incompatible with `--sift-offline` since offline mode needs the log file as its sole sink. |
+| `--no-sift-git-metadata` | git metadata on | Skip capturing git repo/branch/commit on the report's metadata. |
 
 These can be passed permanently via `addopts`:
 
@@ -113,11 +113,11 @@ CLI flags, when passed, override the ini values.
 
 | Ini key | Type | Equivalent CLI flag |
 |---|---|---|
-| `sift_test_results_log_file` | string (`true` / `false` / `none` / path) | `--sift-test-results-log-file=<value>` |
-| `sift_test_results_git_metadata` | bool (default `true`) | `--no-sift-test-results-git-metadata` (sets to `false`) |
+| `sift_log_file` | string (`true` / `false` / `none` / path) | `--sift-log-file=<value>` |
+| `sift_git_metadata` | bool (default `true`) | `--no-sift-git-metadata` (sets to `false`) |
 | `sift_offline` | bool (default `false`) | `--sift-offline` |
 | `sift_disabled` | bool (default `false`) | `--sift-disabled` (also honors `SIFT_DISABLED` env var) |
-| `sift_test_results_autouse` | bool (default `true`) | _(no CLI flag; controls the marker gate below)_ |
+| `sift_autouse` | bool (default `true`) | _(no CLI flag; controls the marker gate below)_ |
 
 The default `sift_client` fixture reads its two URIs from environment first
 and falls back to ini keys when the env vars are unset. `SIFT_API_KEY` is
@@ -136,7 +136,7 @@ flags for credentials.
 ```toml title="pyproject.toml"
 [tool.pytest.ini_options]
 sift_offline = true
-sift_test_results_git_metadata = false
+sift_git_metadata = false
 sift_grpc_uri = "your-org.sift.example:443"
 sift_rest_uri = "https://your-org.sift.example"
 ```
@@ -144,7 +144,7 @@ sift_rest_uri = "https://your-org.sift.example"
 ```ini title="pytest.ini"
 [pytest]
 sift_offline = true
-sift_test_results_git_metadata = false
+sift_git_metadata = false
 sift_grpc_uri = your-org.sift.example:443
 sift_rest_uri = https://your-org.sift.example
 ```
@@ -171,7 +171,7 @@ into `os.environ` before tests run.
 glue is needed.
 
 !!! warning "FedRAMP / shared environments"
-    Pass `--sift-test-results-log-file=false` (or set the ini key to `"false"`)
+    Pass `--sift-log-file=false` (or set the ini key to `"false"`)
     to skip the temp file + worker pipeline. Create/update calls then run
     inline against the API instead of being deferred through a subprocess.
 
@@ -184,7 +184,7 @@ Every report the plugin creates includes:
 - `system_operator`: `getpass.getuser()`.
 - `start_time` / `end_time`: set on session enter/exit.
 - `status`: starts at `IN_PROGRESS`, finalized to `PASSED` or `FAILED` on session exit (failure if any step failed or an exception escaped the session).
-- `metadata.git_repo`, `metadata.git_branch`, `metadata.git_commit`: captured via `git remote get-url origin` / `git rev-parse --abbrev-ref HEAD` / `git describe --always --dirty --exclude '*'`. Suppressed by `--no-sift-test-results-git-metadata` or when not in a git repo.
+- `metadata.git_repo`, `metadata.git_branch`, `metadata.git_commit`: captured via `git remote get-url origin` / `git rev-parse --abbrev-ref HEAD` / `git describe --always --dirty --exclude '*'`. Suppressed by `--no-sift-git-metadata` or when not in a git repo.
 
 Example invocations:
 
@@ -207,7 +207,7 @@ useful when a repo holds tests that you don't want included in the Sift test rep
 
 | Setting                                                 | Effect                                                                                       |
 |---------------------------------------------------------|----------------------------------------------------------------------------------------------|
-| `sift_test_results_autouse = false` in `pyproject.toml` | Flip the project-wide default off. Tests no longer produce steps unless explicitly opted in. |
+| `sift_autouse = false` in `pyproject.toml` | Flip the project-wide default off. Tests no longer produce steps unless explicitly opted in. |
 | `@pytest.mark.sift_include` on a test, class, or module | Force reporting on for that scope, regardless of the project default.                        |
 | `@pytest.mark.sift_exclude` on a test, class, or module | Force reporting off for that scope, regardless of the project default.                       |
 
@@ -237,7 +237,7 @@ def pytest_collection_modifyitems(config, items):
 ```
 
 This applies `sift_include` to every test collected under `tests/example/`.
-Combine with `sift_test_results_autouse = false` in `pyproject.toml` for
+Combine with `sift_autouse = false` in `pyproject.toml` for
 opting in to specific directories. 
 
 `pytest_collection_modifyitems` receives every item in the session, not just
@@ -657,7 +657,7 @@ The `unit` argument is a free-form string label (e.g. `"V"`, `"C"`, `"psi"`).
 pytest
 
 # Pin the log file so you can replay it later if the import worker dies
-pytest --sift-test-results-log-file=./sift-results.jsonl
+pytest --sift-log-file=./sift-results.jsonl
 ```
 
 See [Running modes](#running-modes) for the offline and disabled flags
@@ -665,32 +665,32 @@ that let the same suite run without (or without contacting) Sift.
 
 ## Running modes
 
-The plugin runs in one of three modes, selected at invocation:
+The plugin runs in one of three modes, picked at invocation:
 
-| Mode | Flag | Network | Log file | `step.measure(...)` | Use case |
+| Mode | Flag | Network | Log file | `step.measure(...)` | When to use |
 |---|---|---|---|---|---|
-| Online (default) | _(none)_ | yes (pings at session start, fail-fast) | optional write-through backup | real measurement against Sift | CI with Sift credentials, local dev hitting your tenant |
-| Offline | `--sift-offline` | none | required (the sole sink) | real measurement queued to log | field tests, vehicles, air-gapped labs, CI without network |
-| Disabled | `--sift-disabled` | none | none | local bounds eval returning a real bool | local dev / CI without Sift credentials or where reports aren't wanted |
+| Online (default) | _(none)_ | yes (pings at session start, aborts if it fails) | optional write-through backup | real measurement against Sift | CI with Sift credentials, local dev hitting your tenant |
+| Offline | `--sift-offline` | none | required (the sole sink) | real measurement queued to log | field tests, air-gapped labs, CI without network |
+| Disabled | `--sift-disabled` | none | none | local bounds eval returning a real bool | local dev or CI that doesn't have (or want) Sift |
 
-When both `--sift-offline` and `--sift-disabled` are passed, disabled wins.
-Disabled mode supersedes every other flag — it's the "skip Sift entirely"
-hammer.
+Pass both flags? Disabled wins. It's the "skip Sift entirely" hammer and
+supersedes everything else.
 
 ### Online mode (default)
 
-`report_context` resolves the `client_has_connection` fixture at session
-start. The default implementation calls `sift_client.ping.ping()`. A failed
-ping aborts the whole session with `pytest.UsageError`, naming both
-`--sift-offline` and `--sift-disabled` as escape hatches. This is fail-fast
-on purpose: a CI run that silently no-ops because the network was flaky
-won't be noticed until someone goes looking for the report.
+`report_context` resolves `client_has_connection` at session start. The
+default implementation calls `sift_client.ping.ping()`. A failed ping
+aborts the whole session with `pytest.UsageError` and points at
+`--sift-offline` and `--sift-disabled` as escape hatches.
+
+This is loud on purpose. A CI run that silently no-ops on a flaky network
+won't get noticed until somebody goes looking for the report, which is
+usually weeks later, which is usually too late.
 
 #### Overriding the connection check
 
-Override `client_has_connection` in your conftest when pinging is the wrong
-signal for your environment, for example a token cache that's only warm
-when authenticated:
+Override `client_has_connection` when ping isn't the right signal, for
+example a token cache that's only warm when authenticated:
 
 ```python title="conftest.py"
 from pathlib import Path
@@ -703,70 +703,69 @@ def client_has_connection(sift_client) -> bool:
     return Path("~/.sift-token-cache").expanduser().is_file()
 ```
 
-The override has no effect under `--sift-offline` or `--sift-disabled`,
-both of which skip the check entirely.
+The override is ignored under `--sift-offline` and `--sift-disabled`.
 
 ### Offline mode (`--sift-offline`)
 
-Offline mode keeps the plugin running normally — same fixtures, same
-`step.measure(...)` semantics — but routes every create/update through the
-JSONL log file instead of the Sift API. The session-start ping is skipped,
-missing `SIFT_*` env vars are tolerated (placeholders are filled), and the
-`import-test-result-log --incremental` worker subprocess is not spawned at
-session end. The log file is the sole sink.
+Same fixtures, same `step.measure(...)` semantics as online. The
+difference is where the writes go: every create/update lands in a JSONL
+log file instead of hitting the Sift API. The session-start ping is
+skipped, missing `SIFT_*` env vars are tolerated (placeholders are
+filled), and the replay worker (`import-test-result-log --incremental`)
+does not get spawned at session end.
 
 ```bash
-pytest --sift-offline --sift-test-results-log-file=./run.jsonl
+pytest --sift-offline --sift-log-file=./run.jsonl
 ```
 
-Once you have connectivity, replay the file:
+Once you have connectivity, replay it:
 
 ```bash
 import-test-result-log ./run.jsonl
 ```
 
-The replay creates the report, steps, and measurements against Sift in one
-batch. See [Replaying a saved log file](#replaying-a-saved-log-file) for
-details on cleanup and the incremental flag.
+That replay creates the report, steps, and measurements against Sift.
+See [Replaying a saved log file](#replaying-a-saved-log-file) for cleanup
+and the incremental flag.
 
-`--sift-test-results-log-file=none` is rejected as a usage error under
-`--sift-offline` since the log file is the sole sink — without it, results
-are lost.
+`--sift-log-file=none` is rejected when offline is set. The
+log file is the only sink in offline mode, so without it the results are
+gone.
 
 !!! warning "Pin the log path"
-    Without `--sift-test-results-log-file=<path>`, offline mode writes to a
-    `tempfile.NamedTemporaryFile` and only surfaces the path via a
-    `logger.info` line. Always pin a known path when you intend to replay
-    the file later.
+    Without `--sift-log-file=<path>`, offline mode writes to
+    a `tempfile.NamedTemporaryFile` and only surfaces the path via a
+    `logger.info` line. Pin a known path when you intend to replay later.
 
 ### Disabled mode (`--sift-disabled`)
 
-Disabled mode skips Sift entirely. The plugin stays loaded so the autouse
-fixtures and markers still exist, but `report_context`, `step`, and
-`module_substep` yield stub objects. `step.measure(...)`, `step.measure_avg(...)`,
-`step.measure_all(...)`, and `step.substep(...)` all keep working — bounds
-are evaluated locally and return the real pass/fail boolean — but nothing
-is sent to Sift and no log file is written. No `SIFT_*` env vars or ini
-keys are read.
+The plugin stays loaded so the autouse fixtures and markers still exist,
+but `report_context`, `step`, and `module_substep` yield stub objects.
+The stubs still do something useful: `step.measure(...)`,
+`step.measure_avg(...)`, `step.measure_all(...)`, and `step.substep(...)`
+all keep working, bounds are evaluated locally, and you get a real
+pass/fail boolean back. Nothing leaves the process. No log file, no
+`SIFT_*` env vars, no ini keys.
 
-Three ways to opt in, in order of how a project typically reaches for them:
+How to turn it on, in the order most projects pick:
 
 ```bash
-# Per-environment: set once in .envrc, devcontainer, or CI job config
+# In an .envrc, devcontainer, or CI job config
 export SIFT_DISABLED=1
 
-# Per-invocation: explicit kill-switch on the command line
+# Per-invocation kill-switch
 pytest --sift-disabled
 
-# Per-project as the default mode (rare; usually online is the default)
+# Per-project default (uncommon; online is usually the right default)
 # pyproject.toml:
 #   [tool.pytest.ini_options]
 #   sift_disabled = true
 ```
 
-Use this for local dev without Sift credentials, for open-source consumers
-of a library that uses Sift internally, and for CI jobs (e.g. PR runs)
-that shouldn't pollute the report stream with intermediate results.
+Good fit for local dev without Sift credentials. Also for library
+consumers who don't have a Sift tenant. Also useful in CI for runs that
+shouldn't add noise to the report stream, like a PR job re-running the
+same suite five times in a row.
 
 ## Replaying a saved log file
 
diff --git a/python/lib/sift_client/_tests/conftest.py b/python/lib/sift_client/_tests/conftest.py
index 45ad404fa..4fcb7dfe3 100644
--- a/python/lib/sift_client/_tests/conftest.py
+++ b/python/lib/sift_client/_tests/conftest.py
@@ -89,6 +89,6 @@ def pytest_configure(config: pytest.Config) -> None:
     """
     is_integration_run = "integration" in (config.option.markexpr or "")
     if is_integration_run:
-        config.option.sift_test_results_log_file = False
+        config.option.sift_log_file = False
     else:
         config.option.sift_disabled = True
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py b/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py
index 1f02dfbb2..12f385480 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py
@@ -34,7 +34,7 @@ def test_ini_log_file_none(
         pytester.makepyprojecttoml(
             """
             [tool.pytest.ini_options]
-            sift_test_results_log_file = "none"
+            sift_log_file = "none"
             """
         )
         pytester.makepyfile("def test_noop(): pass")
@@ -46,7 +46,7 @@ def test_python_false_disables_log_file(
         pytester: pytest.Pytester,
         write_probe_conftest: Callable[[str], None],
     ) -> None:
-        """`config.option.sift_test_results_log_file = False` disables logging.
+        """`config.option.sift_log_file = False` disables logging.
 
         Conftests use this pattern (see lib/sift_client/_tests/util/conftest.py)
         to opt their subtree out of log-file mode. Regression test for the
@@ -55,7 +55,7 @@ def test_python_false_disables_log_file(
         """
         write_probe_conftest(
             """
-            config.option.sift_test_results_log_file = False
+            config.option.sift_log_file = False
             from sift_client.pytest_plugin import _resolve_log_file
             print("RESOLVED:", _resolve_log_file(config))
             """,
@@ -80,7 +80,7 @@ def test_ini_log_file_path(
         pytester.makepyprojecttoml(
             f"""
             [tool.pytest.ini_options]
-            sift_test_results_log_file = "{log_path}"
+            sift_log_file = "{log_path}"
             """
         )
         pytester.makepyfile("def test_noop(): pass")
@@ -136,13 +136,13 @@ def test_ini_git_metadata_false(
     ) -> None:
         write_probe_conftest(
             """
-            print("INI_GIT:", config.getini("sift_test_results_git_metadata"))
+            print("INI_GIT:", config.getini("sift_git_metadata"))
             """,
         )
         pytester.makepyprojecttoml(
             """
             [tool.pytest.ini_options]
-            sift_test_results_git_metadata = false
+            sift_git_metadata = false
             """
         )
         pytester.makepyfile("def test_noop(): pass")
@@ -166,12 +166,12 @@ def test_cli_overrides_ini(
         pytester.makepyprojecttoml(
             """
             [tool.pytest.ini_options]
-            sift_test_results_log_file = "none"
+            sift_log_file = "none"
             """
         )
         pytester.makepyfile("def test_noop(): pass")
         result = pytester.runpytest_subprocess(
-            "-s", "--co", f"--sift-test-results-log-file={cli_path}"
+            "-s", "--co", f"--sift-log-file={cli_path}"
         )
         result.stdout.fnmatch_lines([f"RESOLVED: {cli_path}"])
 
@@ -212,7 +212,7 @@ def test_cli_no_git_metadata_flag(
         pytester: pytest.Pytester,
         write_probe_conftest: Callable[[str], None],
     ) -> None:
-        """The ``--no-sift-test-results-git-metadata`` CLI flag flips git_metadata to False.
+        """The ``--no-sift-git-metadata`` CLI flag flips git_metadata to False.
 
         Guards the negation flag's ``dest`` binding: the flag name doesn't match
         the ini key, so a broken ``dest`` would silently fall back to the ini
@@ -220,11 +220,11 @@ def test_cli_no_git_metadata_flag(
         """
         write_probe_conftest(
             """
-            print("CLI_GIT:", config.getoption("sift_test_results_git_metadata"))
+            print("CLI_GIT:", config.getoption("sift_git_metadata"))
             """,
         )
         pytester.makepyfile("def test_noop(): pass")
-        result = pytester.runpytest_subprocess("-s", "--co", "--no-sift-test-results-git-metadata")
+        result = pytester.runpytest_subprocess("-s", "--co", "--no-sift-git-metadata")
         result.stdout.fnmatch_lines(["CLI_GIT: False"])
 
     def test_defaults_when_neither_set(
@@ -242,7 +242,7 @@ def test_defaults_when_neither_set(
             print("RESOLVED:", _resolve_log_file(config))
             print("OFFLINE:", _is_offline(config))
             print("DISABLED:", _is_disabled(config))
-            print("INI_GIT:", config.getini("sift_test_results_git_metadata"))
+            print("INI_GIT:", config.getini("sift_git_metadata"))
             """,
         )
         pytester.makepyfile("def test_noop(): pass")
@@ -278,7 +278,7 @@ def report_context():
 
 
 class TestAutouseGate:
-    """`sift_include` / `sift_exclude` markers and the `sift_test_results_autouse` ini gate."""
+    """`sift_include` / `sift_exclude` markers and the `sift_autouse` ini gate."""
 
     def test_default_ini_true_activates(self, pytester: pytest.Pytester) -> None:
         """Plugin default (ini absent) keeps the autouse fixtures active."""
@@ -293,12 +293,12 @@ def test_inner(step):
         result.assert_outcomes(passed=1)
 
     def test_default_ini_false_skips(self, pytester: pytest.Pytester) -> None:
-        """`sift_test_results_autouse = false` makes the autouse fixtures no-op by default."""
+        """`sift_autouse = false` makes the autouse fixtures no-op by default."""
         pytester.makeconftest(_GATE_INNER_CONFTEST)
         pytester.makepyprojecttoml(
             """
             [tool.pytest.ini_options]
-            sift_test_results_autouse = false
+            sift_autouse = false
             """
         )
         pytester.makepyfile(
@@ -316,7 +316,7 @@ def test_sift_include_marker_forces_on(self, pytester: pytest.Pytester) -> None:
         pytester.makepyprojecttoml(
             """
             [tool.pytest.ini_options]
-            sift_test_results_autouse = false
+            sift_autouse = false
             """
         )
         pytester.makepyfile(
@@ -368,7 +368,7 @@ def test_module_pytestmark_inherits(self, pytester: pytest.Pytester) -> None:
         pytester.makepyprojecttoml(
             """
             [tool.pytest.ini_options]
-            sift_test_results_autouse = false
+            sift_autouse = false
             """
         )
         pytester.makepyfile(
@@ -399,7 +399,7 @@ def test_bulk_apply_via_conftest_hook(self, pytester: pytest.Pytester) -> None:
         pytester.makepyprojecttoml(
             """
             [tool.pytest.ini_options]
-            sift_test_results_autouse = false
+            sift_autouse = false
             """
         )
         included = pytester.mkdir("included_subtree")
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
index 11f6f6b81..61a8d3d39 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
@@ -136,7 +136,7 @@ def test_disabled_writes_no_log_file_even_when_path_pinned(
         write_plugin_conftest()
         pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)")
         result = pytester.runpytest_subprocess(
-            "--sift-disabled", f"--sift-test-results-log-file={log_path}"
+            "--sift-disabled", f"--sift-log-file={log_path}"
         )
         result.assert_outcomes(passed=1)
         assert not log_path.exists(), f"log file unexpectedly created at {log_path}"
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_offline.py b/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
index 8b1114172..5a291f2d3 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
@@ -3,7 +3,7 @@
 Offline mode routes every create/update through the JSONL log file without
 contacting Sift. The session-start ping is skipped, the import worker is not
 spawned, and missing ``SIFT_*`` env vars are tolerated (placeholders are
-filled). Offline + ``--sift-test-results-log-file=none`` is rejected as a
+filled). Offline + ``--sift-log-file=none`` is rejected as a
 usage error since the log file is the sole sink in this mode.
 """
 
@@ -43,11 +43,11 @@ def test_log_file_none_incompatible_with_offline(
         pytester: pytest.Pytester,
         write_plugin_conftest: Callable[[], None],
     ) -> None:
-        """``--sift-test-results-log-file=none`` + ``--sift-offline`` is a usage error."""
+        """``--sift-log-file=none`` + ``--sift-offline`` is a usage error."""
         write_plugin_conftest()
         pytester.makepyfile("def test_should_not_run(): pass")
         result = pytester.runpytest_subprocess(
-            "--sift-offline", "--sift-test-results-log-file=none"
+            "--sift-offline", "--sift-log-file=none"
         )
         assert result.ret != 0
         combined = "\n".join(result.outlines + result.errlines)
@@ -96,7 +96,7 @@ def test_one(step):
             """
         )
         result = pytester.runpytest_subprocess(
-            "--sift-offline", f"--sift-test-results-log-file={log_path}"
+            "--sift-offline", f"--sift-log-file={log_path}"
         )
         result.assert_outcomes(passed=1)
         assert log_path.exists(), f"offline mode did not create {log_path}"
@@ -116,8 +116,8 @@ def test_offline_skips_client_has_connection(
         """Offline mode never resolves ``client_has_connection``.
 
         Override the fixture to raise on resolution. If the override is
-        invoked, the session aborts; if it isn't, the inner test passes
-        cleanly — confirming the offline path skips the ping check.
+        invoked, the session aborts. If it isn't, the inner test passes
+        cleanly, which confirms the offline path skipped the ping check.
         """
         pytester.makeconftest(
             """
diff --git a/python/lib/sift_client/_tests/util/conftest.py b/python/lib/sift_client/_tests/util/conftest.py
index 2f371e69e..9e255da8a 100644
--- a/python/lib/sift_client/_tests/util/conftest.py
+++ b/python/lib/sift_client/_tests/util/conftest.py
@@ -7,13 +7,13 @@
 
 def pytest_configure(config: pytest.Config) -> None:
     """Configure the pytest configuration to disable the Sift test results log file."""
-    config.option.sift_test_results_log_file = False
+    config.option.sift_log_file = False
 
 
 def pytest_collection_modifyitems(config: pytest.Config, items: "list[pytest.Item]") -> None:
     """Bulk-apply ``@pytest.mark.sift_include`` to integration tests under util/.
 
-    The project-wide default in ``pyproject.toml`` is ``sift_test_results_autouse
+    The project-wide default in ``pyproject.toml`` is ``sift_autouse
     = false`` so unit tests pay nothing for the globally-loaded Sift plugin.
     Integration tests in this subtree still need the autouse fixtures, so this
     hook flips the gate back on for any test already marked
diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py
index dec3b3625..774d60a0d 100644
--- a/python/lib/sift_client/pytest_plugin.py
+++ b/python/lib/sift_client/pytest_plugin.py
@@ -49,25 +49,25 @@ class _Option:
 
 
 _LOG_FILE = _Option(
-    cli_flag="--sift-test-results-log-file",
-    ini_name="sift_test_results_log_file",
+    cli_flag="--sift-log-file",
+    ini_name="sift_log_file",
     cli_help="Path to write the Sift test result log file. "
     "Use 'true' (default) to auto-create a temp file, "
     "False, 'false', or 'none' to disable logging, "
     "or a file path to write to a specific location.",
-    ini_help="Default value for --sift-test-results-log-file. Same values "
-    "accepted as the CLI flag (path, 'true', 'false', 'none').",
+    ini_help="Default value for --sift-log-file. Same values accepted as "
+    "the CLI flag (path, 'true', 'false', 'none').",
 )
 
 _GIT_METADATA = _Option(
-    cli_flag="--no-sift-test-results-git-metadata",
-    ini_name="sift_test_results_git_metadata",
+    cli_flag="--no-sift-git-metadata",
+    ini_name="sift_git_metadata",
     action="store_false",
     cli_help="Exclude git metadata from the Sift test results. "
     "Git metadata (repo, branch, commit) is included by default.",
     ini_help="Include git repo/branch/commit in the report (true/false). "
-    "Defaults to true. The --no-sift-test-results-git-metadata CLI flag "
-    "overrides this when passed.",
+    "Defaults to true. The --no-sift-git-metadata CLI flag overrides "
+    "this when passed.",
     ini_type="bool",
     ini_default=True,
 )
@@ -115,7 +115,7 @@ class _Option:
 )
 
 _AUTOUSE = _Option(
-    ini_name="sift_test_results_autouse",
+    ini_name="sift_autouse",
     ini_help="Default for the Sift autouse fixtures (report_context, step, "
     "module_substep). When true (default), tests are included unless marked "
     "with @pytest.mark.sift_exclude. When false, tests are skipped unless "
@@ -166,12 +166,12 @@ def pytest_configure(config: pytest.Config) -> None:
     config.addinivalue_line(
         "markers",
         "sift_include: force the Sift autouse fixtures to activate for this test "
-        "regardless of the `sift_test_results_autouse` ini default.",
+        "regardless of the `sift_autouse` ini default.",
     )
     config.addinivalue_line(
         "markers",
         "sift_exclude: force the Sift autouse fixtures to skip this test "
-        "regardless of the `sift_test_results_autouse` ini default.",
+        "regardless of the `sift_autouse` ini default.",
     )
 
 
@@ -240,7 +240,7 @@ def _resolve_log_file(pytestconfig: pytest.Config | None) -> str | Path | bool |
     * ``None`` — unset; nothing was passed on the CLI and the ini key is
       absent. Treat as the default "use a temp file."
     * Python ``False`` — an explicit disable, typically set in a conftest via
-      ``config.option.sift_test_results_log_file = False``. Return ``None`` so
+      ``config.option.sift_log_file = False``. Return ``None`` so
       the rest of the pipeline knows to skip logging entirely.
     * A string (from CLI or ini) — interpret ``"true"`` / ``"1"`` as the temp
       file default, ``"false"`` / ``"none"`` as disable, anything else as a
@@ -333,9 +333,9 @@ class _NoopStep(AbstractContextManager["_NoopStep"]):
     """Step shim mirroring ``NewStep``'s public surface without any I/O.
 
     Used by ``--sift-disabled`` mode so test code that calls
-    ``step.measure(...)`` / ``step.substep(...)`` keeps working without any
-    Sift configuration. ``measure*`` calls evaluate bounds locally so pass/fail
-    outcomes match the real plugin exactly.
+    ``step.measure(...)`` or ``step.substep(...)`` keeps working without
+    any Sift configuration. ``measure*`` calls evaluate bounds locally and
+    return a real pass/fail boolean.
     """
 
     def __init__(self, name: str, description: str | None = None) -> None:
@@ -510,27 +510,28 @@ def report_context(
 ) -> Generator[ReportContext | _NoopReportContext, None, None]:
     """Lazy session-scoped Sift ReportContext.
 
-    The fixture is no longer autouse; it's instantiated on the first call to
-    ``request.getfixturevalue("report_context")``, which today happens inside
-    the gated ``step`` and ``module_substep`` fixtures. If every test in the
-    session is excluded via the marker gate, this fixture is never resolved
-    and no ReportContext (and no teardown subprocess) is created.
-
-    Mode selection:
-
-    * ``--sift-disabled`` — yield a stub context (no client, no network, no
-      log file). Test code calling ``step.measure(...)`` keeps working;
-      bounds are evaluated locally so pass/fail outcomes match the real
-      plugin exactly.
-    * ``--sift-offline`` — skip the session-start ping, route all
-      create/update calls through the JSONL log file, and don't spawn the
-      import-test-result-log replay subprocess at session end.
-    * default (online) — verify connectivity via the ``client_has_connection``
-      fixture. A failed ping aborts the session with ``pytest.UsageError``,
-      naming both ``--sift-offline`` and ``--sift-disabled`` as escape hatches.
-
-    The log file destination is controlled by ``--sift-test-results-log-file``;
-    defaults to a temp file when not set.
+    The fixture is no longer autouse; it's instantiated on the first call
+    to ``request.getfixturevalue("report_context")``, which today happens
+    inside the gated ``step`` and ``module_substep`` fixtures. If every
+    test in the session is excluded via the marker gate, this fixture is
+    never resolved and no ReportContext (or teardown subprocess) is created.
+
+    What gets yielded depends on the mode:
+
+    * ``--sift-disabled``: a stub context with no client, no network, and
+      no log file. Test code that calls ``step.measure(...)`` keeps
+      working because bounds are evaluated locally.
+    * ``--sift-offline``: a real ReportContext, but the session-start ping
+      is skipped, all create/update calls go to the JSONL log file, and
+      the import-test-result-log replay subprocess is not spawned at
+      session end.
+    * default (online): verify connectivity via ``client_has_connection``
+      before constructing the context. A failed ping aborts the session
+      with ``pytest.UsageError`` and points at ``--sift-offline`` and
+      ``--sift-disabled`` as escape hatches.
+
+    The log-file destination is controlled by
+    ``--sift-log-file``; defaults to a temp file when unset.
     """
     if _is_disabled(pytestconfig):
         global REPORT_CONTEXT
@@ -581,7 +582,7 @@ def step(
 
     Resolves the gate via `_sift_enabled_for(request.node, ini_default)`:
     `sift_exclude` marker forces off, `sift_include` forces on, otherwise the
-    `sift_test_results_autouse` ini default applies. When on, requests the
+    `sift_autouse` ini default applies. When on, requests the
     session `report_context` lazily — the first gated test in the session
     triggers its creation, subsequent gated tests reuse it. In
     ``--sift-disabled`` mode the report context is a stub, so the gate still
diff --git a/python/lib/sift_client/util/test_results/__init__.py b/python/lib/sift_client/util/test_results/__init__.py
index ebd39bc08..daed444ef 100644
--- a/python/lib/sift_client/util/test_results/__init__.py
+++ b/python/lib/sift_client/util/test_results/__init__.py
@@ -68,13 +68,13 @@ def main(self):
 
 Note: FedRAMP users: results are buffered to a temp file and uploaded by a
 subprocess at session end (no API calls during the run). Disable the buffer
-entirely with `--sift-test-results-log-file=false` for inline uploads.
+entirely with `--sift-log-file=false` for inline uploads.
 
 ### Controlling which tests produce reports
 
 The autouse fixtures fire for every test by default. To narrow that:
 
-- Set `sift_test_results_autouse = false` in `pyproject.toml` to flip the
+- Set `sift_autouse = false` in `pyproject.toml` to flip the
   project default off, then opt tests back in below.
 - `@pytest.mark.sift_include` forces reporting on for a test, class, or
   module. `@pytest.mark.sift_exclude` forces it off. Closest marker wins.
@@ -111,17 +111,17 @@ def pytest_collection_modifyitems(config, items):
 - `--sift-disabled`: Skip Sift entirely. Autouse fixtures yield stub objects;
   `step.measure(...)` still returns real pass/fail booleans by evaluating
   bounds locally, but nothing is sent to Sift and no log file is written.
-  Also honored via the `SIFT_DISABLED` env var. Incompatible with `--sift-offline`.
-- `--sift-test-results-log-file`: Path to write the JSONL log file. `true`
+  Also honored via the `SIFT_DISABLED` env var. Supersedes every other flag.
+- `--sift-log-file`: Path to write the JSONL log file. `true`
   (default) auto-creates a temp file. `false` or `none` disables logging.
   Any other value is treated as a file path.
-- `--no-sift-test-results-git-metadata`: Exclude git metadata (repo, branch,
+- `--no-sift-git-metadata`: Exclude git metadata (repo, branch,
   commit) from the test report. Included by default.
 
 Each option has a matching ini key for per-project configuration under
 ``[tool.pytest.ini_options]`` in ``pyproject.toml`` (or ``[pytest]`` in
 ``pytest.ini``). CLI flags override ini values. The
-``sift_test_results_autouse`` ini key (bool, default ``true``) sets the
+``sift_autouse`` ini key (bool, default ``true``) sets the
 project-wide default for the gate described above. The default
 ``sift_client`` fixture reads ``sift_grpc_uri`` and ``sift_rest_uri`` as
 fallbacks when the corresponding env vars are unset (env vars win when
@@ -130,9 +130,9 @@ def pytest_collection_modifyitems(config, items):
 
 ```toml
 [tool.pytest.ini_options]
-sift_test_results_autouse = false
+sift_autouse = false
 sift_offline = true
-sift_test_results_git_metadata = false
+sift_git_metadata = false
 sift_grpc_uri = "your-org.sift.example:443"
 sift_rest_uri = "https://your-org.sift.example"
 ```
diff --git a/python/lib/sift_client/util/test_results/bounds.py b/python/lib/sift_client/util/test_results/bounds.py
index 49fbfa0bf..99cd4956e 100644
--- a/python/lib/sift_client/util/test_results/bounds.py
+++ b/python/lib/sift_client/util/test_results/bounds.py
@@ -22,8 +22,8 @@ def to_numpy_array(
 ) -> NDArray[np.float64]:
     """Normalize a list / ndarray / pandas Series into a numpy array.
 
-    Shared by ``measure_avg`` and ``measure_all`` in both the real plugin and
-    the no-op sibling so the accepted input types stay in sync.
+    Shared by ``measure_avg`` and ``measure_all`` in both the real and
+    stub step implementations so the accepted input types stay in sync.
     """
     if isinstance(values, list):
         return np.array(values)
diff --git a/python/pyproject.toml b/python/pyproject.toml
index ffaa84b34..45e9c98e3 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -395,11 +395,10 @@ env_files = [
 # use its fixtures. Unit-test runs are flipped to `--sift-disabled` mode by
 # `lib/sift_client/_tests/conftest.py`.
 addopts = "-p pytester -p sift_client.pytest_plugin"
-# The Sift plugin is loaded for the whole project via `python/conftest.py`.
-# The autouse gate defaults to off here so unit tests don't use it. The
-# integration subtree (lib/sift_client/_tests/util/) opts back in via
-# `pytest.mark.sift_include` applied in its conftest.
-sift_test_results_autouse = false
+# The autouse gate defaults to off so unit tests don't use the Sift
+# fixtures. The integration subtree (lib/sift_client/_tests/util/) opts
+# back in via `pytest.mark.sift_include` applied in its conftest.
+sift_autouse = false
 testpaths = [
     "lib/sift_py",
     "lib/sift_client/_tests",

From 785c174de58db367d78df44fcffdb92a9ba7ebf0 Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Tue, 19 May 2026 09:49:46 -0700
Subject: [PATCH 08/18] format

---
 .../_tests/pytest_plugin/test_configuration.py            | 4 +---
 .../lib/sift_client/_tests/pytest_plugin/test_disabled.py | 4 +---
 .../lib/sift_client/_tests/pytest_plugin/test_offline.py  | 8 ++------
 3 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py b/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py
index 12f385480..4efb9f554 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_configuration.py
@@ -170,9 +170,7 @@ def test_cli_overrides_ini(
             """
         )
         pytester.makepyfile("def test_noop(): pass")
-        result = pytester.runpytest_subprocess(
-            "-s", "--co", f"--sift-log-file={cli_path}"
-        )
+        result = pytester.runpytest_subprocess("-s", "--co", f"--sift-log-file={cli_path}")
         result.stdout.fnmatch_lines([f"RESOLVED: {cli_path}"])
 
     def test_cli_offline_flag(
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
index 61a8d3d39..499d18e6b 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
@@ -135,9 +135,7 @@ def test_disabled_writes_no_log_file_even_when_path_pinned(
         log_path = tmp_path / "should-not-exist.jsonl"
         write_plugin_conftest()
         pytester.makepyfile("def test_runs(step): step.measure(name='v', value=1.0)")
-        result = pytester.runpytest_subprocess(
-            "--sift-disabled", f"--sift-log-file={log_path}"
-        )
+        result = pytester.runpytest_subprocess("--sift-disabled", f"--sift-log-file={log_path}")
         result.assert_outcomes(passed=1)
         assert not log_path.exists(), f"log file unexpectedly created at {log_path}"
 
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_offline.py b/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
index 5a291f2d3..b15bfbd3b 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
@@ -46,9 +46,7 @@ def test_log_file_none_incompatible_with_offline(
         """``--sift-log-file=none`` + ``--sift-offline`` is a usage error."""
         write_plugin_conftest()
         pytester.makepyfile("def test_should_not_run(): pass")
-        result = pytester.runpytest_subprocess(
-            "--sift-offline", "--sift-log-file=none"
-        )
+        result = pytester.runpytest_subprocess("--sift-offline", "--sift-log-file=none")
         assert result.ret != 0
         combined = "\n".join(result.outlines + result.errlines)
         assert "incompatible with --sift-offline" in combined, combined
@@ -95,9 +93,7 @@ def test_one(step):
                 ) is True
             """
         )
-        result = pytester.runpytest_subprocess(
-            "--sift-offline", f"--sift-log-file={log_path}"
-        )
+        result = pytester.runpytest_subprocess("--sift-offline", f"--sift-log-file={log_path}")
         result.assert_outcomes(passed=1)
         assert log_path.exists(), f"offline mode did not create {log_path}"
         content = log_path.read_text()

From ddac199b4d64b2a8b60894041b106efe6a7145f4 Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Tue, 19 May 2026 10:15:43 -0700
Subject: [PATCH 09/18] fix issue with exit codes and pre push hook

---
 python/lib/sift_client/pytest_plugin.py | 14 +++++++++-----
 python/scripts/dev                      |  3 ++-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py
index 774d60a0d..d46a2b89d 100644
--- a/python/lib/sift_client/pytest_plugin.py
+++ b/python/lib/sift_client/pytest_plugin.py
@@ -428,6 +428,15 @@ def new_step(
     ("SIFT_REST_URI", _REST_URI),
 )
 
+# Placeholder credentials used in --sift-offline mode when env/ini values
+# are missing. Offline mode never makes network calls, so the values are
+# only syntactically required by SiftConnectionConfig.
+_OFFLINE_DEFAULTS = {
+    "SIFT_API_KEY": "offline",
+    "SIFT_GRPC_URI": "offline.invalid:0",
+    "SIFT_REST_URI": "http://offline.invalid",
+}
+
 
 def _resolve_credential(
     pytestconfig: pytest.Config | None, env_name: str, opt: _Option | None
@@ -486,11 +495,6 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient:
             "conftest.py, or pass --sift-offline / --sift-disabled to run "
             "without contacting Sift."
         )
-    _OFFLINE_DEFAULTS = {
-        "SIFT_API_KEY": "offline",
-        "SIFT_GRPC_URI": "offline.invalid:0",
-        "SIFT_REST_URI": "http://offline.invalid",
-    }
     for env in missing:
         resolved[env] = _OFFLINE_DEFAULTS[env]
     # `or ""` is unreachable in practice since the `missing` check above guarantees
diff --git a/python/scripts/dev b/python/scripts/dev
index 5a1397803..81d5ec826 100755
--- a/python/scripts/dev
+++ b/python/scripts/dev
@@ -201,4 +201,5 @@ case "$1" in
         ;;
 esac
 
-exit 0
+# Leave the script's exit code as the subcommand's. A trailing `exit 0` here
+# silently masked ruff / mypy / pytest failures from the pre-push hook.

From 3d6d504c52ec16e904bec8b2aa5a77ba81d7fdd8 Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Tue, 19 May 2026 11:24:49 -0700
Subject: [PATCH 10/18] lint

---
 python/lib/sift_client/pytest_plugin.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py
index d46a2b89d..c99208ac8 100644
--- a/python/lib/sift_client/pytest_plugin.py
+++ b/python/lib/sift_client/pytest_plugin.py
@@ -329,7 +329,7 @@ def update(self, *_args: Any, **_kwargs: Any) -> None:
         return None
 
 
-class _NoopStep(AbstractContextManager["_NoopStep"]):
+class _NoopStep(AbstractContextManager):
     """Step shim mirroring ``NewStep``'s public surface without any I/O.
 
     Used by ``--sift-disabled`` mode so test code that calls

From cd1dd7a12dd236d17d6bebc131cda9fb5f4eeba2 Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Tue, 19 May 2026 11:55:33 -0700
Subject: [PATCH 11/18] remove tests from wheel

---
 python/pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index 45e9c98e3..ae8ed9f54 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -333,6 +333,7 @@ ignore_errors = true
 
 [tool.setuptools.packages.find]
 where = ["lib"]
+exclude = ["sift_client._tests", "sift_client._tests.*"]
 
 [tool.setuptools.package-data]
 sift_grafana = ["py.typed"]

From 8c82676084f36a536609775638583a28a905b733 Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Tue, 19 May 2026 12:41:35 -0700
Subject: [PATCH 12/18] CI fix attempt to use importlib

---
 python/pyproject.toml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index ae8ed9f54..e1523bb43 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -395,7 +395,11 @@ env_files = [
 # The Sift pytest plugin is loaded so the project's own integration tests can
 # use its fixtures. Unit-test runs are flipped to `--sift-disabled` mode by
 # `lib/sift_client/_tests/conftest.py`.
-addopts = "-p pytester -p sift_client.pytest_plugin"
+# `--import-mode=importlib` loads test files by path with unique synthetic
+# module names. The default `prepend` mode would try to import
+# `lib/sift_client/_tests/conftest.py` as `sift_client._tests.conftest`, which
+# fails because `_tests` is excluded from the wheel (see packages.find above).
+addopts = "-p pytester -p sift_client.pytest_plugin --import-mode=importlib"
 # The autouse gate defaults to off so unit tests don't use the Sift
 # fixtures. The integration subtree (lib/sift_client/_tests/util/) opts
 # back in via `pytest.mark.sift_include` applied in its conftest.

From 9e0381f482ed0ce327cf7a0994e4d8dedc71e1fd Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Wed, 20 May 2026 12:28:54 -0700
Subject: [PATCH 13/18] remove noop path and leverage simulated more heavily
 for the disabled path

---
 .../low_level_wrappers/test_results.py        |  33 +++-
 .../_tests/pytest_plugin/test_disabled.py     |  14 +-
 .../_tests/pytest_plugin/test_offline.py      |   9 +-
 python/lib/sift_client/client.py              |   5 +
 python/lib/sift_client/pytest_plugin.py       | 187 ++++--------------
 .../lib/sift_client/resources/test_results.py |  23 ++-
 .../sift_types/_mixins/simulated.py           |  32 +++
 .../lib/sift_client/sift_types/test_report.py |  13 +-
 .../util/test_results/context_manager.py      |   9 +
 9 files changed, 153 insertions(+), 172 deletions(-)
 create mode 100644 python/lib/sift_client/sift_types/_mixins/simulated.py

diff --git a/python/lib/sift_client/_internal/low_level_wrappers/test_results.py b/python/lib/sift_client/_internal/low_level_wrappers/test_results.py
index d15f86c48..ff0c2b515 100644
--- a/python/lib/sift_client/_internal/low_level_wrappers/test_results.py
+++ b/python/lib/sift_client/_internal/low_level_wrappers/test_results.py
@@ -3,7 +3,7 @@
 import logging
 import uuid
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, cast
+from typing import TYPE_CHECKING, Any, TypeVar, cast
 
 from google.protobuf import json_format
 from sift.test_reports.v1.test_reports_pb2 import (
@@ -68,6 +68,9 @@
 logger = logging.getLogger(__name__)
 
 
+_EntityT = TypeVar("_EntityT", TestReport, TestStep, TestMeasurement)
+
+
 class TestResultsLowLevelClient(LowLevelClientBase, WithGrpcClient):
     """Low-level client for the TestResultsAPI.
 
@@ -82,6 +85,16 @@ def __init__(self, grpc_client: GrpcClient):
         """
         super().__init__(grpc_client)
 
+    @staticmethod
+    def _mark_simulated(instance: _EntityT) -> _EntityT:
+        """Stamp an entity as having been produced by the simulate path.
+
+        Mirrors the ``__dict__`` write used by ``BaseType._apply_client_to_instance``
+        to bypass pydantic's frozen-model guard.
+        """
+        instance.__dict__["_simulated"] = True
+        return instance
+
     @staticmethod
     def simulate_create_test_report_response(
         request: CreateTestReportRequest,
@@ -387,7 +400,7 @@ async def create_test_report(
                     request,
                     response_id=simulated_proto.test_report_id,
                 )
-            return TestReport._from_proto(simulated_proto)
+            return self._mark_simulated(TestReport._from_proto(simulated_proto))
 
         response = await self._grpc_client.get_stub(TestReportServiceStub).CreateTestReport(request)
         grpc_test_report = cast("CreateTestReportResponse", response).test_report
@@ -505,7 +518,9 @@ async def update_test_report(
         if log_file is not None or simulate:
             if log_file is not None:
                 log_request_to_file(log_file, "UpdateTestReport", request)
-            return self.simulate_update_test_report_response(request, existing=existing)
+            return self._mark_simulated(
+                self.simulate_update_test_report_response(request, existing=existing)
+            )
 
         response = await self._grpc_client.get_stub(TestReportServiceStub).UpdateTestReport(request)
         grpc_test_report = cast("UpdateTestReportResponse", response).test_report
@@ -560,7 +575,7 @@ async def create_test_step(
                     request,
                     response_id=simulated_proto.test_step_id,
                 )
-            return TestStep._from_proto(simulated_proto)
+            return self._mark_simulated(TestStep._from_proto(simulated_proto))
 
         response = await self._grpc_client.get_stub(TestReportServiceStub).CreateTestStep(request)
         grpc_test_step = cast("CreateTestStepResponse", response).test_step
@@ -661,7 +676,9 @@ async def update_test_step(
         if log_file is not None or simulate:
             if log_file is not None:
                 log_request_to_file(log_file, "UpdateTestStep", request)
-            return self.simulate_update_test_step_response(request, existing=existing)
+            return self._mark_simulated(
+                self.simulate_update_test_step_response(request, existing=existing)
+            )
 
         response = await self._grpc_client.get_stub(TestReportServiceStub).UpdateTestStep(request)
         grpc_test_step = cast("UpdateTestStepResponse", response).test_step
@@ -716,7 +733,7 @@ async def create_test_measurement(
                     request,
                     response_id=simulated_proto.measurement_id,
                 )
-            return TestMeasurement._from_proto(simulated_proto)
+            return self._mark_simulated(TestMeasurement._from_proto(simulated_proto))
 
         response = await self._grpc_client.get_stub(TestReportServiceStub).CreateTestMeasurement(
             request
@@ -861,7 +878,9 @@ async def update_test_measurement(
         if log_file is not None or simulate:
             if log_file is not None:
                 log_request_to_file(log_file, "UpdateTestMeasurement", request)
-            return self.simulate_update_test_measurement_response(request, existing=existing)
+            return self._mark_simulated(
+                self.simulate_update_test_measurement_response(request, existing=existing)
+            )
 
         response = await self._grpc_client.get_stub(TestReportServiceStub).UpdateTestMeasurement(
             request
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
index 499d18e6b..cba4bc1ee 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_disabled.py
@@ -109,16 +109,20 @@ def test_disabled_yields_stub_fixtures(
         clear_sift_env: None,
         write_plugin_conftest: Callable[[], None],
     ) -> None:
-        """``step`` / ``report_context`` / ``module_substep`` are stub instances in disabled mode."""
+        """`report_context` / `step` / `module_substep` are real instances backed by a simulate client."""
         write_plugin_conftest()
         pytester.makepyfile(
             """
-            from sift_client.pytest_plugin import _NoopReportContext, _NoopStep
+            from sift_client.util.test_results import ReportContext
+            from sift_client.util.test_results.context_manager import NewStep
 
             def test_types(step, report_context, module_substep):
-                assert isinstance(step, _NoopStep)
-                assert isinstance(module_substep, _NoopStep)
-                assert isinstance(report_context, _NoopReportContext)
+                assert isinstance(report_context, ReportContext)
+                assert report_context.is_simulated is True
+                assert report_context.report.is_simulated is True
+                assert step.current_step.is_simulated is True
+                assert isinstance(step, NewStep)
+                assert isinstance(module_substep, NewStep)
             """
         )
         result = pytester.runpytest_subprocess("--sift-disabled")
diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_offline.py b/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
index b15bfbd3b..f0470bad3 100644
--- a/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
+++ b/python/lib/sift_client/_tests/pytest_plugin/test_offline.py
@@ -57,19 +57,20 @@ def test_offline_yields_real_fixtures(
         clear_sift_env: None,
         write_plugin_conftest: Callable[[], None],
     ) -> None:
-        """Offline mode keeps the real ``ReportContext`` / ``NewStep``, not the stubs."""
+        """Offline mode runs a real ReportContext; entities still report `is_simulated=True` because the log-file path synthesizes responses prior to replay."""
         write_plugin_conftest()
         pytester.makepyfile(
             """
-            from sift_client.pytest_plugin import _NoopReportContext, _NoopStep
             from sift_client.util.test_results import ReportContext
             from sift_client.util.test_results.context_manager import NewStep
 
             def test_types(step, report_context):
                 assert isinstance(report_context, ReportContext)
                 assert isinstance(step, NewStep)
-                assert not isinstance(report_context, _NoopReportContext)
-                assert not isinstance(step, _NoopStep)
+                assert report_context.client._simulate is False
+                # log-file mode synthesizes responses, so entities are flagged simulated.
+                assert report_context.is_simulated is True
+                assert step.current_step.is_simulated is True
             """
         )
         result = pytester.runpytest_subprocess("--sift-offline")
diff --git a/python/lib/sift_client/client.py b/python/lib/sift_client/client.py
index 95fd25b71..ff574adba 100644
--- a/python/lib/sift_client/client.py
+++ b/python/lib/sift_client/client.py
@@ -152,6 +152,11 @@ def __init__(
         WithGrpcClient.__init__(self, grpc_client=grpc_client)
         WithRestClient.__init__(self, rest_client=rest_client)
 
+        # When set, test-results writes return synthesized responses without
+        # contacting Sift. Read by `TestResultsAPIAsync._simulate`. Used by the
+        # pytest plugin's ``--sift-disabled`` mode.
+        self._simulate: bool = False
+
         self.ping = PingAPI(self)
         self.assets = AssetsAPI(self)
         self.calculated_channels = CalculatedChannelsAPI(self)
diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py
index c99208ac8..ebbd6b5a0 100644
--- a/python/lib/sift_client/pytest_plugin.py
+++ b/python/lib/sift_client/pytest_plugin.py
@@ -1,30 +1,18 @@
 from __future__ import annotations
 
 import os
-from contextlib import AbstractContextManager
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator
 
-import numpy as np
 import pytest
 
 from sift_client import SiftClient, SiftConnectionConfig
 from sift_client.sift_types.test_report import TestStatus
 from sift_client.util.test_results import ReportContext
-from sift_client.util.test_results.bounds import (
-    all_within_bounds,
-    to_numpy_array,
-    value_passes_bounds,
-)
 
 if TYPE_CHECKING:
-    import pandas as pd
-    from numpy.typing import NDArray
-
-    from sift_client.sift_types.channel import Channel
-    from sift_client.sift_types.test_report import NumericBounds
     from sift_client.util.test_results.context_manager import NewStep
 
 REPORT_CONTEXT: Any = None
@@ -295,10 +283,14 @@ def _report_context_impl(
     else:
         base_name = "pytest " + " ".join(args) if args else "pytest"
         test_case = base_name
-    log_file = _resolve_log_file(pytestconfig)
+    # When the client is in simulate mode (disabled), bypass the log-file
+    # pipeline — pinning a path would create a file the simulate path never
+    # writes to — and skip the import subprocess (no real RPCs to replay).
+    simulate = sift_client._simulate
+    log_file: str | Path | bool | None = False if simulate else _resolve_log_file(pytestconfig)
     git_metadata = _option_or_ini(pytestconfig, _GIT_METADATA)
     include_git_metadata = True if git_metadata is None else bool(git_metadata)
-    offline = _is_offline(pytestconfig)
+    offline = False if simulate else _is_offline(pytestconfig)
     with ReportContext(
         sift_client,
         name=f"{base_name} {datetime.now(timezone.utc).isoformat()}",
@@ -312,116 +304,6 @@ def _report_context_impl(
         yield context
 
 
-class _NoopTestStep:
-    """Stub for ``TestStep`` exposing only the attributes user code touches."""
-
-    description: str | None = None
-    status: Any = None
-
-    def update(self, *_args: Any, **_kwargs: Any) -> None:
-        return None
-
-
-class _NoopReport:
-    """Stub for ``TestReport`` exposing only ``.update(...)``."""
-
-    def update(self, *_args: Any, **_kwargs: Any) -> None:
-        return None
-
-
-class _NoopStep(AbstractContextManager):
-    """Step shim mirroring ``NewStep``'s public surface without any I/O.
-
-    Used by ``--sift-disabled`` mode so test code that calls
-    ``step.measure(...)`` or ``step.substep(...)`` keeps working without
-    any Sift configuration. ``measure*`` calls evaluate bounds locally and
-    return a real pass/fail boolean.
-    """
-
-    def __init__(self, name: str, description: str | None = None) -> None:
-        self.name = name
-        self.current_step = _NoopTestStep()
-        self.current_step.description = description
-
-    def __enter__(self) -> _NoopStep:
-        return self
-
-    def __exit__(self, exc_type, exc_value, tb) -> None:
-        return None
-
-    def measure(
-        self,
-        *,
-        name: str,
-        value: float | str | bool | int,
-        bounds: dict[str, float] | NumericBounds | str | bool | None = None,
-        timestamp: datetime | None = None,
-        unit: str | None = None,
-        description: str | None = None,
-        metadata: dict[str, str | float | bool] | None = None,
-        channel_names: list[str] | list[Channel] | None = None,
-    ) -> bool:
-        return value_passes_bounds(value, bounds)
-
-    def measure_avg(
-        self,
-        *,
-        name: str,
-        values: list[float | int] | NDArray[np.float64] | pd.Series,
-        bounds: dict[str, float] | NumericBounds,
-        timestamp: datetime | None = None,
-        unit: str | None = None,
-        description: str | None = None,
-        metadata: dict[str, str | float | bool] | None = None,
-        channel_names: list[str] | list[Channel] | None = None,
-    ) -> bool:
-        return value_passes_bounds(float(np.mean(to_numpy_array(values))), bounds)
-
-    def measure_all(
-        self,
-        *,
-        name: str,
-        values: list[float | int] | NDArray[np.float64] | pd.Series,
-        bounds: dict[str, float] | NumericBounds,
-        timestamp: datetime | None = None,
-        unit: str | None = None,
-        description: str | None = None,
-        metadata: dict[str, str | float | bool] | None = None,
-        channel_names: list[str] | list[Channel] | None = None,
-    ) -> bool:
-        return all_within_bounds(to_numpy_array(values), bounds)
-
-    def report_outcome(self, name: str, result: bool, reason: str | None = None) -> bool:
-        return result
-
-    def substep(
-        self,
-        name: str,
-        description: str | None = None,
-        metadata: dict[str, str | float | bool] | None = None,
-    ) -> _NoopStep:
-        return _NoopStep(name=name, description=description)
-
-    def update_step_from_result(self, *_args: Any, **_kwargs: Any) -> None:
-        return None
-
-
-class _NoopReportContext:
-    """Report context shim exposing only what user code or autouse hooks touch."""
-
-    def __init__(self) -> None:
-        self.report = _NoopReport()
-
-    def new_step(
-        self,
-        name: str,
-        description: str | None = None,
-        assertion_as_fail_not_error: bool = True,
-        metadata: dict[str, str | float | bool] | None = None,
-    ) -> _NoopStep:
-        return _NoopStep(name=name, description=description)
-
-
 _CREDENTIAL_KEYS: tuple[tuple[str, _Option | None], ...] = (
     ("SIFT_API_KEY", None),  # env-only; never read from ini to keep secrets out of source control.
     ("SIFT_GRPC_URI", _GRPC_URI),
@@ -438,6 +320,24 @@ def new_step(
 }
 
 
+def _build_disabled_client() -> SiftClient:
+    """Construct a SiftClient for ``--sift-disabled`` mode.
+
+    Tagged with ``_simulate=True`` so test-results writes short-circuit through
+    the existing low-level simulate path without contacting Sift. The URLs are
+    syntactically valid but unreachable; nothing dials them.
+    """
+    client = SiftClient(
+        connection_config=SiftConnectionConfig(
+            api_key="disabled",
+            grpc_url="disabled.invalid:0",
+            rest_url="http://disabled.invalid",
+        )
+    )
+    client._simulate = True
+    return client
+
+
 def _resolve_credential(
     pytestconfig: pytest.Config | None, env_name: str, opt: _Option | None
 ) -> str | None:
@@ -475,13 +375,7 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient:
     always used.
     """
     if _is_disabled(pytestconfig):
-        return SiftClient(
-            connection_config=SiftConnectionConfig(
-                api_key=os.getenv("SIFT_API_KEY") or "disabled",
-                grpc_url=os.getenv("SIFT_GRPC_URI") or "disabled.invalid:0",
-                rest_url=os.getenv("SIFT_REST_URI") or "http://disabled.invalid",
-            )
-        )
+        return _build_disabled_client()
     resolved = {env: _resolve_credential(pytestconfig, env, opt) for env, opt in _CREDENTIAL_KEYS}
     missing = [env for env, value in resolved.items() if not value]
     if missing and not _is_offline(pytestconfig):
@@ -511,7 +405,7 @@ def sift_client(pytestconfig: pytest.Config) -> SiftClient:
 @pytest.fixture(scope="session")
 def report_context(
     request: pytest.FixtureRequest, pytestconfig: pytest.Config
-) -> Generator[ReportContext | _NoopReportContext, None, None]:
+) -> Generator[ReportContext, None, None]:
     """Lazy session-scoped Sift ReportContext.
 
     The fixture is no longer autouse; it's instantiated on the first call
@@ -522,9 +416,12 @@ def report_context(
 
     What gets yielded depends on the mode:
 
-    * ``--sift-disabled``: a stub context with no client, no network, and
-      no log file. Test code that calls ``step.measure(...)`` keeps
-      working because bounds are evaluated locally.
+    * ``--sift-disabled``: a real ``ReportContext`` against a placeholder
+      ``SiftClient`` with ``_simulate=True``. Every test-results write
+      returns a synthesized response without contacting Sift; no log file
+      is written; the replay subprocess never spawns. Test code that calls
+      ``step.measure(...)`` keeps working because bounds are evaluated as
+      usual and routed through the simulate path.
     * ``--sift-offline``: a real ReportContext, but the session-start ping
       is skipped, all create/update calls go to the JSONL log file, and
       the import-test-result-log replay subprocess is not spawned at
@@ -538,10 +435,9 @@ def report_context(
     ``--sift-log-file``; defaults to a temp file when unset.
     """
     if _is_disabled(pytestconfig):
-        global REPORT_CONTEXT
-        ctx = _NoopReportContext()
-        REPORT_CONTEXT = ctx
-        yield ctx
+        yield from _report_context_impl(
+            _build_disabled_client(), request, pytestconfig=pytestconfig
+        )
         return
     sift_client = request.getfixturevalue("sift_client")
     if not _is_offline(pytestconfig):
@@ -561,8 +457,8 @@ def report_context(
 
 
 def _step_impl(
-    report_context: ReportContext | _NoopReportContext, request: pytest.FixtureRequest
-) -> Generator[NewStep | _NoopStep, None, None]:
+    report_context: ReportContext, request: pytest.FixtureRequest
+) -> Generator[NewStep, None, None]:
     name = str(request.node.name)
     existing_docstring = request.node.obj.__doc__ or None
     with report_context.new_step(
@@ -581,7 +477,7 @@ def _step_impl(
 def step(
     request: pytest.FixtureRequest,
     pytestconfig: pytest.Config,
-) -> Generator[NewStep | _NoopStep | None, None, None]:
+) -> Generator[NewStep | None, None, None]:
     """Create an outer step for the function when the Sift gate is on.
 
     Resolves the gate via `_sift_enabled_for(request.node, ini_default)`:
@@ -589,8 +485,9 @@ def step(
     `sift_autouse` ini default applies. When on, requests the
     session `report_context` lazily — the first gated test in the session
     triggers its creation, subsequent gated tests reuse it. In
-    ``--sift-disabled`` mode the report context is a stub, so the gate still
-    runs but the resulting step is a no-op shim.
+    ``--sift-disabled`` mode the report context is backed by a
+    ``SiftClient(_simulate=True)`` placeholder, so every write returns a
+    synthesized response without contacting Sift.
     """
     default = bool(_option_or_ini(pytestconfig, _AUTOUSE))
     if not _sift_enabled_for(request.node, default):
@@ -604,7 +501,7 @@ def step(
 def module_substep(
     request: pytest.FixtureRequest,
     pytestconfig: pytest.Config,
-) -> Generator[NewStep | _NoopStep | None, None, None]:
+) -> Generator[NewStep | None, None, None]:
     """Create a per-module step when at least one test in the module is gated on.
 
     Inspects the module's collected items rather than gating on a single marker,
diff --git a/python/lib/sift_client/resources/test_results.py b/python/lib/sift_client/resources/test_results.py
index 22e984b5e..9e88b6081 100644
--- a/python/lib/sift_client/resources/test_results.py
+++ b/python/lib/sift_client/resources/test_results.py
@@ -96,6 +96,7 @@ async def create(
         created_report = await self._low_level_client.create_test_report(
             test_report=test_report,
             log_file=log_file,
+            simulate=self.client._simulate,
         )
         return self._finalize(created_report, log_file)
 
@@ -271,7 +272,7 @@ async def update(
         update.resource_id = test_report_id
         existing = test_report if isinstance(test_report, TestReport) else None
         updated_test_report = await self._low_level_client.update_test_report(
-            update, log_file=log_file, existing=existing
+            update, log_file=log_file, existing=existing, simulate=self.client._simulate
         )
         return self._finalize(updated_test_report, log_file)
 
@@ -319,7 +320,7 @@ async def create_step(
         if isinstance(test_step, dict):
             test_step = TestStepCreate.model_validate(test_step)
         test_step_result = await self._low_level_client.create_test_step(
-            test_step, log_file=log_file
+            test_step, log_file=log_file, simulate=self.client._simulate
         )
         return self._finalize(test_step_result, log_file)
 
@@ -450,7 +451,7 @@ async def update_step(
         update.resource_id = test_step_id
         existing = test_step if isinstance(test_step, TestStep) else None
         updated_test_step = await self._low_level_client.update_test_step(
-            update, log_file=log_file, existing=existing
+            update, log_file=log_file, existing=existing, simulate=self.client._simulate
         )
         return self._finalize(updated_test_step, log_file)
 
@@ -484,10 +485,10 @@ async def create_measurement(
         if isinstance(test_measurement, dict):
             test_measurement = TestMeasurementCreate.model_validate(test_measurement)
         test_measurement_result = await self._low_level_client.create_test_measurement(
-            test_measurement, log_file=log_file
+            test_measurement, log_file=log_file, simulate=self.client._simulate
         )
         measurement = self._finalize(test_measurement_result, log_file)
-        if update_step and log_file is None:
+        if update_step and log_file is None and not self.client._simulate:
             step = await self.get_step(test_step=test_measurement_result.test_step_id)
             if step.status == TestStatus.PASSED and not measurement.passed:
                 await self.update_step(test_step=step, update={"status": TestStatus.FAILED})
@@ -508,7 +509,7 @@ async def create_measurements(
             A tuple of (measurements_created_count, measurement_ids).
         """
         return await self._low_level_client.create_test_measurements(
-            test_measurements, log_file=log_file
+            test_measurements, log_file=log_file, simulate=self.client._simulate
         )
 
     async def list_measurements(
@@ -621,10 +622,16 @@ async def update_measurement(
 
         update.resource_id = test_measurement.id_
         updated_test_measurement = await self._low_level_client.update_test_measurement(
-            update, log_file=log_file, existing=test_measurement
+            update, log_file=log_file, existing=test_measurement, simulate=self.client._simulate
         )
         updated_test_measurement = self._finalize(updated_test_measurement, log_file)
-        if update_step and log_file is None and update.passed is not None and not update.passed:
+        if (
+            update_step
+            and log_file is None
+            and not self.client._simulate
+            and update.passed is not None
+            and not update.passed
+        ):
             step = await self.get_step(test_step=updated_test_measurement.test_step_id)
             if step.status == TestStatus.PASSED:
                 await self.update_step(test_step=step, update={"status": TestStatus.FAILED})
diff --git a/python/lib/sift_client/sift_types/_mixins/simulated.py b/python/lib/sift_client/sift_types/_mixins/simulated.py
new file mode 100644
index 000000000..bdc2c572a
--- /dev/null
+++ b/python/lib/sift_client/sift_types/_mixins/simulated.py
@@ -0,0 +1,32 @@
+"""Mixin that exposes ``is_simulated`` on test-results entity types."""
+
+from __future__ import annotations
+
+
+class SimulatedMixin:
+    """Mixin for sift_types whose response can be produced by the simulate path.
+
+    The low-level wrapper stamps ``_simulated=True`` on entities it returns from
+    a simulated branch (see ``TestResultsLowLevelClient._mark_simulated``). This
+    mixin exposes that flag as a read-only ``is_simulated`` property so
+    consumers and tests can detect when an instance was synthesized rather than
+    round-tripped through Sift.
+
+    Inheriting classes are expected to declare a private field
+    ``_simulated: bool = False`` so pydantic tracks the default correctly.
+    """
+
+    _simulated: bool
+
+    @property
+    def is_simulated(self) -> bool:
+        """True when this instance was returned from the simulate path.
+
+        Set by the low-level wrapper when the call short-circuited to a
+        synthesized response (either ``SiftClient._simulate`` mode or per-call
+        ``log_file`` / ``simulate=True``). False for entities returned from a
+        normal online call or constructed manually outside the SDK. Offline
+        mode also reports True since responses are synthesized prior to
+        replay.
+        """
+        return self._simulated
diff --git a/python/lib/sift_client/sift_types/test_report.py b/python/lib/sift_client/sift_types/test_report.py
index ecc24f52f..c4abfc548 100644
--- a/python/lib/sift_client/sift_types/test_report.py
+++ b/python/lib/sift_client/sift_types/test_report.py
@@ -36,6 +36,7 @@
     ModelUpdate,
 )
 from sift_client.sift_types._mixins.file_attachments import FileAttachmentsMixin
+from sift_client.sift_types._mixins.simulated import SimulatedMixin
 from sift_client.sift_types.channel import Channel
 from sift_client.util.metadata import metadata_dict_to_proto, metadata_proto_to_dict
 
@@ -153,7 +154,7 @@ def to_proto(self) -> TestStepProto:
         return proto
 
 
-class TestStep(BaseType[TestStepProto, "TestStep"], FileAttachmentsMixin):
+class TestStep(BaseType[TestStepProto, "TestStep"], FileAttachmentsMixin, SimulatedMixin):
     """TestStep model representing a step in a test."""
 
     test_report_id: str
@@ -169,6 +170,8 @@ class TestStep(BaseType[TestStepProto, "TestStep"], FileAttachmentsMixin):
     metadata: dict[str, str | float | bool] | None = None
     # Set by the resource layer when this instance was produced from a logging-mode call
     _log_file: str | Path | None = None
+    # Set by the low-level wrapper when this instance came from the simulate path
+    _simulated: bool = False
 
     @classmethod
     def _from_proto(cls, proto: TestStepProto, sift_client: SiftClient | None = None) -> TestStep:
@@ -383,7 +386,7 @@ def to_proto(self) -> TestMeasurementProto:
         return proto
 
 
-class TestMeasurement(BaseType[TestMeasurementProto, "TestMeasurement"]):
+class TestMeasurement(BaseType[TestMeasurementProto, "TestMeasurement"], SimulatedMixin):
     """TestMeasurement model representing a measurement in a test."""
 
     measurement_type: TestMeasurementType
@@ -404,6 +407,8 @@ class TestMeasurement(BaseType[TestMeasurementProto, "TestMeasurement"]):
 
     # Set by the resource layer when this instance was produced from a logging-mode call
     _log_file: str | Path | None = None
+    # Set by the low-level wrapper when this instance came from the simulate path
+    _simulated: bool = False
 
     @classmethod
     def _from_proto(
@@ -599,7 +604,7 @@ def _to_proto(self) -> ErrorInfoProto:
         )
 
 
-class TestReport(BaseType[TestReportProto, "TestReport"], FileAttachmentsMixin):
+class TestReport(BaseType[TestReportProto, "TestReport"], FileAttachmentsMixin, SimulatedMixin):
     """TestReport model representing a test report."""
 
     status: TestStatus
@@ -617,6 +622,8 @@ class TestReport(BaseType[TestReportProto, "TestReport"], FileAttachmentsMixin):
     is_archived: bool
     # Set by the resource layer when this instance was produced from a logging-mode call
     _log_file: str | Path | None = None
+    # Set by the low-level wrapper when this instance came from the simulate path
+    _simulated: bool = False
 
     @classmethod
     def _from_proto(
diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py
index fe8579834..e1d9da6f1 100644
--- a/python/lib/sift_client/util/test_results/context_manager.py
+++ b/python/lib/sift_client/util/test_results/context_manager.py
@@ -216,6 +216,15 @@ def __exit__(self, exc_type, exc_value, traceback):
 
         return True
 
+    @property
+    def is_simulated(self) -> bool:
+        """True when this context's report came from the simulate path.
+
+        Delegates to ``self.report.is_simulated``; see ``TestReport.is_simulated``
+        for the full semantics.
+        """
+        return self.report.is_simulated
+
     def new_step(
         self,
         name: str,

From 30cd16d2e793d89c9bcdb3764e28ed65938472d8 Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Wed, 20 May 2026 12:42:22 -0700
Subject: [PATCH 14/18] clean up docs

---
 python/docs/examples/pytest_plugin.md         | 22 +++++++++++--------
 python/lib/sift_client/pytest_plugin.py       | 10 ++++-----
 .../sift_client/util/test_results/__init__.py |  9 ++++----
 .../sift_client/util/test_results/bounds.py   | 11 +++-------
 4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md
index f2fef6519..a2438effd 100644
--- a/python/docs/examples/pytest_plugin.md
+++ b/python/docs/examples/pytest_plugin.md
@@ -95,7 +95,7 @@ def sift_client() -> SiftClient:
 | Flag | Default | Effect |
 |---|---|---|
 | `--sift-offline` | off (online) | Skip the session-start ping and don't contact Sift. All create/update calls go to the JSONL log file for later replay via `import-test-result-log`. Missing `SIFT_*` env vars are tolerated; placeholders are filled. |
-| `--sift-disabled` | off | Skip Sift entirely. Autouse fixtures yield stub objects; `step.measure(...)` still returns real pass/fail booleans by evaluating bounds locally. Nothing is sent to Sift and no log file is written. Also honored via `SIFT_DISABLED=1`. Supersedes every other flag (if both `--sift-disabled` and `--sift-offline` are passed, disabled wins). |
+| `--sift-disabled` | off | Skip Sift entirely. Nothing contacts the API and no log file is written; `step.measure(...)` still evaluates bounds and returns a real pass/fail boolean. Also honored via `SIFT_DISABLED=1`. Supersedes every other flag (disabled wins over offline). |
 | `--sift-log-file=<path\|true\|false>` | temp file | Where the JSONL log of create/update calls goes. With a log file set, the plugin spawns an `import-test-result-log --incremental` worker that polls the file and replays entries against Sift while the run is in flight. Pass `false` to disable the file entirely; create/update calls then go straight to the API synchronously during tests. Incompatible with `--sift-offline` since offline mode needs the log file as its sole sink. |
 | `--no-sift-git-metadata` | git metadata on | Skip capturing git repo/branch/commit on the report's metadata. |
 
@@ -671,7 +671,7 @@ The plugin runs in one of three modes, picked at invocation:
 |---|---|---|---|---|---|
 | Online (default) | _(none)_ | yes (pings at session start, aborts if it fails) | optional write-through backup | real measurement against Sift | CI with Sift credentials, local dev hitting your tenant |
 | Offline | `--sift-offline` | none | required (the sole sink) | real measurement queued to log | field tests, air-gapped labs, CI without network |
-| Disabled | `--sift-disabled` | none | none | local bounds eval returning a real bool | local dev or CI that doesn't have (or want) Sift |
+| Disabled | `--sift-disabled` | none | none | bounds eval; returns a real bool | local dev or CI that doesn't have (or want) Sift |
 
 Pass both flags? Disabled wins. It's the "skip Sift entirely" hammer and
 supersedes everything else.
@@ -739,13 +739,17 @@ gone.
 
 ### Disabled mode (`--sift-disabled`)
 
-The plugin stays loaded so the autouse fixtures and markers still exist,
-but `report_context`, `step`, and `module_substep` yield stub objects.
-The stubs still do something useful: `step.measure(...)`,
-`step.measure_avg(...)`, `step.measure_all(...)`, and `step.substep(...)`
-all keep working, bounds are evaluated locally, and you get a real
-pass/fail boolean back. Nothing leaves the process. No log file, no
-`SIFT_*` env vars, no ini keys.
+The plugin stays loaded with the same fixtures and markers as the other
+modes. Nothing contacts Sift, no log file is written, and no `SIFT_*`
+env vars are required. `step.measure(...)`, `step.measure_avg(...)`,
+`step.measure_all(...)`, `step.substep(...)`, and
+`report_context.report.update({...})` all behave normally — bounds
+evaluate and you get a real pass/fail boolean back.
+
+Entities returned in disabled mode report `is_simulated == True` (on
+`TestReport`, `TestStep`, `TestMeasurement`, and `ReportContext`) so
+consumers and tests can branch on provenance. Offline-mode entities
+also report `is_simulated == True`.
 
 How to turn it on, in the order most projects pick:
 
diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py
index ebbd6b5a0..559ec32ca 100644
--- a/python/lib/sift_client/pytest_plugin.py
+++ b/python/lib/sift_client/pytest_plugin.py
@@ -76,11 +76,11 @@ class _Option:
     cli_flag="--sift-disabled",
     ini_name="sift_disabled",
     action="store_true",
-    cli_help="Disable Sift integration entirely. Autouse fixtures yield stub "
-    "objects; `step.measure(...)` still returns real pass/fail booleans by "
-    "evaluating bounds locally, but nothing is sent to Sift and no log file "
-    "is written. Also honored via the `SIFT_DISABLED` env var. Supersedes "
-    "every other flag.",
+    cli_help="Disable Sift integration entirely. Nothing contacts the API "
+    "and no log file is written. `step.measure(...)` still returns real "
+    "pass/fail booleans. Returned entities expose `is_simulated == True`. "
+    "Also honored via the `SIFT_DISABLED` env var. Supersedes every other "
+    "flag.",
     ini_help="When true, run in disabled mode (same effect as --sift-disabled). "
     "Also honored via the SIFT_DISABLED env var. Supersedes every other "
     "setting. Defaults to false.",
diff --git a/python/lib/sift_client/util/test_results/__init__.py b/python/lib/sift_client/util/test_results/__init__.py
index daed444ef..ddce0326c 100644
--- a/python/lib/sift_client/util/test_results/__init__.py
+++ b/python/lib/sift_client/util/test_results/__init__.py
@@ -108,10 +108,11 @@ def pytest_collection_modifyitems(config, items):
 - `--sift-offline`: Run without contacting Sift. All create/update calls are
   written to the JSONL log file for later replay via `import-test-result-log`.
   No session-start ping is attempted.
-- `--sift-disabled`: Skip Sift entirely. Autouse fixtures yield stub objects;
-  `step.measure(...)` still returns real pass/fail booleans by evaluating
-  bounds locally, but nothing is sent to Sift and no log file is written.
-  Also honored via the `SIFT_DISABLED` env var. Supersedes every other flag.
+- `--sift-disabled`: Skip Sift entirely. Nothing contacts the API and no
+  log file is written. `step.measure(...)` still evaluates bounds and
+  returns a real pass/fail boolean. Returned entities expose
+  ``is_simulated == True``. Also honored via the `SIFT_DISABLED` env
+  var. Supersedes every other flag.
 - `--sift-log-file`: Path to write the JSONL log file. `true`
   (default) auto-creates a temp file. `false` or `none` disables logging.
   Any other value is treated as a file path.
diff --git a/python/lib/sift_client/util/test_results/bounds.py b/python/lib/sift_client/util/test_results/bounds.py
index 99cd4956e..b734cc126 100644
--- a/python/lib/sift_client/util/test_results/bounds.py
+++ b/python/lib/sift_client/util/test_results/bounds.py
@@ -22,8 +22,8 @@ def to_numpy_array(
 ) -> NDArray[np.float64]:
     """Normalize a list / ndarray / pandas Series into a numpy array.
 
-    Shared by ``measure_avg`` and ``measure_all`` in both the real and
-    stub step implementations so the accepted input types stay in sync.
+    Shared by ``measure_avg`` and ``measure_all`` on ``NewStep`` so the
+    accepted input types stay in sync across measurement variants.
     """
     if isinstance(values, list):
         return np.array(values)
@@ -90,12 +90,7 @@ def value_passes_bounds(
     value: float | str | bool,
     bounds: dict[str, float] | NumericBounds | str | bool | None,
 ) -> bool:
-    """Evaluate a value against bounds without recording a measurement.
-
-    Used by consumers that need pass/fail semantics matching the real plugin but
-    do not transmit a measurement (e.g. ``--sift-disabled`` mode in the pytest
-    plugin).
-    """
+    """Evaluate a value against bounds without recording a measurement."""
     if bounds is None:
         return True
     if isinstance(bounds, dict):

From 0ccc46d7dca1214cbf947956bec81da5c723254f Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Wed, 20 May 2026 13:04:27 -0700
Subject: [PATCH 15/18] clean up conditional

---
 python/lib/sift_client/_tests/conftest.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/python/lib/sift_client/_tests/conftest.py b/python/lib/sift_client/_tests/conftest.py
index 4fcb7dfe3..0b939ae39 100644
--- a/python/lib/sift_client/_tests/conftest.py
+++ b/python/lib/sift_client/_tests/conftest.py
@@ -81,14 +81,11 @@ def ci_pytest_tag(sift_client):
 def pytest_configure(config: pytest.Config) -> None:
     """Pick a Sift plugin mode based on whether integration tests are running.
 
-    Integration runs (``-m integration``) stay online with the log file
-    disabled so writes go inline against the real backend; missing
-    ``SIFT_*`` env vars surface as an actionable usage error from the
-    plugin's ``sift_client`` fixture. Every other run defaults to
-    ``--sift-disabled`` so unit tests don't need credentials.
+    Integration runs (``-m integration``) stay online with the default
+    log-file pipeline enabled so CI exercises the JSONL write + import
+    worker replay path that production users hit. Every other run defaults
+    to ``--sift-disabled`` so unit tests don't need credentials.
     """
     is_integration_run = "integration" in (config.option.markexpr or "")
-    if is_integration_run:
-        config.option.sift_log_file = False
-    else:
+    if not is_integration_run:
         config.option.sift_disabled = True

From 9c1f476f3f27bf9839655294557878fd5998378a Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Thu, 21 May 2026 13:34:30 -0700
Subject: [PATCH 16/18] improve clarity around replay and handle more failures
 with subprocess errors

---
 python/docs/examples/pytest_plugin.md         | 11 ++++
 python/lib/sift_client/pytest_plugin.py       | 15 ++---
 .../util/test_results/context_manager.py      | 58 +++++++++++++++----
 3 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md
index a2438effd..a883601f2 100644
--- a/python/docs/examples/pytest_plugin.md
+++ b/python/docs/examples/pytest_plugin.md
@@ -687,6 +687,17 @@ This is loud on purpose. A CI run that silently no-ops on a flaky network
 won't get noticed until somebody goes looking for the report, which is
 usually weeks later, which is usually too late.
 
+With the default `--sift-log-file` setting on, create/update calls are
+written to a JSONL log file during the run and an
+`import-test-result-log --incremental` worker replays them against Sift
+in the background. If the worker hits a connection failure mid-session,
+the session keeps writing to the log file and the failure is logged at
+session end with the worker's stderr and a `replay-test-result-log`
+command for manual recovery. Test outcomes are unaffected — the local
+log file is preserved so you can re-import after fixing connectivity.
+Pass `--sift-log-file=false` to make every create/update synchronous
+against the API instead.
+
 #### Overriding the connection check
 
 Override `client_has_connection` when ping isn't the right signal, for
diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py
index 559ec32ca..494ded3b6 100644
--- a/python/lib/sift_client/pytest_plugin.py
+++ b/python/lib/sift_client/pytest_plugin.py
@@ -283,21 +283,22 @@ def _report_context_impl(
     else:
         base_name = "pytest " + " ".join(args) if args else "pytest"
         test_case = base_name
-    # When the client is in simulate mode (disabled), bypass the log-file
-    # pipeline — pinning a path would create a file the simulate path never
-    # writes to — and skip the import subprocess (no real RPCs to replay).
-    simulate = sift_client._simulate
-    log_file: str | Path | bool | None = False if simulate else _resolve_log_file(pytestconfig)
+    # Mode → ReportContext flags:
+    #   online (default): log_file=<temp or user path>, replay_log_file=True
+    #   --sift-offline:   log_file=<temp or user path>, replay_log_file=False
+    #   --sift-disabled:  log_file=False,               replay_log_file=False
+    disabled = sift_client._simulate
+    offline = False if disabled else _is_offline(pytestconfig)
+    log_file: str | Path | bool | None = False if disabled else _resolve_log_file(pytestconfig)
     git_metadata = _option_or_ini(pytestconfig, _GIT_METADATA)
     include_git_metadata = True if git_metadata is None else bool(git_metadata)
-    offline = False if simulate else _is_offline(pytestconfig)
     with ReportContext(
         sift_client,
         name=f"{base_name} {datetime.now(timezone.utc).isoformat()}",
         test_case=str(test_case),
         log_file=log_file,
         include_git_metadata=include_git_metadata,
-        offline=offline,
+        replay_log_file=not (disabled or offline),
     ) as context:
         global REPORT_CONTEXT
         REPORT_CONTEXT = context
diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py
index e1d9da6f1..e5b0f811f 100644
--- a/python/lib/sift_client/util/test_results/context_manager.py
+++ b/python/lib/sift_client/util/test_results/context_manager.py
@@ -120,7 +120,7 @@ def __init__(
         test_case: str | None = None,
         log_file: str | Path | bool | None = None,
         include_git_metadata: bool = False,
-        offline: bool = False,
+        replay_log_file: bool = True,
     ):
         """Initialize a new report context.
 
@@ -131,20 +131,25 @@ def __init__(
             system_operator: The operator of the test system. Will default to the current user if not provided.
             test_case: The name of the test case. Will default to the basename of the file containing the test if not provided.
             log_file: If True, create a temp log file. If a path, use that path.
-                All create/update operations will be logged to this file.
+                If False/None, no log file is written and create/update calls
+                the API.
             include_git_metadata: If True, include git metadata in the report.
-            offline: If True, do not spawn the import-test-result-log replay subprocess.
-                A log file is required and will be created as a temp file if not provided.
+            replay_log_file: When True (the default) and ``log_file`` is set,
+                spawn ``import-test-result-log --incremental`` to push log
+                entries to Sift in the background during the session. When
+                False, the log file is just a record and no worker is spawned.
+                Replay happens later via ``replay-test-result-log <path>``.
+                Has no effect when ``log_file`` is None.
         """
         self.client = client
-        self.offline = offline
+        self.replay_log_file = replay_log_file
         self.step_is_open = False
         self.step_stack = []
         self.step_number_at_depth = {}
         self.open_step_results = {}
         self.any_failures = False
 
-        if log_file is True or (offline and not log_file):
+        if log_file is True:
             tmp = tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False)
             self.log_file = Path(tmp.name)
             logger.info(f"Created temporary log file: {self.log_file}")
@@ -170,7 +175,11 @@ def __init__(
         self.report = client.test_results.create(create, log_file=self.log_file)
 
     def _open_import_proc(self):
-        """Open a subprocess to import the log file."""
+        """Open a subprocess to import the log file.
+
+        ``stderr`` is captured so a worker crash mid-session can surface its
+        error at session end via ``__exit__`` rather than failing silently.
+        """
         with _quiet_fork_stderr():
             self._import_proc = subprocess.Popen(
                 [
@@ -186,11 +195,11 @@ def _open_import_proc(self):
                 ],
                 stdin=subprocess.PIPE,
                 stdout=subprocess.DEVNULL,
-                stderr=subprocess.DEVNULL,
+                stderr=subprocess.PIPE,
             )
 
     def __enter__(self):
-        if self.log_file and not self.offline:
+        if self.log_file and self.replay_log_file:
             self._open_import_proc()
         return self
 
@@ -205,14 +214,41 @@ def __exit__(self, exc_type, exc_value, traceback):
         self.report.update(update)
 
         if self._import_proc is not None:
+            # Three outcomes for the replay worker at session end:
+            #   1. Exits cleanly (returncode 0). Common happy path: stdin EOF
+            #      from communicate() tells the worker to drain and finish.
+            #   2. Still running after the 1s grace window (TimeoutExpired).
+            #      Typically a healthy worker with a large backlog. We kill
+            #      it, surface replay instructions, and raise — preserving
+            #      pre-existing behavior so a known-incomplete delivery
+            #      doesn't silently pass.
+            #   3. Exited with non-zero. This is where connection failures
+            #      and API call errors land: the worker's replay loop has
+            #      no retry, so the first failed RPC crashes the subprocess.
+            #      We log stderr at ERROR with replay instructions but do
+            #      NOT raise — tests already ran and their pass/fail is
+            #      independent of delivery. The local log file is preserved
+            #      for manual `replay-test-result-log <path>` recovery.
             try:
-                self._import_proc.communicate(timeout=1)
+                _, stderr_bytes = self._import_proc.communicate(timeout=1)
             except subprocess.TimeoutExpired:
-                logger.error("Import process did not exit in 10s, killing it")
+                logger.error("Import process did not exit in 1s, killing it")
                 self._import_proc.kill()
                 self._import_proc.wait()
                 log_replay_instructions(self.log_file)
                 raise
+            if self._import_proc.returncode != 0:
+                stderr_text = (
+                    stderr_bytes.decode("utf-8", errors="replace").strip()
+                    if stderr_bytes
+                    else ""
+                )
+                logger.error(
+                    "Import process exited with code %d. stderr: %s",
+                    self._import_proc.returncode,
+                    stderr_text or "<empty>",
+                )
+                log_replay_instructions(self.log_file)
 
         return True
 

From 60d3081e1255e77753a5d388490cd5b864aa6de3 Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Thu, 21 May 2026 14:03:09 -0700
Subject: [PATCH 17/18] clean up subprocess test status reporting

---
 python/docs/examples/pytest_plugin.md         | 13 ++-
 .../_tests/util/test_report_context.py        | 95 +++++++++++++++++++
 .../util/test_results/context_manager.py      | 59 +++++++-----
 3 files changed, 134 insertions(+), 33 deletions(-)
 create mode 100644 python/lib/sift_client/_tests/util/test_report_context.py

diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md
index a883601f2..2ac298256 100644
--- a/python/docs/examples/pytest_plugin.md
+++ b/python/docs/examples/pytest_plugin.md
@@ -690,13 +690,12 @@ usually weeks later, which is usually too late.
 With the default `--sift-log-file` setting on, create/update calls are
 written to a JSONL log file during the run and an
 `import-test-result-log --incremental` worker replays them against Sift
-in the background. If the worker hits a connection failure mid-session,
-the session keeps writing to the log file and the failure is logged at
-session end with the worker's stderr and a `replay-test-result-log`
-command for manual recovery. Test outcomes are unaffected — the local
-log file is preserved so you can re-import after fixing connectivity.
-Pass `--sift-log-file=false` to make every create/update synchronous
-against the API instead.
+in the background. If the worker crashes mid-session (connection failure,
+API error) or is still draining its backlog at session end, the failure
+is logged at session end with a `replay-test-result-log` command for
+manual recovery — test outcomes are unaffected and the local log file is
+preserved. Pass `--sift-log-file=false` to make every create/update
+synchronous against the API instead.
 
 #### Overriding the connection check
 
diff --git a/python/lib/sift_client/_tests/util/test_report_context.py b/python/lib/sift_client/_tests/util/test_report_context.py
new file mode 100644
index 000000000..f12247c7a
--- /dev/null
+++ b/python/lib/sift_client/_tests/util/test_report_context.py
@@ -0,0 +1,95 @@
+"""Tier 1 tests for `ReportContext.__exit__`'s replay-worker handling.
+
+Each test substitutes the `import-test-result-log` argv with a tiny Python
+`-c` invocation that produces a controlled end-state (clean exit / hang /
+non-zero exit), then enters and exits a `ReportContext` against a
+simulate-mode `SiftClient`. This validates that real subprocess outcomes
+route to the right branch of `__exit__` without depending on the real
+replay binary or a Sift backend.
+"""
+
+from __future__ import annotations
+
+import logging
+import sys
+from typing import TYPE_CHECKING
+
+from sift_client import SiftClient, SiftConnectionConfig
+from sift_client.util.test_results import ReportContext
+
+if TYPE_CHECKING:
+    import pytest
+
+
+def _make_simulate_client() -> SiftClient:
+    """Build a SiftClient flagged for in-process simulation.
+
+    Constructor URLs are placeholders; nothing dials them because every
+    test-results write short-circuits through the simulate path.
+    """
+    client = SiftClient(
+        connection_config=SiftConnectionConfig(
+            api_key="test",
+            grpc_url="test.invalid:0",
+            rest_url="http://test.invalid",
+        )
+    )
+    client._simulate = True
+    return client
+
+
+def _make_context(command: list[str]) -> ReportContext:
+    """Build a ReportContext whose replay subprocess is the provided command.
+
+    `log_file=True` triggers the temp-file path so `_open_import_proc` fires
+    on `__enter__`. The substitute argv is swapped in via the public-ish
+    `_build_replay_command` hook so the production Popen kwargs stay
+    exercised.
+    """
+    rc = ReportContext(_make_simulate_client(), name="test", log_file=True)
+    rc._build_replay_command = lambda: command  # type: ignore[method-assign]
+    return rc
+
+
+def test_worker_clean_exit_is_silent(caplog: pytest.LogCaptureFixture) -> None:
+    """Worker exits with code 0 → __exit__ is silent (case 1)."""
+    rc = _make_context([sys.executable, "-c", "pass"])
+    with caplog.at_level(logging.ERROR):
+        with rc:
+            pass
+    assert "Import process" not in caplog.text
+    assert "replay-test-result-log" not in caplog.text
+    assert rc._import_proc is not None
+    assert rc._import_proc.returncode == 0
+
+
+def test_worker_timeout_kills_and_logs(caplog: pytest.LogCaptureFixture) -> None:
+    """Worker still running at session end → kill + log, no raise (case 2)."""
+    rc = _make_context([sys.executable, "-c", "import time; time.sleep(30)"])
+    with caplog.at_level(logging.ERROR):
+        with rc:
+            pass
+    assert rc._import_proc is not None
+    # `kill()` + `wait()` were called; process is dead.
+    assert rc._import_proc.poll() is not None
+    assert "did not exit in 1s" in caplog.text
+    assert "replay-test-result-log" in caplog.text
+
+
+def test_worker_nonzero_exit_logs_stderr_no_raise(caplog: pytest.LogCaptureFixture) -> None:
+    """Worker exits non-zero with stderr → log stderr + replay hint, no raise (case 3)."""
+    rc = _make_context(
+        [
+            sys.executable,
+            "-c",
+            "import sys; sys.stderr.write('rpc deadline exceeded'); sys.exit(2)",
+        ]
+    )
+    with caplog.at_level(logging.ERROR):
+        with rc:
+            pass
+    assert rc._import_proc is not None
+    assert rc._import_proc.returncode == 2
+    assert "exited with code 2" in caplog.text
+    assert "rpc deadline exceeded" in caplog.text
+    assert "replay-test-result-log" in caplog.text
diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py
index e5b0f811f..c7ff6628d 100644
--- a/python/lib/sift_client/util/test_results/context_manager.py
+++ b/python/lib/sift_client/util/test_results/context_manager.py
@@ -174,6 +174,25 @@ def __init__(
         )
         self.report = client.test_results.create(create, log_file=self.log_file)
 
+    def _build_replay_command(self) -> list[str]:
+        """Build the argv for the import-test-result-log replay subprocess.
+
+        Factored out for testability — tests substitute commands that exit
+        with controlled returncodes / stderr to exercise the ``__exit__``
+        branches without depending on the real replay binary.
+        """
+        return [
+            "import-test-result-log",
+            "--incremental",
+            str(self.log_file),
+            "--grpc-url",
+            self.client.grpc_client._config.uri,
+            "--rest-url",
+            self.client.rest_client._config.base_url,
+            "--api-key",
+            self.client.grpc_client._config.api_key,
+        ]
+
     def _open_import_proc(self):
         """Open a subprocess to import the log file.
 
@@ -182,17 +201,7 @@ def _open_import_proc(self):
         """
         with _quiet_fork_stderr():
             self._import_proc = subprocess.Popen(
-                [
-                    "import-test-result-log",
-                    "--incremental",
-                    str(self.log_file),
-                    "--grpc-url",
-                    self.client.grpc_client._config.uri,
-                    "--rest-url",
-                    self.client.rest_client._config.base_url,
-                    "--api-key",
-                    self.client.grpc_client._config.api_key,
-                ],
+                self._build_replay_command(),
                 stdin=subprocess.PIPE,
                 stdout=subprocess.DEVNULL,
                 stderr=subprocess.PIPE,
@@ -214,21 +223,19 @@ def __exit__(self, exc_type, exc_value, traceback):
         self.report.update(update)
 
         if self._import_proc is not None:
-            # Three outcomes for the replay worker at session end:
-            #   1. Exits cleanly (returncode 0). Common happy path: stdin EOF
-            #      from communicate() tells the worker to drain and finish.
+            # Three outcomes for the replay worker at session end. None of
+            # them fail the session — tests already ran and their outcome
+            # is independent of delivery. The local log file is the source
+            # of recovery for both failure modes via
+            # `replay-test-result-log <path>`:
+            #   1. Exits cleanly (returncode 0). Silent.
             #   2. Still running after the 1s grace window (TimeoutExpired).
-            #      Typically a healthy worker with a large backlog. We kill
-            #      it, surface replay instructions, and raise — preserving
-            #      pre-existing behavior so a known-incomplete delivery
-            #      doesn't silently pass.
-            #   3. Exited with non-zero. This is where connection failures
-            #      and API call errors land: the worker's replay loop has
-            #      no retry, so the first failed RPC crashes the subprocess.
-            #      We log stderr at ERROR with replay instructions but do
-            #      NOT raise — tests already ran and their pass/fail is
-            #      independent of delivery. The local log file is preserved
-            #      for manual `replay-test-result-log <path>` recovery.
+            #      Healthy worker with a large backlog; kill and surface
+            #      replay instructions.
+            #   3. Exited with non-zero. Connection failures and API call
+            #      errors land here — the worker's replay loop has no retry,
+            #      so the first failed RPC crashes the subprocess. Log the
+            #      captured stderr at ERROR with replay instructions.
             try:
                 _, stderr_bytes = self._import_proc.communicate(timeout=1)
             except subprocess.TimeoutExpired:
@@ -236,7 +243,7 @@ def __exit__(self, exc_type, exc_value, traceback):
                 self._import_proc.kill()
                 self._import_proc.wait()
                 log_replay_instructions(self.log_file)
-                raise
+                return True # Ensures the session is marked as passed in pytest
             if self._import_proc.returncode != 0:
                 stderr_text = (
                     stderr_bytes.decode("utf-8", errors="replace").strip()

From 1cde80dabb5d7425feba924aa54fec2df1acb5c9 Mon Sep 17 00:00:00 2001
From: Alex Luck <luck@siftstack.com>
Date: Thu, 21 May 2026 14:23:30 -0700
Subject: [PATCH 18/18] format

---
 python/lib/sift_client/util/test_results/context_manager.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py
index c7ff6628d..3d375814a 100644
--- a/python/lib/sift_client/util/test_results/context_manager.py
+++ b/python/lib/sift_client/util/test_results/context_manager.py
@@ -243,12 +243,10 @@ def __exit__(self, exc_type, exc_value, traceback):
                 self._import_proc.kill()
                 self._import_proc.wait()
                 log_replay_instructions(self.log_file)
-                return True # Ensures the session is marked as passed in pytest
+                return True  # Ensures the session is marked as passed in pytest
             if self._import_proc.returncode != 0:
                 stderr_text = (
-                    stderr_bytes.decode("utf-8", errors="replace").strip()
-                    if stderr_bytes
-                    else ""
+                    stderr_bytes.decode("utf-8", errors="replace").strip() if stderr_bytes else ""
                 )
                 logger.error(
                     "Import process exited with code %d. stderr: %s",