DataDog
diff --git a/‎ddtrace/testing/internal/cached_file_provider.py‎
Lines changed: 179 additions & 0 deletions b/‎ddtrace/testing/internal/cached_file_provider.py‎
Lines changed: 179 additions & 0 deletions
diff --git a/‎ddtrace/testing/internal/constants.py‎
Lines changed: 9 additions & 0 deletions b/‎ddtrace/testing/internal/constants.py‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎ddtrace/testing/internal/env_tags.py‎
Lines changed: 47 additions & 0 deletions b/‎ddtrace/testing/internal/env_tags.py‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎ddtrace/testing/internal/http.py‎
Lines changed: 51 additions & 0 deletions b/‎ddtrace/testing/internal/http.py‎
Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,179 @@
+"""
+File-based data provider for Bazel offline (manifest) mode.
+
+Defines the ``TestOptDataProvider`` Protocol that both ``APIClient`` and
+``CachedFileDataProvider`` satisfy, so ``SessionManager`` can swap between
+HTTP and file-based data fetching without branching inside each method.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from pathlib import Path
+import typing as t
+
+from ddtrace.testing.internal.constants import ITRSkippingLevel
+from ddtrace.testing.internal.settings_data import Settings
+from ddtrace.testing.internal.settings_data import TestProperties
+from ddtrace.testing.internal.telemetry import TelemetryAPI
+from ddtrace.testing.internal.test_data import ModuleRef
+from ddtrace.testing.internal.test_data import SuiteRef
+from ddtrace.testing.internal.test_data import TestRef
+
+
+log = logging.getLogger(__name__)
+
+
+class TestOptDataProvider(t.Protocol):
+    """
+    Structural interface shared by ``APIClient`` (HTTP) and
+    ``CachedFileDataProvider`` (files).
+
+    ``SessionManager`` annotates its ``api_client`` attribute with this
+    Protocol, which lets mypy flag any drift between the two implementations.
+    """
+
+    def get_settings(self) -> Settings:
+        """Return the Test Optimization settings."""
+
+    def get_known_tests(self) -> set[TestRef]:
+        """Return the set of tests already known to the data source."""
+
+    def get_test_management_properties(self) -> dict[TestRef, TestProperties]:
+        """Return the test management properties, keyed by test reference."""
+
+    def get_skippable_tests(self) -> tuple[set[t.Union[SuiteRef, TestRef]], t.Optional[str]]:
+        """
+        Return the skippable suites/tests together with an optional string.
+
+        NOTE(review): the string is presumably a correlation ID; confirm
+        against ``APIClient``.
+        """
+
+    def get_known_commits(self, latest_commits: list[str]) -> t.Optional[list[str]]:
+        """
+        Return the known commits for ``latest_commits``.
+
+        NOTE(review): ``None`` presumably signals a failed lookup; confirm
+        against ``APIClient``.
+        """
+
+    def send_git_pack_file(self, packfile: Path) -> t.Optional[int]:
+        """Send one git pack file; the result is an optional integer."""
+
+    def upload_coverage_report(
+        self,
+        coverage_report_bytes: bytes,
+        coverage_format: str,
+        tags: t.Optional[dict[str, str]],
+    ) -> bool:
+        """Upload a coverage report and return a boolean outcome."""
+
+    def close(self) -> None:
+        """Release any resources held by the provider."""
+
+
+def _read_cache_json(cache_path: str) -> t.Optional[t.Any]:
+    """
+    Read and parse a JSON file from the .testoptimization cache directory.
+
+    The file is always decoded as UTF-8 so the result does not depend on the
+    locale of the process running the tests.
+
+    Returns the parsed object on success, or None if the file is missing,
+    unreadable, not valid UTF-8, or not valid JSON. A missing file is treated
+    as an empty response — no HTTP fallback is attempted (Bazel hermeticity
+    requires this hard boundary).
+    """
+    try:
+        with open(cache_path, encoding="utf-8") as f:
+            return json.load(f)
+    except FileNotFoundError:
+        log.debug("Cache file not found: %s — treating as empty response", cache_path)
+        return None
+    except (OSError, ValueError) as e:
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError, so
+        # a file with invalid bytes degrades to "empty" instead of raising.
+        log.warning("Error reading cache file %s: %s — treating as empty response", cache_path, e)
+        return None
+
+
+class CachedFileDataProvider:
+    """
+    File-backed data provider used in Bazel manifest mode.
+
+    Test optimization data is loaded from pre-fetched JSON files stored under
+    the .testoptimization directory. The cached files are expected to mirror
+    the structure of the corresponding backend HTTP responses, so they are
+    parsed the same way. Methods only reachable through ``upload_git_data``
+    (already guarded to skip in offline mode) are no-ops.
+    """
+
+    def __init__(
+        self,
+        test_optimization_dir: str,
+        itr_skipping_level: ITRSkippingLevel,
+        telemetry_api: TelemetryAPI,
+    ) -> None:
+        """Store the cache root directory, ITR skipping level and telemetry sink."""
+        self._telemetry_api = telemetry_api
+        self._itr_skipping_level = itr_skipping_level
+        self._dir = test_optimization_dir
+
+    def _cache_path(self, relative: str) -> str:
+        """Turn a ``/``-separated relative path into an OS path under the cache root."""
+        segments = relative.split("/")
+        return os.path.join(self._dir, *segments)
+
+    def get_settings(self) -> Settings:
+        """
+        Load settings from ``cache/http/settings.json``.
+
+        A missing or unparsable file yields a default ``Settings()``. Telemetry
+        is recorded only for successfully parsed settings.
+        """
+        payload = _read_cache_json(self._cache_path("cache/http/settings.json"))
+        if payload is None:
+            log.debug("No cached settings file — all features disabled in manifest mode")
+            return Settings()
+
+        try:
+            parsed = Settings.from_attributes(payload["data"]["attributes"])
+        except Exception as exc:
+            log.warning("Error parsing cached settings file: %s — all features disabled", exc)
+            return Settings()
+
+        self._telemetry_api.record_settings(parsed)
+        return parsed
+
+    def get_known_tests(self) -> set[TestRef]:
+        """
+        Load known tests from ``cache/http/known_tests.json``.
+
+        The payload is read as a module -> suite -> test-names mapping. Any
+        error while walking it produces an empty set.
+        """
+        payload = _read_cache_json(self._cache_path("cache/http/known_tests.json"))
+        if payload is None:
+            return set()
+
+        try:
+            result: set[TestRef] = set()
+            tests_by_module = payload["data"]["attributes"]["tests"]
+            for module_name, suites_in_module in tests_by_module.items():
+                module_ref = ModuleRef(module_name)
+                for suite_name, test_names in suites_in_module.items():
+                    suite_ref = SuiteRef(module_ref, suite_name)
+                    for test_name in test_names:
+                        result.add(TestRef(suite_ref, test_name))
+            self._telemetry_api.record_known_tests_count(len(result))
+            return result
+        except Exception as exc:
+            log.warning("Error parsing cached known tests file: %s", exc)
+            return set()
+
+    def get_test_management_properties(self) -> dict[TestRef, TestProperties]:
+        """
+        Load test management properties from ``cache/http/test_management.json``.
+
+        Flags missing from a test's ``properties`` default to ``False``. Any
+        error while walking the payload produces an empty dict.
+        """
+        payload = _read_cache_json(self._cache_path("cache/http/test_management.json"))
+        if payload is None:
+            return {}
+
+        try:
+            result: dict[TestRef, TestProperties] = {}
+            modules = payload["data"]["attributes"]["modules"]
+            for module_name, module_entry in modules.items():
+                module_ref = ModuleRef(module_name)
+                for suite_name, suite_entry in module_entry["suites"].items():
+                    suite_ref = SuiteRef(module_ref, suite_name)
+                    for test_name, test_entry in suite_entry["tests"].items():
+                        flags = test_entry.get("properties", {})
+                        test_properties = TestProperties(
+                            quarantined=flags.get("quarantined", False),
+                            disabled=flags.get("disabled", False),
+                            attempt_to_fix=flags.get("attempt_to_fix", False),
+                        )
+                        result[TestRef(suite_ref, test_name)] = test_properties
+            self._telemetry_api.record_test_management_tests_count(len(result))
+            return result
+        except Exception as exc:
+            log.warning("Error parsing cached test management file: %s", exc)
+            return {}
+
+    def get_skippable_tests(self) -> tuple[set[t.Union[SuiteRef, TestRef]], t.Optional[str]]:
+        """
+        Return an empty set and ``None`` without touching the cache.
+
+        Skippable tests are not applied in hermetic Bazel runs; this is a hard
+        no-op that matches the Go implementation.
+        """
+        return set(), None
+
+    # --- no-ops for methods unreachable in manifest mode ---
+
+    def get_known_commits(self, latest_commits: list[str]) -> list[str]:
+        """No-op: upload_git_data() returns early in manifest mode."""
+        return []
+
+    def send_git_pack_file(self, packfile: Path) -> t.Optional[int]:
+        """No-op: upload_git_data() returns early in manifest mode."""
+        return None
+
+    def upload_coverage_report(
+        self,
+        coverage_report_bytes: bytes,
+        coverage_format: str,
+        tags: t.Optional[dict[str, str]] = None,
+    ) -> bool:
+        """No-op: coverage upload is skipped in payload-files mode."""
+        return False
+
+    def close(self) -> None:
+        """Nothing to release for a file-backed provider."""
+        return None
@@ -18,3 +18,12 @@ class ITRSkippingLevel(Enum):
 TAG_FALSE = "false"
 
 EMPTY_NAME = "."
+
+# Bazel / offline mode environment variables.
+# Each constant holds the *name* of an environment variable, not its value.
+# NOTE(review): presumably points at the .testoptimization manifest file — confirm
+# against the offline-mode module.
+DD_TEST_OPTIMIZATION_MANIFEST_FILE = "DD_TEST_OPTIMIZATION_MANIFEST_FILE"
+# NOTE(review): presumably toggles writing payloads to files instead of sending
+# them over the network — confirm against the offline-mode module.
+DD_TEST_OPTIMIZATION_PAYLOADS_IN_FILES = "DD_TEST_OPTIMIZATION_PAYLOADS_IN_FILES"
+# Path of the JSON file from which env_tags reads pre-computed CI/Git tags.
+DD_TEST_OPTIMIZATION_ENV_DATA_FILE = "DD_TEST_OPTIMIZATION_ENV_DATA_FILE"
+# NOTE(review): presumably the Bazel-provided directory for undeclared test
+# outputs — confirm how it is consumed here.
+TEST_UNDECLARED_OUTPUTS_DIR = "TEST_UNDECLARED_OUTPUTS_DIR"
+
+# The only supported .testoptimization manifest version
+SUPPORTED_MANIFEST_VERSION = 1
@@ -1,15 +1,23 @@
+import json
+import logging
 import os
 import typing as t
 
 from ddtrace.internal.settings import env
 from ddtrace.testing.internal import ci
 from ddtrace.testing.internal import git
 from ddtrace.testing.internal.ci import CITag
+from ddtrace.testing.internal.constants import DD_TEST_OPTIMIZATION_ENV_DATA_FILE
 from ddtrace.testing.internal.git import GitTag
 from ddtrace.testing.internal.git import get_workspace_path
+from ddtrace.testing.internal.offline_mode import get_offline_mode
+from ddtrace.testing.internal.offline_mode import resolve_rlocation
 from ddtrace.testing.internal.utils import _filter_sensitive_info
 
 
+log = logging.getLogger(__name__)
+
+
 _TagDict = dict[str, t.Optional[str]]
 
 
@@ -26,7 +34,42 @@ def merge_tags(target: _TagDict, *tag_dicts: _TagDict) -> None:
                 target[k] = v
 
 
+def _read_env_data_file() -> dict[str, str]:
+    """Read CI/Git tags from the environmental data file if available.
+
+    The Bazel rule provides pre-computed CI and Git context via
+    ``DD_TEST_OPTIMIZATION_ENV_DATA_FILE``.  This replaces local Git CLI
+    enrichment in payload-files mode.
+
+    The file is decoded as UTF-8 regardless of the process locale. Only
+    string-to-string entries of a top-level JSON object are kept; everything
+    else is dropped.
+
+    Returns an empty dict when the variable is unset or empty, when the file
+    cannot be read or parsed, or when its top-level value is not an object.
+    """
+
+    path = env.get(DD_TEST_OPTIMIZATION_ENV_DATA_FILE)
+    if not path:
+        return {}
+    path = resolve_rlocation(path)
+    try:
+        with open(path, encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, ValueError) as e:
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+        log.warning("Error reading env data file %s: %s", path, e)
+        return {}
+    if not isinstance(data, dict):
+        # Make the unexpected shape visible instead of silently yielding no tags.
+        log.warning("Env data file %s does not contain a JSON object; ignoring it", path)
+        return {}
+    return {k: v for k, v in data.items() if isinstance(k, str) and isinstance(v, str)}
+
+
 def get_env_tags() -> dict[str, str]:
+    # NOTE: In payload-files mode (Bazel sandbox output), CI/Git/OS/runtime tags
+    # must NOT be populated from the local environment or git CLI. Instead, the
+    # Bazel rule provides pre-computed context via DD_TEST_OPTIMIZATION_ENV_DATA_FILE.
+
+    offline = get_offline_mode()
+    if offline.payload_files_enabled:
+        log.debug("Payload-files mode active: reading tags from env data file instead of local git")
+        env_data_tags = _read_env_data_file()
+        # Bazel provider fallback: if no CI provider was detected, tag as "bazel"
+        if CITag.PROVIDER_NAME not in env_data_tags:
+            env_data_tags[CITag.PROVIDER_NAME] = "bazel"
+        return env_data_tags
+
     tags: _TagDict = {}
 
     merge_tags(
@@ -53,6 +96,10 @@ def get_env_tags() -> dict[str, str]:
     if job_id := env.get("JOB_ID"):
         tags[CITag.JOB_ID] = job_id
 
+    # Bazel provider fallback (manifest-only mode without payload-files)
+    if offline.manifest_enabled and not tags.get(CITag.PROVIDER_NAME):
+        tags[CITag.PROVIDER_NAME] = "bazel"
+
     return {k: v for k, v in tags.items() if v}
 
 
 
@@ -487,6 +487,57 @@ class FileAttachment:
     data: bytes
 
 
+class NoOpBackendConnector:
+    """
+    Backend connector that never touches the network.
+
+    Intended for runs inside Bazel's hermetic sandbox (manifest mode active),
+    where network access is unavailable. ``request()`` and its helpers discard
+    the call and hand back an empty ``BackendResult``. Writers and the
+    telemetry API still receive this connector, but their event delivery goes
+    through the payload-files code path instead.
+    """
+
+    def close(self) -> None:
+        """Nothing to close: no connection is ever opened."""
+        return None
+
+    def request(
+        self,
+        method: str,
+        path: str,
+        data: t.Optional[bytes] = None,
+        headers: t.Optional[dict[str, str]] = None,
+        send_gzip: bool = False,
+        is_json_response: bool = False,
+        telemetry: t.Any = None,
+        max_attempts: int = 1,
+    ) -> BackendResult:
+        """Log the skipped request at debug level and return an empty result."""
+        log.debug("NoOp connector: skipping %s %s in offline mode", method, path)
+        empty_result = BackendResult()
+        return empty_result
+
+    def get_json(self, path: str, **kwargs: t.Any) -> BackendResult:
+        """Ignore the GET request and return an empty result."""
+        empty_result = BackendResult()
+        return empty_result
+
+    def post_json(self, path: str, data: t.Any, **kwargs: t.Any) -> BackendResult:
+        """Ignore the JSON POST request and return an empty result."""
+        empty_result = BackendResult()
+        return empty_result
+
+    def post_files(self, path: str, files: t.Any, **kwargs: t.Any) -> BackendResult:
+        """Ignore the file upload request and return an empty result."""
+        empty_result = BackendResult()
+        return empty_result
+
+
+class NoOpBackendConnectorSetup(BackendConnectorSetup):
+    """
+    Connector setup for fully offline (Bazel sandbox) mode.
+
+    Every subdomain gets a ``NoOpBackendConnector``, so no network requests
+    are attempted. ``default_env`` falls back to the standard default because
+    there is no agent to query.
+    """
+
+    def get_connector_for_subdomain(self, subdomain: Subdomain) -> "NoOpBackendConnector":  # type: ignore[override]
+        """Return a new ``NoOpBackendConnector``; ``subdomain`` is ignored."""
+        noop_connector = NoOpBackendConnector()
+        return noop_connector
+
+
 class UnixDomainSocketHTTPConnection(http.client.HTTPConnection):
     """An HTTP connection established over a Unix Domain Socket."""