From 0c93046a02118ba545880b42df5ac2e6db8de251 Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Tue, 21 Apr 2026 19:26:33 -0700 Subject: [PATCH 01/20] feat: add detect-secrets contrib evaluator --- evaluators/contrib/README.md | 1 + evaluators/contrib/detect_secrets/Makefile | 19 ++ evaluators/contrib/detect_secrets/README.md | 90 ++++++ .../contrib/detect_secrets/pyproject.toml | 60 ++++ .../__init__.py | 18 ++ .../detect_secrets/__init__.py | 13 + .../detect_secrets/config.py | 74 +++++ .../detect_secrets/evaluator.py | 201 +++++++++++++ .../detect_secrets/normalization.py | 211 ++++++++++++++ .../contrib/detect_secrets/tests/__init__.py | 1 + .../tests/detect_secrets/test_evaluator.py | 275 ++++++++++++++++++ 11 files changed, 963 insertions(+) create mode 100644 evaluators/contrib/detect_secrets/Makefile create mode 100644 evaluators/contrib/detect_secrets/README.md create mode 100644 evaluators/contrib/detect_secrets/pyproject.toml create mode 100644 evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/__init__.py create mode 100644 evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/__init__.py create mode 100644 evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/config.py create mode 100644 evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py create mode 100644 evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py create mode 100644 evaluators/contrib/detect_secrets/tests/__init__.py create mode 100644 evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py diff --git a/evaluators/contrib/README.md b/evaluators/contrib/README.md index 38338fad..d71769e8 100644 --- a/evaluators/contrib/README.md +++ b/evaluators/contrib/README.md @@ -3,6 +3,7 @@ Contributed evaluators and templates for extending Agent Control. - `galileo/` — Luna-2 evaluator integration +- `detect_secrets/` — detect-secrets runtime scanner integration - `template/` — Starter template for adding new evaluators Full guide: https://docs.agentcontrol.dev/concepts/evaluators/custom-evaluators diff --git a/evaluators/contrib/detect_secrets/Makefile b/evaluators/contrib/detect_secrets/Makefile new file mode 100644 index 00000000..3d413341 --- /dev/null +++ b/evaluators/contrib/detect_secrets/Makefile @@ -0,0 +1,19 @@ +.PHONY: sync test lint typecheck check build + +sync: + uv sync --group dev + +test: + uv run --group dev pytest + +lint: + uv run --group dev ruff check . + uv run --group dev ruff format --check . + +typecheck: + uv run --group dev mypy . + +check: sync lint typecheck test build + +build: + uv build diff --git a/evaluators/contrib/detect_secrets/README.md b/evaluators/contrib/detect_secrets/README.md new file mode 100644 index 00000000..bd37dbef --- /dev/null +++ b/evaluators/contrib/detect_secrets/README.md @@ -0,0 +1,90 @@ +# Agent Control Evaluator - detect-secrets + +External evaluator that scans selector-selected payloads for likely secrets using +[`detect-secrets-async`](https://pypi.org/project/detect-secrets-async/), which wraps Yelp +`detect-secrets` in a bounded subprocess runtime. + +- Entry point name: `yelp.detect_secrets` +- Transport/runtime: `detect-secrets-async` + +## Installation + +Install the evaluator package: + +```bash +pip install agent-control-evaluator-detect-secrets +``` + +For local development from this repo: + +```bash +uv pip install -e evaluators/contrib/detect_secrets +``` + +## Configuration + +Evaluator config fields: + +- `timeout_ms: int = 10000` +- `on_error: "allow" | "deny" = "allow"` +- `max_bytes: int = 1048576` +- `enabled_plugins: list[str] | None = None` +- `exclude_lines_regex: list[str] = []` + +Notes: + +- `enabled_plugins` takes upstream `detect-secrets` plugin class names such as + `GitHubTokenDetector`. +- If `enabled_plugins` is omitted, the evaluator uses the pinned upstream default plugin set from + `detect-secrets-async`. +- `exclude_lines_regex` uses RE2 syntax and blanks matching lines before scan submission so line + numbering stays stable for plain string payloads. + +## Behavior + +- selector-selected `str` payloads are scanned directly +- selector-selected `dict` / `list` payloads are normalized to deterministic pretty JSON before + scanning +- scalar numbers / booleans are normalized to JSON scalar text +- `None` produces `matched=False` + +Safe metadata: + +- `findings_count` +- `findings[]` with `type`, plus: + - `line_number` for plain selected strings + - `json_pointer` for normalized `dict` / `list` payloads when a finding maps back to a structural + location +- `normalized_payload_type` +- `detect_secrets_version` +- `failure_mode` on evaluator failures +- `fallback_action` on fail-closed paths + +Plaintext secrets, snippets, matching lines, and upstream `hashed_secret` are never surfaced. + +## Usage + +Once installed, the evaluator is auto-discovered: + +```python +from agent_control_evaluators import discover_evaluators, get_evaluator + +discover_evaluators() +DetectSecretsEvaluator = get_evaluator("yelp.detect_secrets") +``` + +Example control fragment: + +```json +{ + "selector": { "path": "output" }, + "evaluator": { + "name": "yelp.detect_secrets", + "config": { + "timeout_ms": 10000, + "on_error": "allow", + "enabled_plugins": ["GitHubTokenDetector"] + } + } +} +``` diff --git a/evaluators/contrib/detect_secrets/pyproject.toml b/evaluators/contrib/detect_secrets/pyproject.toml new file mode 100644 index 00000000..5346601a --- /dev/null +++ b/evaluators/contrib/detect_secrets/pyproject.toml @@ -0,0 +1,60 @@ +[project] +name = "agent-control-evaluator-detect-secrets" +version = "0.1.0" +description = "detect-secrets evaluator for agent-control" +readme = "README.md" +requires-python = ">=3.12" +license = { text = "Apache-2.0" } +authors = [{ name = "Agent Control Team" }] +dependencies = [ + "agent-control-evaluators>=3.0.0", + "agent-control-models>=3.0.0", + "detect-secrets-async>=0.2.0,<0.3.0", + "google-re2>=1.1", + "pydantic>=2.12.4", +] + +[project.entry-points."agent_control.evaluators"] +"yelp.detect_secrets" = "agent_control_evaluator_detect_secrets.detect_secrets:DetectSecretsEvaluator" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/agent_control_evaluator_detect_secrets"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" + +[tool.ruff] +line-length = 100 +target-version = "py312" + +[tool.ruff.lint] +select = ["E", "F", "I", "UP"] + +[tool.mypy] +python_version = "3.12" +strict = true +files = ["src", "tests"] + +[[tool.mypy.overrides]] +module = "re2" +ignore_missing_imports = true + +[tool.uv.sources] +agent-control-evaluators = { path = "../../builtin", editable = true } +agent-control-models = { path = "../../../models", editable = true } + +[tool.uv] +default-groups = ["dev"] + +[dependency-groups] +dev = [ + "mypy>=1.8.0", + "pytest>=8.0.0", + "pytest-asyncio>=0.23.0", + "pytest-cov>=4.0.0", + "ruff>=0.1.0", +] diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/__init__.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/__init__.py new file mode 100644 index 00000000..0efa35b5 --- /dev/null +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/__init__.py @@ -0,0 +1,18 @@ +"""Agent Control evaluator package for detect-secrets.""" + +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("agent-control-evaluator-detect-secrets") +except PackageNotFoundError: + __version__ = "0.0.0.dev" + +from agent_control_evaluator_detect_secrets.detect_secrets import ( + DetectSecretsEvaluator, + DetectSecretsEvaluatorConfig, +) + +__all__ = [ + "DetectSecretsEvaluator", + "DetectSecretsEvaluatorConfig", +] diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/__init__.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/__init__.py new file mode 100644 index 00000000..dcc0f54f --- /dev/null +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/__init__.py @@ -0,0 +1,13 @@ +"""detect-secrets evaluator exports.""" + +from agent_control_evaluator_detect_secrets.detect_secrets.config import ( + DetectSecretsEvaluatorConfig, +) +from agent_control_evaluator_detect_secrets.detect_secrets.evaluator import ( + DetectSecretsEvaluator, +) + +__all__ = [ + "DetectSecretsEvaluator", + "DetectSecretsEvaluatorConfig", +] diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/config.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/config.py new file mode 100644 index 00000000..59f5876c --- /dev/null +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/config.py @@ -0,0 +1,74 @@ +"""Configuration for the detect-secrets evaluator.""" + +from __future__ import annotations + +from typing import Literal + +import re2 +from agent_control_evaluators import EvaluatorConfig +from detect_secrets_async import get_runtime_info +from pydantic import Field, field_validator + +DEFAULT_TIMEOUT_MS = 10_000 +DEFAULT_MAX_BYTES = 1_048_576 + + +class DetectSecretsEvaluatorConfig(EvaluatorConfig): + """Typed configuration for the detect-secrets evaluator.""" + + timeout_ms: int = Field( + default=DEFAULT_TIMEOUT_MS, + gt=0, + description="End-to-end timeout in milliseconds for queue wait and scan execution.", + ) + on_error: Literal["allow", "deny"] = Field( + default="allow", + description="Whether evaluator failures should fail open or fail closed.", + ) + max_bytes: int = Field( + default=DEFAULT_MAX_BYTES, + gt=0, + description="Maximum UTF-8 payload size after normalization and line filtering.", + ) + enabled_plugins: list[str] | None = Field( + default=None, + description="Optional explicit upstream detect-secrets plugin class names.", + ) + exclude_lines_regex: list[str] = Field( + default_factory=list, + description="RE2 patterns for lines that should be blanked before scanning.", + ) + + @field_validator("enabled_plugins") + @classmethod + def validate_enabled_plugins(cls, value: list[str] | None) -> list[str] | None: + """Validate explicit upstream plugin names against detect-secrets-async introspection.""" + if value is None: + return None + + available = set(get_runtime_info().available_plugin_names) + normalized: list[str] = [] + seen: set[str] = set() + + for plugin_name in value: + candidate = plugin_name.strip() + if not candidate: + raise ValueError("enabled_plugins entries must be non-empty") + if candidate not in available: + raise ValueError(f"Unknown detect-secrets plugin: {candidate}") + if candidate not in seen: + normalized.append(candidate) + seen.add(candidate) + + return normalized + + @field_validator("exclude_lines_regex") + @classmethod + def validate_exclude_lines_regex(cls, value: list[str]) -> list[str]: + """Validate each configured exclude pattern as a RE2 regex.""" + for pattern in value: + try: + re2.compile(pattern) + except re2.error as exc: + raise ValueError(f"Invalid RE2 pattern '{pattern}': {exc}") from exc + return value diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py new file mode 100644 index 00000000..6573a104 --- /dev/null +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -0,0 +1,201 @@ +"""Agent Control evaluator adapter for detect-secrets-async.""" + +from __future__ import annotations + +from importlib.metadata import PackageNotFoundError, version +from typing import Any + +import re2 +from agent_control_evaluators import Evaluator, EvaluatorMetadata, register_evaluator +from agent_control_models import EvaluatorResult +from detect_secrets_async import ( + RuntimeScanError, + ScanConfig, + ScanRequest, + get_runtime, + get_runtime_info, +) + +from agent_control_evaluator_detect_secrets.detect_secrets.config import ( + DetectSecretsEvaluatorConfig, +) +from agent_control_evaluator_detect_secrets.detect_secrets.normalization import ( + NormalizationError, + NormalizedPayload, + apply_line_exclusions, + normalize_payload, +) + +try: + PACKAGE_VERSION = version("agent-control-evaluator-detect-secrets") +except PackageNotFoundError: + PACKAGE_VERSION = "0.0.0.dev" + +FAILURE_MESSAGES: dict[str, str] = { + "invalid_config": "detect-secrets runtime rejected the scan configuration", + "normalization_error": "selected payload could not be normalized for secret scanning", + "payload_too_large": "normalized payload exceeded the configured size limit", + "queue_full": "detect-secrets runtime queue is full", + "queue_timeout": "secret scan timed out while waiting for runtime capacity", + "worker_startup_error": "detect-secrets worker failed to start", + "worker_timeout": "secret scan timed out", + "worker_crash": "detect-secrets worker exited unexpectedly", + "worker_protocol_error": "detect-secrets worker protocol error", + "runtime_error": "detect-secrets runtime error", +} + + +@register_evaluator +class DetectSecretsEvaluator(Evaluator[DetectSecretsEvaluatorConfig]): + """Scan selector-selected content for likely secrets using detect-secrets-async.""" + + metadata = EvaluatorMetadata( + name="yelp.detect_secrets", + version=PACKAGE_VERSION, + description="Potential secret detection via detect-secrets-async", + timeout_ms=10_000, + ) + config_model = DetectSecretsEvaluatorConfig + + def __init__(self, config: DetectSecretsEvaluatorConfig) -> None: + super().__init__(config) + self._exclude_line_patterns = tuple( + re2.compile(pattern) for pattern in config.exclude_lines_regex + ) + + async def evaluate(self, data: Any) -> EvaluatorResult: + """Normalize selector output, run detect-secrets, and map results into EvaluatorResult.""" + runtime_info = get_runtime_info() + + try: + normalized = normalize_payload(data) + except NormalizationError: + return self._failure_result( + failure_mode="normalization_error", + normalized_payload_type=None, + detect_secrets_version=runtime_info.detect_secrets_version, + ) + + if normalized.payload_type == "none": + return self._success_result( + normalized=normalized, + detect_secrets_version=runtime_info.detect_secrets_version, + findings=[], + ) + + assert normalized.text is not None + filtered_text = apply_line_exclusions(normalized.text, self._exclude_line_patterns) + if len(filtered_text.encode("utf-8")) > self.config.max_bytes: + return self._failure_result( + failure_mode="payload_too_large", + normalized_payload_type=normalized.payload_type, + detect_secrets_version=runtime_info.detect_secrets_version, + ) + + request = ScanRequest( + content=filtered_text, + timeout_ms=self.config.timeout_ms, + config=ScanConfig( + enabled_plugins=tuple(self.config.enabled_plugins) + if self.config.enabled_plugins is not None + else None + ), + ) + + try: + runtime = get_runtime() + scan_result = await runtime.scan(request) + except RuntimeScanError as exc: + failure_mode = exc.code.value + return self._failure_result( + failure_mode=failure_mode, + normalized_payload_type=normalized.payload_type, + detect_secrets_version=runtime_info.detect_secrets_version, + ) + + findings = self._map_findings(normalized, scan_result.findings) + return self._success_result( + normalized=normalized, + detect_secrets_version=scan_result.detect_secrets_version, + findings=findings, + ) + + def _map_findings( + self, + normalized: NormalizedPayload, + findings: tuple[Any, ...], + ) -> list[dict[str, Any]]: + mapped: list[dict[str, Any]] = [] + + for finding in findings: + finding_metadata: dict[str, Any] = {"type": finding.type} + if normalized.payload_type == "str": + if finding.line_number is not None: + finding_metadata["line_number"] = finding.line_number + elif normalized.payload_type in {"dict", "list"}: + if finding.line_number is not None: + json_pointer = normalized.json_pointers_by_line.get(finding.line_number) + if json_pointer is not None: + finding_metadata["json_pointer"] = json_pointer + + mapped.append(finding_metadata) + + return mapped + + def _success_result( + self, + *, + normalized: NormalizedPayload, + detect_secrets_version: str, + findings: list[dict[str, Any]], + ) -> EvaluatorResult: + matched = bool(findings) + return EvaluatorResult( + matched=matched, + confidence=1.0, + message=( + f"Potential secrets detected ({len(findings)} findings)" + if matched + else "No potential secrets detected" + ), + metadata={ + "findings_count": len(findings), + "findings": findings, + "normalized_payload_type": normalized.payload_type, + "detect_secrets_version": detect_secrets_version, + }, + ) + + def _failure_result( + self, + *, + failure_mode: str, + normalized_payload_type: str | None, + detect_secrets_version: str, + ) -> EvaluatorResult: + detail = FAILURE_MESSAGES.get(failure_mode, FAILURE_MESSAGES["runtime_error"]) + metadata: dict[str, Any] = { + "findings_count": 0, + "findings": [], + "detect_secrets_version": detect_secrets_version, + "failure_mode": failure_mode, + } + if normalized_payload_type is not None: + metadata["normalized_payload_type"] = normalized_payload_type + + if self.config.on_error == "deny": + metadata["fallback_action"] = "deny" + return EvaluatorResult( + matched=True, + confidence=0.0, + message=f"Denied due to evaluator failure ({failure_mode}): {detail}", + metadata=metadata, + ) + + return EvaluatorResult( + matched=False, + confidence=0.0, + message=f"Secret scan failed ({failure_mode}): {detail}; allowing request", + metadata=metadata, + error=f"detect-secrets evaluator failure: {failure_mode}", + ) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py new file mode 100644 index 00000000..61b06621 --- /dev/null +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py @@ -0,0 +1,211 @@ +"""Payload normalization helpers for the detect-secrets evaluator.""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from typing import Any, Literal + +NormalizedPayloadType = Literal["none", "str", "dict", "list", "primitive"] + + +class NormalizationError(ValueError): + """Raised when selector-selected payloads cannot be normalized safely.""" + + +@dataclass(frozen=True, slots=True) +class NormalizedPayload: + """Normalized text payload and line-to-JSON-pointer metadata.""" + + payload_type: NormalizedPayloadType + text: str | None + json_pointers_by_line: dict[int, str] + + +@dataclass(frozen=True, slots=True) +class RenderedLine: + """A rendered JSON line plus optional structural pointer metadata.""" + + text: str + json_pointer: str | None = None + + +def normalize_payload(data: Any) -> NormalizedPayload: + """Normalize selector output to deterministic text for detect-secrets scanning.""" + if data is None: + return NormalizedPayload(payload_type="none", text=None, json_pointers_by_line={}) + + if isinstance(data, str): + return NormalizedPayload(payload_type="str", text=data, json_pointers_by_line={}) + + if isinstance(data, dict): + return _normalize_structured_payload(data, payload_type="dict") + + if isinstance(data, list): + return _normalize_structured_payload(data, payload_type="list") + + if isinstance(data, bool | int | float): + return _normalize_primitive_payload(data) + + raise NormalizationError(f"Unsupported payload type for normalization: {type(data).__name__}") + + +def apply_line_exclusions(text: str, patterns: tuple[Any, ...]) -> str: + """Blank matching lines without changing line numbering.""" + if not patterns: + return text + + filtered_lines = [ + "" if any(pattern.search(line) for pattern in patterns) else line + for line in text.splitlines() + ] + return "\n".join(filtered_lines) + + +def _normalize_structured_payload( + data: dict[Any, Any] | list[Any], + *, + payload_type: Literal["dict", "list"], +) -> NormalizedPayload: + try: + text = json.dumps(data, sort_keys=True, indent=2, ensure_ascii=False, allow_nan=False) + except (TypeError, ValueError) as exc: + raise NormalizationError(f"Failed to normalize structured payload: {exc}") from exc + + try: + rendered_lines = _render_json_lines(data) + except (TypeError, ValueError) as exc: + raise NormalizationError(f"Failed to map structured payload lines: {exc}") from exc + + rendered_text = "\n".join(line.text for line in rendered_lines) + if rendered_text != text: + raise NormalizationError("Structured payload rendering mismatch during normalization") + + json_pointers_by_line = { + line_number: rendered_line.json_pointer + for line_number, rendered_line in enumerate(rendered_lines, start=1) + if rendered_line.json_pointer is not None + } + return NormalizedPayload( + payload_type=payload_type, + text=text, + json_pointers_by_line=json_pointers_by_line, + ) + + +def _normalize_primitive_payload(data: bool | int | float) -> NormalizedPayload: + try: + text = json.dumps(data, ensure_ascii=False, allow_nan=False) + except (TypeError, ValueError) as exc: + raise NormalizationError(f"Failed to normalize scalar payload: {exc}") from exc + + return NormalizedPayload(payload_type="primitive", text=text, json_pointers_by_line={}) + + +def _render_json_lines( + value: Any, + *, + indent_level: int = 0, + prefix: str = "", + pointer: str = "", +) -> list[RenderedLine]: + indent = " " * (indent_level * 2) + + if isinstance(value, dict): + return _render_dict_lines(value, indent_level=indent_level, prefix=prefix, pointer=pointer) + + if isinstance(value, list): + return _render_list_lines(value, indent_level=indent_level, prefix=prefix, pointer=pointer) + + scalar_text = json.dumps(value, ensure_ascii=False, allow_nan=False) + scalar_pointer = pointer or None + return [RenderedLine(text=f"{indent}{prefix}{scalar_text}", json_pointer=scalar_pointer)] + + +def _render_dict_lines( + value: dict[Any, Any], + *, + indent_level: int, + prefix: str, + pointer: str, +) -> list[RenderedLine]: + indent = " " * (indent_level * 2) + if not value: + return [RenderedLine(text=f"{indent}{prefix}{{}}")] + + lines = [RenderedLine(text=f"{indent}{prefix}{{")] + items = sorted(value.items(), key=lambda item: item[0]) + last_index = len(items) - 1 + + for index, (raw_key, child) in enumerate(items): + suffix = "," if index < last_index else "" + key_name = _json_object_key_name(raw_key) + key_literal = json.dumps(key_name, ensure_ascii=False, allow_nan=False) + child_prefix = f"{key_literal}: " + child_pointer = _append_json_pointer(pointer, key_name) + child_lines = _render_json_lines( + child, + indent_level=indent_level + 1, + prefix=child_prefix, + pointer=child_pointer, + ) + child_lines[-1] = RenderedLine( + text=f"{child_lines[-1].text}{suffix}", + json_pointer=child_lines[-1].json_pointer, + ) + lines.extend(child_lines) + + lines.append(RenderedLine(text=f"{indent}}}")) + return lines + + +def _render_list_lines( + value: list[Any], + *, + indent_level: int, + prefix: str, + pointer: str, +) -> list[RenderedLine]: + indent = " " * (indent_level * 2) + if not value: + return [RenderedLine(text=f"{indent}{prefix}[]")] + + lines = [RenderedLine(text=f"{indent}{prefix}[")] + last_index = len(value) - 1 + + for index, child in enumerate(value): + suffix = "," if index < last_index else "" + child_pointer = _append_json_pointer(pointer, str(index)) + child_lines = _render_json_lines( + child, + indent_level=indent_level + 1, + prefix="", + pointer=child_pointer, + ) + child_lines[-1] = RenderedLine( + text=f"{child_lines[-1].text}{suffix}", + json_pointer=child_lines[-1].json_pointer, + ) + lines.extend(child_lines) + + lines.append(RenderedLine(text=f"{indent}]")) + return lines + + +def _json_object_key_name(key: Any) -> str: + if isinstance(key, str): + return key + if key is True: + return "true" + if key is False: + return "false" + if key is None: + return "null" + if isinstance(key, int | float): + return json.dumps(key, ensure_ascii=False, allow_nan=False) + raise TypeError(f"Unsupported JSON object key type: {type(key).__name__}") + + +def _append_json_pointer(pointer: str, segment: str) -> str: + escaped = segment.replace("~", "~0").replace("/", "~1") + return f"{pointer}/{escaped}" if pointer else f"/{escaped}" diff --git a/evaluators/contrib/detect_secrets/tests/__init__.py b/evaluators/contrib/detect_secrets/tests/__init__.py new file mode 100644 index 00000000..aaf932b2 --- /dev/null +++ b/evaluators/contrib/detect_secrets/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for the detect-secrets contrib evaluator package.""" diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py new file mode 100644 index 00000000..a78aec9e --- /dev/null +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -0,0 +1,275 @@ +from __future__ import annotations + +from importlib.metadata import entry_points +from typing import Any + +import pytest +from detect_secrets_async import RuntimeScanError, ScanFailureCode, get_runtime_info + +from agent_control_evaluator_detect_secrets.detect_secrets import ( + DetectSecretsEvaluator, + DetectSecretsEvaluatorConfig, +) +from agent_control_evaluator_detect_secrets.detect_secrets.evaluator import FAILURE_MESSAGES +from agent_control_evaluator_detect_secrets.detect_secrets.normalization import normalize_payload + + +@pytest.mark.asyncio +async def test_none_input_returns_no_match() -> None: + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + result = await evaluator.evaluate(None) + + assert result.matched is False + assert result.error is None + assert result.metadata == { + "findings_count": 0, + "findings": [], + "normalized_payload_type": "none", + "detect_secrets_version": get_runtime_info().detect_secrets_version, + } + + +@pytest.mark.asyncio +async def test_string_secret_matches() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + result = await evaluator.evaluate("github_token = 'ghp_123456789012345678901234567890123456'") + + assert result.matched is True + assert result.confidence == 1.0 + assert result.metadata is not None + assert result.metadata["findings_count"] == 1 + assert result.metadata["normalized_payload_type"] == "str" + assert result.metadata["findings"] == [{"type": "GitHub Token", "line_number": 1}] + + +@pytest.mark.asyncio +async def test_string_without_findings_does_not_match() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + result = await evaluator.evaluate("safe content only") + + assert result.matched is False + assert result.error is None + assert result.metadata is not None + assert result.metadata["findings_count"] == 0 + + +@pytest.mark.asyncio +async def test_dict_payload_maps_findings_to_json_pointer() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + result = await evaluator.evaluate( + { + "response": { + "headers": { + "authorization": "ghp_123456789012345678901234567890123456", + } + } + } + ) + + assert result.matched is True + assert result.metadata is not None + assert result.metadata["normalized_payload_type"] == "dict" + assert result.metadata["findings"] == [ + { + "type": "GitHub Token", + "json_pointer": "/response/headers/authorization", + } + ] + + +@pytest.mark.asyncio +async def test_list_payload_maps_findings_to_json_pointer() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + result = await evaluator.evaluate( + [ + {"kind": "safe"}, + {"token": "ghp_123456789012345678901234567890123456"}, + ] + ) + + assert result.matched is True + assert result.metadata is not None + assert result.metadata["normalized_payload_type"] == "list" + assert result.metadata["findings"] == [ + { + "type": "GitHub Token", + "json_pointer": "/1/token", + } + ] + + +@pytest.mark.asyncio +async def test_primitive_payload_is_normalized_and_omits_line_numbers() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + result = await evaluator.evaluate(True) + + assert result.matched is False + assert result.metadata is not None + assert result.metadata["normalized_payload_type"] == "primitive" + assert result.metadata["findings"] == [] + + +@pytest.mark.asyncio +async def test_non_json_serializable_payload_routes_through_on_error_allow() -> None: + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + result = await evaluator.evaluate({"bad": {1, 2, 3}}) + + assert result.matched is False + assert result.confidence == 0.0 + assert result.error == "detect-secrets evaluator failure: normalization_error" + assert result.metadata is not None + assert result.metadata["failure_mode"] == "normalization_error" + + +@pytest.mark.asyncio +async def test_oversized_payload_routes_through_on_error_allow() -> None: + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig(max_bytes=8)) + + result = await evaluator.evaluate("0123456789") + + assert result.matched is False + assert result.error == "detect-secrets evaluator failure: payload_too_large" + assert result.metadata is not None + assert result.metadata["failure_mode"] == "payload_too_large" + assert result.metadata["normalized_payload_type"] == "str" + + +@pytest.mark.asyncio +async def test_on_error_deny_fails_closed(monkeypatch: pytest.MonkeyPatch) -> None: + class FakeRuntime: + async def scan(self, request: Any) -> Any: + raise RuntimeScanError(ScanFailureCode.WORKER_TIMEOUT) + + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", + lambda: FakeRuntime(), + ) + + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig(on_error="deny")) + result = await evaluator.evaluate("hello") + + assert result.matched is True + assert result.confidence == 0.0 + assert result.error is None + assert result.metadata is not None + assert result.metadata["failure_mode"] == "worker_timeout" + assert result.metadata["fallback_action"] == "deny" + + +@pytest.mark.asyncio +async def test_explicit_runtime_failure_is_sanitized(monkeypatch: pytest.MonkeyPatch) -> None: + class FakeRuntime: + async def scan(self, request: Any) -> Any: + raise RuntimeScanError(ScanFailureCode.WORKER_CRASH, "raw runtime detail") + + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", + lambda: FakeRuntime(), + ) + + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + result = await evaluator.evaluate("hello") + + assert result.matched is False + assert result.error == "detect-secrets evaluator failure: worker_crash" + assert result.metadata is not None + assert result.metadata["failure_mode"] == "worker_crash" + assert result.message is not None + assert FAILURE_MESSAGES["worker_crash"] in result.message + + +@pytest.mark.asyncio +async def test_exclude_lines_regex_suppresses_findings() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig( + enabled_plugins=["GitHubTokenDetector"], + exclude_lines_regex=["ghp_[A-Za-z0-9]{36}"], + ) + ) + + result = await evaluator.evaluate("github_token = 'ghp_123456789012345678901234567890123456'") + + assert result.matched is False + assert result.metadata is not None + assert result.metadata["findings"] == [] + + +@pytest.mark.asyncio +async def test_exclude_lines_preserves_line_numbers_for_plain_strings() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig( + enabled_plugins=["GitHubTokenDetector"], + exclude_lines_regex=["^ignore me$"], + ) + ) + + content = "\n".join( + [ + "line 1", + "ignore me", + "github_token = 'ghp_123456789012345678901234567890123456'", + ] + ) + result = await evaluator.evaluate(content) + + assert result.matched is True + assert result.metadata is not None + assert result.metadata["findings"] == [{"type": "GitHub Token", "line_number": 3}] + + +def test_invalid_regex_is_rejected() -> None: + with pytest.raises(ValueError, match="Invalid RE2 pattern"): + DetectSecretsEvaluatorConfig(exclude_lines_regex=["("]) + + +def test_unknown_plugin_is_rejected() -> None: + with pytest.raises(ValueError, match="Unknown detect-secrets plugin"): + DetectSecretsEvaluatorConfig(enabled_plugins=["NoSuchPlugin"]) + + +@pytest.mark.asyncio +async def test_omitted_enabled_plugins_uses_upstream_defaults() -> None: + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + sample = "api_key = 'abcdefghijklmnopqrstuvwxyz0123456789ABCDE='" + + result = await evaluator.evaluate(sample) + + assert result.matched is True + assert result.metadata is not None + finding_types = {finding["type"] for finding in result.metadata["findings"]} + assert "Secret Keyword" in finding_types + + +def test_normalize_payload_renders_expected_json_pointer_lines() -> None: + normalized = normalize_payload({"outer": [{"inner": "secret"}]}) + + assert normalized.payload_type == "dict" + assert normalized.json_pointers_by_line[4] == "/outer/0/inner" + + +def test_entry_point_is_registered() -> None: + evaluator_entry_points = { + entry_point.name: entry_point.value + for entry_point in entry_points(group="agent_control.evaluators") + } + + assert evaluator_entry_points["yelp.detect_secrets"] == ( + "agent_control_evaluator_detect_secrets.detect_secrets:DetectSecretsEvaluator" + ) From b328619286242a456fa193391f7050104363a0f0 Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Tue, 21 Apr 2026 19:36:12 -0700 Subject: [PATCH 02/20] fix: preserve pointers for structured key findings --- .../detect_secrets/normalization.py | 13 ++++++++++ .../tests/detect_secrets/test_evaluator.py | 25 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py index 61b06621..b72587e1 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py @@ -149,6 +149,7 @@ def _render_dict_lines( prefix=child_prefix, pointer=child_pointer, ) + child_lines = _attach_container_pointer(child, child_lines, child_pointer) child_lines[-1] = RenderedLine( text=f"{child_lines[-1].text}{suffix}", json_pointer=child_lines[-1].json_pointer, @@ -182,6 +183,7 @@ def _render_list_lines( prefix="", pointer=child_pointer, ) + child_lines = _attach_container_pointer(child, child_lines, child_pointer) child_lines[-1] = RenderedLine( text=f"{child_lines[-1].text}{suffix}", json_pointer=child_lines[-1].json_pointer, @@ -206,6 +208,17 @@ def _json_object_key_name(key: Any) -> str: raise TypeError(f"Unsupported JSON object key type: {type(key).__name__}") +def _attach_container_pointer( + child: Any, + child_lines: list[RenderedLine], + child_pointer: str, +) -> list[RenderedLine]: + if isinstance(child, dict | list) and child_lines: + first_line = child_lines[0] + child_lines[0] = RenderedLine(text=first_line.text, json_pointer=child_pointer) + return child_lines + + def _append_json_pointer(pointer: str, segment: str) -> str: escaped = segment.replace("~", "~0").replace("/", "~1") return f"{pointer}/{escaped}" if pointer else f"/{escaped}" diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index a78aec9e..368da3e4 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -87,6 +87,31 @@ async def test_dict_payload_maps_findings_to_json_pointer() -> None: ] +@pytest.mark.asyncio +async def test_dict_key_with_container_value_maps_findings_to_json_pointer() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + result = await evaluator.evaluate( + { + "ghp_123456789012345678901234567890123456": { + "nested": "safe", + } + } + ) + + assert result.matched is True + assert result.metadata is not None + assert result.metadata["normalized_payload_type"] == "dict" + assert result.metadata["findings"] == [ + { + "type": "GitHub Token", + "json_pointer": "/ghp_123456789012345678901234567890123456", + } + ] + + @pytest.mark.asyncio async def test_list_payload_maps_findings_to_json_pointer() -> None: evaluator = DetectSecretsEvaluator( From 0eb63a72d47f0726074dcdce9e40aae137f16a7a Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Tue, 21 Apr 2026 19:42:14 -0700 Subject: [PATCH 03/20] fix: avoid leaking secret-bearing key paths --- .../detect_secrets/evaluator.py | 61 +++++++++++++- .../detect_secrets/normalization.py | 83 +++++++++++++++---- .../tests/detect_secrets/test_evaluator.py | 32 ++++++- 3 files changed, 153 insertions(+), 23 deletions(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index 6573a104..66c0fe7a 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -9,6 +9,7 @@ from agent_control_evaluators import Evaluator, EvaluatorMetadata, register_evaluator from agent_control_models import EvaluatorResult from detect_secrets_async import ( + DetectSecretsRuntime, RuntimeScanError, ScanConfig, ScanRequest, @@ -20,6 +21,7 @@ DetectSecretsEvaluatorConfig, ) from agent_control_evaluator_detect_secrets.detect_secrets.normalization import ( + LineLocation, NormalizationError, NormalizedPayload, apply_line_exclusions, @@ -113,19 +115,28 @@ async def evaluate(self, data: Any) -> EvaluatorResult: detect_secrets_version=runtime_info.detect_secrets_version, ) - findings = self._map_findings(normalized, scan_result.findings) + findings = await self._map_findings( + normalized=normalized, + findings=scan_result.findings, + runtime=runtime, + scan_config=request.config, + ) return self._success_result( normalized=normalized, detect_secrets_version=scan_result.detect_secrets_version, findings=findings, ) - def _map_findings( + async def _map_findings( self, + *, normalized: NormalizedPayload, findings: tuple[Any, ...], + runtime: DetectSecretsRuntime, + scan_config: ScanConfig, ) -> list[dict[str, Any]]: mapped: list[dict[str, Any]] = [] + key_probe_cache: dict[str, set[str]] = {} for finding in findings: finding_metadata: dict[str, Any] = {"type": finding.type} @@ -134,7 +145,14 @@ def _map_findings( finding_metadata["line_number"] = finding.line_number elif normalized.payload_type in {"dict", "list"}: if finding.line_number is not None: - json_pointer = normalized.json_pointers_by_line.get(finding.line_number) + location = normalized.line_locations_by_line.get(finding.line_number) + json_pointer = await self._safe_structured_pointer( + location=location, + finding_type=finding.type, + runtime=runtime, + scan_config=scan_config, + key_probe_cache=key_probe_cache, + ) if json_pointer is not None: finding_metadata["json_pointer"] = json_pointer @@ -142,6 +160,43 @@ def _map_findings( return mapped + async def _safe_structured_pointer( + self, + *, + location: LineLocation | None, + finding_type: str, + runtime: DetectSecretsRuntime, + scan_config: ScanConfig, + key_probe_cache: dict[str, set[str]], + ) -> str | None: + if location is None: + return None + + if location.key_probe_text is None: + return location.json_pointer + + key_probe_text = location.key_probe_text + probe_findings = key_probe_cache.get(key_probe_text) + if probe_findings is None: + try: + probe_result = await runtime.scan( + ScanRequest( + content=key_probe_text, + timeout_ms=self.config.timeout_ms, + config=scan_config, + ) + ) + except RuntimeScanError: + return location.parent_pointer + + probe_findings = {finding.type for finding in probe_result.findings} + key_probe_cache[key_probe_text] = probe_findings + + if finding_type in probe_findings: + return location.parent_pointer + + return location.json_pointer + def _success_result( self, *, diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py index b72587e1..d506c1d5 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py @@ -19,7 +19,16 @@ class NormalizedPayload: payload_type: NormalizedPayloadType text: str | None - json_pointers_by_line: dict[int, str] + line_locations_by_line: dict[int, LineLocation] + + +@dataclass(frozen=True, slots=True) +class LineLocation: + """Safe structured-location metadata for a rendered line.""" + + json_pointer: str | None + parent_pointer: str | None = None + key_probe_text: str | None = None @dataclass(frozen=True, slots=True) @@ -27,16 +36,16 @@ class RenderedLine: """A rendered JSON line plus optional structural pointer metadata.""" text: str - json_pointer: str | None = None + location: LineLocation | None = None def normalize_payload(data: Any) -> NormalizedPayload: """Normalize selector output to deterministic text for detect-secrets scanning.""" if data is None: - return NormalizedPayload(payload_type="none", text=None, json_pointers_by_line={}) + return NormalizedPayload(payload_type="none", text=None, line_locations_by_line={}) if isinstance(data, str): - return NormalizedPayload(payload_type="str", text=data, json_pointers_by_line={}) + return NormalizedPayload(payload_type="str", text=data, line_locations_by_line={}) if isinstance(data, dict): return _normalize_structured_payload(data, payload_type="dict") @@ -81,15 +90,15 @@ def _normalize_structured_payload( if rendered_text != text: raise NormalizationError("Structured payload rendering mismatch during normalization") - json_pointers_by_line = { - line_number: rendered_line.json_pointer + line_locations_by_line = { + line_number: rendered_line.location for line_number, rendered_line in enumerate(rendered_lines, start=1) - if rendered_line.json_pointer is not None + if rendered_line.location is not None } return NormalizedPayload( payload_type=payload_type, text=text, - json_pointers_by_line=json_pointers_by_line, + line_locations_by_line=line_locations_by_line, ) @@ -99,7 +108,7 @@ def _normalize_primitive_payload(data: bool | int | float) -> NormalizedPayload: except (TypeError, ValueError) as exc: raise NormalizationError(f"Failed to normalize scalar payload: {exc}") from exc - return NormalizedPayload(payload_type="primitive", text=text, json_pointers_by_line={}) + return NormalizedPayload(payload_type="primitive", text=text, line_locations_by_line={}) def _render_json_lines( @@ -119,7 +128,12 @@ def _render_json_lines( scalar_text = json.dumps(value, ensure_ascii=False, allow_nan=False) scalar_pointer = pointer or None - return [RenderedLine(text=f"{indent}{prefix}{scalar_text}", json_pointer=scalar_pointer)] + return [ + RenderedLine( + text=f"{indent}{prefix}{scalar_text}", + location=LineLocation(json_pointer=scalar_pointer), + ) + ] def _render_dict_lines( @@ -149,10 +163,16 @@ def _render_dict_lines( prefix=child_prefix, pointer=child_pointer, ) - child_lines = _attach_container_pointer(child, child_lines, child_pointer) + child_lines = _attach_dict_child_location( + child=child, + child_lines=child_lines, + child_pointer=child_pointer, + parent_pointer=pointer or None, + key_literal=key_literal, + ) child_lines[-1] = RenderedLine( text=f"{child_lines[-1].text}{suffix}", - json_pointer=child_lines[-1].json_pointer, + location=child_lines[-1].location, ) lines.extend(child_lines) @@ -183,10 +203,10 @@ def _render_list_lines( prefix="", pointer=child_pointer, ) - child_lines = _attach_container_pointer(child, child_lines, child_pointer) + child_lines = _attach_list_child_location(child, child_lines, child_pointer) child_lines[-1] = RenderedLine( text=f"{child_lines[-1].text}{suffix}", - json_pointer=child_lines[-1].json_pointer, + location=child_lines[-1].location, ) lines.extend(child_lines) @@ -208,17 +228,48 @@ def _json_object_key_name(key: Any) -> str: raise TypeError(f"Unsupported JSON object key type: {type(key).__name__}") -def _attach_container_pointer( +def _attach_dict_child_location( + child: Any, + child_lines: list[RenderedLine], + child_pointer: str, + parent_pointer: str | None, + key_literal: str, +) -> list[RenderedLine]: + if child_lines: + first_line = child_lines[0] + child_lines[0] = RenderedLine( + text=first_line.text, + location=LineLocation( + json_pointer=child_pointer, + parent_pointer=parent_pointer, + key_probe_text=_build_key_probe_text(key_literal, child), + ), + ) + return child_lines + + +def _attach_list_child_location( child: Any, child_lines: list[RenderedLine], child_pointer: str, ) -> list[RenderedLine]: if isinstance(child, dict | list) and child_lines: first_line = child_lines[0] - child_lines[0] = RenderedLine(text=first_line.text, json_pointer=child_pointer) + child_lines[0] = RenderedLine( + text=first_line.text, + location=LineLocation(json_pointer=child_pointer), + ) return child_lines +def _build_key_probe_text(key_literal: str, child: Any) -> str: + if isinstance(child, dict): + return f"{key_literal}: {{}}" + if isinstance(child, list): + return f"{key_literal}: []" + return f"{key_literal}: null" + + def _append_json_pointer(pointer: str, segment: str) -> str: escaped = segment.replace("~", "~0").replace("/", "~1") return f"{pointer}/{escaped}" if pointer else f"/{escaped}" diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index 368da3e4..1545eae7 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -95,8 +95,10 @@ async def test_dict_key_with_container_value_maps_findings_to_json_pointer() -> result = await evaluator.evaluate( { - "ghp_123456789012345678901234567890123456": { - "nested": "safe", + "outer": { + "ghp_123456789012345678901234567890123456": { + "nested": "safe", + } } } ) @@ -107,7 +109,29 @@ async def test_dict_key_with_container_value_maps_findings_to_json_pointer() -> assert result.metadata["findings"] == [ { "type": "GitHub Token", - "json_pointer": "/ghp_123456789012345678901234567890123456", + "json_pointer": "/outer", + } + ] + + +@pytest.mark.asyncio +async def test_dict_key_with_scalar_value_omits_json_pointer() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + result = await evaluator.evaluate( + { + "ghp_123456789012345678901234567890123456": "safe", + } + ) + + assert result.matched is True + assert result.metadata is not None + assert result.metadata["normalized_payload_type"] == "dict" + assert result.metadata["findings"] == [ + { + "type": "GitHub Token", } ] @@ -286,7 +310,7 @@ def test_normalize_payload_renders_expected_json_pointer_lines() -> None: normalized = normalize_payload({"outer": [{"inner": "secret"}]}) assert normalized.payload_type == "dict" - assert normalized.json_pointers_by_line[4] == "/outer/0/inner" + assert normalized.line_locations_by_line[4].json_pointer == "/outer/0/inner" def test_entry_point_is_registered() -> None: From ebe870e10b3e1071e85e4dab31b0fa730ca591ad Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Tue, 21 Apr 2026 19:47:54 -0700 Subject: [PATCH 04/20] fix: batch structured key probes --- .../detect_secrets/evaluator.py | 99 ++++++++++++++----- .../tests/detect_secrets/test_evaluator.py | 75 +++++++++++++- 2 files changed, 149 insertions(+), 25 deletions(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index 66c0fe7a..95ddfd03 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -2,6 +2,7 @@ from __future__ import annotations +import time from importlib.metadata import PackageNotFoundError, version from typing import Any @@ -67,6 +68,7 @@ def __init__(self, config: DetectSecretsEvaluatorConfig) -> None: async def evaluate(self, data: Any) -> EvaluatorResult: """Normalize selector output, run detect-secrets, and map results into EvaluatorResult.""" + started_at = time.monotonic() runtime_info = get_runtime_info() try: @@ -120,6 +122,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: findings=scan_result.findings, runtime=runtime, scan_config=request.config, + started_at=started_at, ) return self._success_result( normalized=normalized, @@ -134,9 +137,16 @@ async def _map_findings( findings: tuple[Any, ...], runtime: DetectSecretsRuntime, scan_config: ScanConfig, + started_at: float, ) -> list[dict[str, Any]]: mapped: list[dict[str, Any]] = [] - key_probe_cache: dict[str, set[str]] = {} + key_probe_results = await self._collect_key_probe_results( + normalized=normalized, + findings=findings, + runtime=runtime, + scan_config=scan_config, + started_at=started_at, + ) for finding in findings: finding_metadata: dict[str, Any] = {"type": finding.type} @@ -149,9 +159,7 @@ async def _map_findings( json_pointer = await self._safe_structured_pointer( location=location, finding_type=finding.type, - runtime=runtime, - scan_config=scan_config, - key_probe_cache=key_probe_cache, + key_probe_results=key_probe_results, ) if json_pointer is not None: finding_metadata["json_pointer"] = json_pointer @@ -165,9 +173,7 @@ async def _safe_structured_pointer( *, location: LineLocation | None, finding_type: str, - runtime: DetectSecretsRuntime, - scan_config: ScanConfig, - key_probe_cache: dict[str, set[str]], + key_probe_results: dict[str, set[str]] | None, ) -> str | None: if location is None: return None @@ -175,28 +181,75 @@ async def _safe_structured_pointer( if location.key_probe_text is None: return location.json_pointer - key_probe_text = location.key_probe_text - probe_findings = key_probe_cache.get(key_probe_text) - if probe_findings is None: - try: - probe_result = await runtime.scan( - ScanRequest( - content=key_probe_text, - timeout_ms=self.config.timeout_ms, - config=scan_config, - ) - ) - except RuntimeScanError: - return location.parent_pointer - - probe_findings = {finding.type for finding in probe_result.findings} - key_probe_cache[key_probe_text] = probe_findings + if key_probe_results is None: + return location.parent_pointer + probe_findings = key_probe_results.get(location.key_probe_text, set()) if finding_type in probe_findings: return location.parent_pointer return location.json_pointer + async def _collect_key_probe_results( + self, + *, + normalized: NormalizedPayload, + findings: tuple[Any, ...], + runtime: DetectSecretsRuntime, + scan_config: ScanConfig, + started_at: float, + ) -> dict[str, set[str]] | None: + if normalized.payload_type not in {"dict", "list"}: + return {} + + key_probe_texts = self._collect_unique_key_probe_texts(normalized, findings) + if not key_probe_texts: + return {} + + remaining_ms = int(self.config.timeout_ms - ((time.monotonic() - started_at) * 1000)) + if remaining_ms <= 0: + return None + + try: + probe_result = await runtime.scan( + ScanRequest( + content="\n".join(key_probe_texts), + timeout_ms=remaining_ms, + config=scan_config, + ) + ) + except RuntimeScanError: + return None + + results_by_probe = {probe_text: set[str]() for probe_text in key_probe_texts} + for finding in probe_result.findings: + if finding.line_number is None: + continue + line_index = finding.line_number - 1 + if 0 <= line_index < len(key_probe_texts): + results_by_probe[key_probe_texts[line_index]].add(finding.type) + return results_by_probe + + def _collect_unique_key_probe_texts( + self, + normalized: NormalizedPayload, + findings: tuple[Any, ...], + ) -> list[str]: + key_probe_texts: list[str] = [] + seen: set[str] = set() + + for finding in findings: + if finding.line_number is None: + continue + location = normalized.line_locations_by_line.get(finding.line_number) + key_probe_text = None if location is None else location.key_probe_text + if key_probe_text is None or key_probe_text in seen: + continue + seen.add(key_probe_text) + key_probe_texts.append(key_probe_text) + + return key_probe_texts + def _success_result( self, *, diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index 1545eae7..7a398859 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -4,14 +4,24 @@ from typing import Any import pytest -from detect_secrets_async import RuntimeScanError, ScanFailureCode, get_runtime_info +from detect_secrets_async import ( + RuntimeScanError, + ScanFailureCode, + ScanFinding, + ScanResult, + get_runtime_info, +) from agent_control_evaluator_detect_secrets.detect_secrets import ( DetectSecretsEvaluator, DetectSecretsEvaluatorConfig, ) from agent_control_evaluator_detect_secrets.detect_secrets.evaluator import FAILURE_MESSAGES -from agent_control_evaluator_detect_secrets.detect_secrets.normalization import normalize_payload +from agent_control_evaluator_detect_secrets.detect_secrets.normalization import ( + LineLocation, + NormalizedPayload, + normalize_payload, +) @pytest.mark.asyncio @@ -244,6 +254,67 @@ async def scan(self, request: Any) -> Any: assert FAILURE_MESSAGES["worker_crash"] in result.message +@pytest.mark.asyncio +async def test_structured_key_probes_are_batched(monkeypatch: pytest.MonkeyPatch) -> None: + normalized = NormalizedPayload( + payload_type="dict", + text='{"ignored": true}', + line_locations_by_line={ + 1: LineLocation( + json_pointer="/safe-one", + parent_pointer="/parent-one", + key_probe_text='"probe-one": null', + ), + 2: LineLocation( + json_pointer="/safe-two", + parent_pointer="/parent-two", + key_probe_text='"probe-two": null', + ), + }, + ) + + class FakeRuntime: + def __init__(self) -> None: + self.requests: list[Any] = [] + + async def scan(self, request: Any) -> ScanResult: + self.requests.append(request) + if len(self.requests) == 1: + return ScanResult( + findings=( + ScanFinding(type="GitHub Token", line_number=1), + ScanFinding(type="GitHub Token", line_number=2), + ), + detect_secrets_version="1.5.0", + ) + return ScanResult( + findings=(ScanFinding(type="GitHub Token", line_number=1),), + detect_secrets_version="1.5.0", + ) + + fake_runtime = FakeRuntime() + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.normalize_payload", + lambda data: normalized, + ) + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", + lambda: fake_runtime, + ) + + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + result = await evaluator.evaluate({"ignored": "ignored"}) + + assert result.matched is True + assert result.metadata is not None + assert fake_runtime.requests[1].content == '"probe-one": null\n"probe-two": null' + assert len(fake_runtime.requests) == 2 + assert result.metadata["findings"] == [ + {"type": "GitHub Token", "json_pointer": "/parent-one"}, + {"type": "GitHub Token", "json_pointer": "/safe-two"}, + ] + + @pytest.mark.asyncio async def test_exclude_lines_regex_suppresses_findings() -> None: evaluator = DetectSecretsEvaluator( From f585261578b5b09dbb9737b9fe306a4fd8fc8236 Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Tue, 21 Apr 2026 19:52:29 -0700 Subject: [PATCH 05/20] fix: apply timeout budget to initial scan --- .../detect_secrets/evaluator.py | 10 ++++- .../tests/detect_secrets/test_evaluator.py | 37 +++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index 95ddfd03..042a8cc4 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -98,7 +98,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: request = ScanRequest( content=filtered_text, - timeout_ms=self.config.timeout_ms, + timeout_ms=self._bounded_remaining_timeout_ms(started_at), config=ScanConfig( enabled_plugins=tuple(self.config.enabled_plugins) if self.config.enabled_plugins is not None @@ -206,7 +206,7 @@ async def _collect_key_probe_results( if not key_probe_texts: return {} - remaining_ms = int(self.config.timeout_ms - ((time.monotonic() - started_at) * 1000)) + remaining_ms = self._remaining_timeout_ms(started_at) if remaining_ms <= 0: return None @@ -250,6 +250,12 @@ def _collect_unique_key_probe_texts( return key_probe_texts + def _remaining_timeout_ms(self, started_at: float) -> int: + return int(self.config.timeout_ms - ((time.monotonic() - started_at) * 1000)) + + def _bounded_remaining_timeout_ms(self, started_at: float) -> int: + return max(1, self._remaining_timeout_ms(started_at)) + def _success_result( self, *, diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index 7a398859..2fa4b15b 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -315,6 +315,43 @@ async def scan(self, request: Any) -> ScanResult: ] +@pytest.mark.asyncio +async def test_initial_scan_uses_remaining_timeout_budget( + monkeypatch: pytest.MonkeyPatch, +) -> None: + class FakeRuntime: + def __init__(self) -> None: + self.requests: list[Any] = [] + + async def scan(self, request: Any) -> ScanResult: + self.requests.append(request) + return ScanResult(findings=(), detect_secrets_version="1.5.0") + + fake_runtime = FakeRuntime() + monotonic_values = [100.0, 100.04] + + def fake_monotonic() -> float: + if monotonic_values: + return monotonic_values.pop(0) + return 100.04 + + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", + lambda: fake_runtime, + ) + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.time.monotonic", + fake_monotonic, + ) + + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig(timeout_ms=50)) + result = await evaluator.evaluate("safe content only") + + assert result.matched is False + assert len(fake_runtime.requests) == 1 + assert 1 <= fake_runtime.requests[0].timeout_ms < 50 + + @pytest.mark.asyncio async def test_exclude_lines_regex_suppresses_findings() -> None: evaluator = DetectSecretsEvaluator( From e363e0a7137b018994dbd25edea7dade26dd3b4c Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Tue, 21 Apr 2026 20:04:46 -0700 Subject: [PATCH 06/20] fix: tighten detect-secrets pointer mapping --- .../contrib/detect_secrets/pyproject.toml | 5 + .../detect_secrets/config.py | 2 + .../detect_secrets/evaluator.py | 192 ++++++++++++------ .../tests/detect_secrets/test_evaluator.py | 71 ++----- 4 files changed, 152 insertions(+), 118 deletions(-) diff --git a/evaluators/contrib/detect_secrets/pyproject.toml b/evaluators/contrib/detect_secrets/pyproject.toml index 5346601a..48a6a70a 100644 --- a/evaluators/contrib/detect_secrets/pyproject.toml +++ b/evaluators/contrib/detect_secrets/pyproject.toml @@ -9,6 +9,7 @@ authors = [{ name = "Agent Control Team" }] dependencies = [ "agent-control-evaluators>=3.0.0", "agent-control-models>=3.0.0", + "detect-secrets==1.5.0", "detect-secrets-async>=0.2.0,<0.3.0", "google-re2>=1.1", "pydantic>=2.12.4", @@ -43,6 +44,10 @@ files = ["src", "tests"] module = "re2" ignore_missing_imports = true +[[tool.mypy.overrides]] +module = "detect_secrets.*" +ignore_missing_imports = true + [tool.uv.sources] agent-control-evaluators = { path = "../../builtin", editable = true } agent-control-models = { path = "../../../models", editable = true } diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/config.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/config.py index 59f5876c..5fe02f2f 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/config.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/config.py @@ -67,6 +67,8 @@ def validate_enabled_plugins(cls, value: list[str] | None) -> list[str] | None: def validate_exclude_lines_regex(cls, value: list[str]) -> list[str]: """Validate each configured exclude pattern as a RE2 regex.""" for pattern in value: + if pattern == "": + raise ValueError("exclude_lines_regex entries must be non-empty") try: re2.compile(pattern) except re2.error as exc: diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index 042a8cc4..0cb10a3f 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -2,17 +2,23 @@ from __future__ import annotations +import threading import time +from collections import Counter, defaultdict, deque +from contextlib import AbstractContextManager +from dataclasses import dataclass from importlib.metadata import PackageNotFoundError, version from typing import Any import re2 from agent_control_evaluators import Evaluator, EvaluatorMetadata, register_evaluator from agent_control_models import EvaluatorResult +from detect_secrets.core.scan import scan_line +from detect_secrets.settings import default_settings, transient_settings from detect_secrets_async import ( - DetectSecretsRuntime, RuntimeScanError, ScanConfig, + ScanFinding, ScanRequest, get_runtime, get_runtime_info, @@ -46,6 +52,15 @@ "worker_protocol_error": "detect-secrets worker protocol error", "runtime_error": "detect-secrets runtime error", } +_LOCAL_SCAN_LOCK = threading.Lock() + + +@dataclass(frozen=True, slots=True) +class _LocalLineMatch: + """A direct detect-secrets match for a single rendered line.""" + + type: str + secret_value: str | None @register_evaluator @@ -119,8 +134,8 @@ async def evaluate(self, data: Any) -> EvaluatorResult: findings = await self._map_findings( normalized=normalized, + scanned_text=filtered_text, findings=scan_result.findings, - runtime=runtime, scan_config=request.config, started_at=started_at, ) @@ -134,16 +149,16 @@ async def _map_findings( self, *, normalized: NormalizedPayload, - findings: tuple[Any, ...], - runtime: DetectSecretsRuntime, + scanned_text: str, + findings: tuple[ScanFinding, ...], scan_config: ScanConfig, started_at: float, ) -> list[dict[str, Any]]: mapped: list[dict[str, Any]] = [] - key_probe_results = await self._collect_key_probe_results( + structured_pointer_assignments = self._build_structured_pointer_assignments( normalized=normalized, + scanned_text=scanned_text, findings=findings, - runtime=runtime, scan_config=scan_config, started_at=started_at, ) @@ -156,10 +171,10 @@ async def _map_findings( elif normalized.payload_type in {"dict", "list"}: if finding.line_number is not None: location = normalized.line_locations_by_line.get(finding.line_number) - json_pointer = await self._safe_structured_pointer( + json_pointer = self._safe_structured_pointer( location=location, - finding_type=finding.type, - key_probe_results=key_probe_results, + finding=finding, + structured_pointer_assignments=structured_pointer_assignments, ) if json_pointer is not None: finding_metadata["json_pointer"] = json_pointer @@ -168,12 +183,12 @@ async def _map_findings( return mapped - async def _safe_structured_pointer( + def _safe_structured_pointer( self, *, location: LineLocation | None, - finding_type: str, - key_probe_results: dict[str, set[str]] | None, + finding: ScanFinding, + structured_pointer_assignments: dict[int, deque[str | None]], ) -> str | None: if location is None: return None @@ -181,74 +196,131 @@ async def _safe_structured_pointer( if location.key_probe_text is None: return location.json_pointer - if key_probe_results is None: - return location.parent_pointer + assert finding.line_number is not None + line_assignments = structured_pointer_assignments.get(finding.line_number) + if line_assignments: + return line_assignments.popleft() - probe_findings = key_probe_results.get(location.key_probe_text, set()) - if finding_type in probe_findings: - return location.parent_pointer + return location.parent_pointer - return location.json_pointer - - async def _collect_key_probe_results( + def _build_structured_pointer_assignments( self, *, normalized: NormalizedPayload, - findings: tuple[Any, ...], - runtime: DetectSecretsRuntime, + scanned_text: str, + findings: tuple[ScanFinding, ...], scan_config: ScanConfig, started_at: float, - ) -> dict[str, set[str]] | None: + ) -> dict[int, deque[str | None]]: if normalized.payload_type not in {"dict", "list"}: return {} - key_probe_texts = self._collect_unique_key_probe_texts(normalized, findings) - if not key_probe_texts: + if self._remaining_timeout_ms(started_at) <= 0: return {} - remaining_ms = self._remaining_timeout_ms(started_at) - if remaining_ms <= 0: - return None + findings_by_line: dict[int, list[ScanFinding]] = defaultdict(list) + for finding in findings: + if finding.line_number is None: + continue + findings_by_line[finding.line_number].append(finding) - try: - probe_result = await runtime.scan( - ScanRequest( - content="\n".join(key_probe_texts), - timeout_ms=remaining_ms, - config=scan_config, - ) - ) - except RuntimeScanError: - return None + if not findings_by_line: + return {} - results_by_probe = {probe_text: set[str]() for probe_text in key_probe_texts} - for finding in probe_result.findings: - if finding.line_number is None: + scanned_lines = scanned_text.splitlines() + assignments_by_line: dict[int, deque[str | None]] = {} + for line_number, line_findings in findings_by_line.items(): + location = normalized.line_locations_by_line.get(line_number) + if location is None or location.key_probe_text is None: continue - line_index = finding.line_number - 1 - if 0 <= line_index < len(key_probe_texts): - results_by_probe[key_probe_texts[line_index]].add(finding.type) - return results_by_probe - def _collect_unique_key_probe_texts( - self, - normalized: NormalizedPayload, - findings: tuple[Any, ...], - ) -> list[str]: - key_probe_texts: list[str] = [] - seen: set[str] = set() + if self._remaining_timeout_ms(started_at) <= 0: + break - for finding in findings: - if finding.line_number is None: + line_index = line_number - 1 + if not 0 <= line_index < len(scanned_lines): continue - location = normalized.line_locations_by_line.get(finding.line_number) - key_probe_text = None if location is None else location.key_probe_text - if key_probe_text is None or key_probe_text in seen: + + full_matches = self._scan_line_matches(scanned_lines[line_index], scan_config) + if not full_matches: continue - seen.add(key_probe_text) - key_probe_texts.append(key_probe_text) + probe_matches = self._scan_line_matches(location.key_probe_text, scan_config) + line_assignments = self._assign_structured_line_pointers( + location=location, + line_findings=line_findings, + full_matches=full_matches, + probe_matches=probe_matches, + ) + assignments_by_line[line_number] = deque(line_assignments) + + return assignments_by_line + + def _assign_structured_line_pointers( + self, + *, + location: LineLocation, + line_findings: list[ScanFinding], + full_matches: tuple[_LocalLineMatch, ...], + probe_matches: tuple[_LocalLineMatch, ...], + ) -> list[str | None]: + if len(full_matches) != len(line_findings): + return self._fallback_structured_line_pointers( + location=location, + line_findings=line_findings, + probe_matches=probe_matches, + ) + + probe_match_counts = Counter((match.type, match.secret_value) for match in probe_matches) + line_pointers: list[str | None] = [] + for finding, full_match in zip(line_findings, full_matches, strict=True): + if finding.type != full_match.type: + return self._fallback_structured_line_pointers( + location=location, + line_findings=line_findings, + probe_matches=probe_matches, + ) + + match_key = (full_match.type, full_match.secret_value) + if probe_match_counts[match_key] > 0: + line_pointers.append(location.parent_pointer) + probe_match_counts[match_key] -= 1 + else: + line_pointers.append(location.json_pointer) - return key_probe_texts + return line_pointers + + def _fallback_structured_line_pointers( + self, + *, + location: LineLocation, + line_findings: list[ScanFinding], + probe_matches: tuple[_LocalLineMatch, ...], + ) -> list[str | None]: + probe_types = {match.type for match in probe_matches} + return [ + location.parent_pointer if finding.type in probe_types else location.json_pointer + for finding in line_findings + ] + + def _scan_line_matches( + self, + line: str, + scan_config: ScanConfig, + ) -> tuple[_LocalLineMatch, ...]: + with _LOCAL_SCAN_LOCK: + with self._scan_settings(scan_config): + return tuple( + _LocalLineMatch(type=secret.type, secret_value=secret.secret_value) + for secret in scan_line(line) + ) + + def _scan_settings(self, scan_config: ScanConfig) -> AbstractContextManager[object]: + if scan_config.enabled_plugins is None: + return default_settings() + + return transient_settings( + {"plugins_used": [{"name": plugin_name} for plugin_name in scan_config.enabled_plugins]} + ) def _remaining_timeout_ms(self, started_at: float) -> int: return int(self.config.timeout_ms - ((time.monotonic() - started_at) * 1000)) diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index 2fa4b15b..605bae29 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -7,7 +7,6 @@ from detect_secrets_async import ( RuntimeScanError, ScanFailureCode, - ScanFinding, ScanResult, get_runtime_info, ) @@ -17,11 +16,7 @@ DetectSecretsEvaluatorConfig, ) from agent_control_evaluator_detect_secrets.detect_secrets.evaluator import FAILURE_MESSAGES -from agent_control_evaluator_detect_secrets.detect_secrets.normalization import ( - LineLocation, - NormalizedPayload, - normalize_payload, -) +from agent_control_evaluator_detect_secrets.detect_secrets.normalization import normalize_payload @pytest.mark.asyncio @@ -255,63 +250,18 @@ async def scan(self, request: Any) -> Any: @pytest.mark.asyncio -async def test_structured_key_probes_are_batched(monkeypatch: pytest.MonkeyPatch) -> None: - normalized = NormalizedPayload( - payload_type="dict", - text='{"ignored": true}', - line_locations_by_line={ - 1: LineLocation( - json_pointer="/safe-one", - parent_pointer="/parent-one", - key_probe_text='"probe-one": null', - ), - 2: LineLocation( - json_pointer="/safe-two", - parent_pointer="/parent-two", - key_probe_text='"probe-two": null', - ), - }, - ) - - class FakeRuntime: - def __init__(self) -> None: - self.requests: list[Any] = [] - - async def scan(self, request: Any) -> ScanResult: - self.requests.append(request) - if len(self.requests) == 1: - return ScanResult( - findings=( - ScanFinding(type="GitHub Token", line_number=1), - ScanFinding(type="GitHub Token", line_number=2), - ), - detect_secrets_version="1.5.0", - ) - return ScanResult( - findings=(ScanFinding(type="GitHub Token", line_number=1),), - detect_secrets_version="1.5.0", - ) - - fake_runtime = FakeRuntime() - monkeypatch.setattr( - "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.normalize_payload", - lambda data: normalized, - ) - monkeypatch.setattr( - "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", - lambda: fake_runtime, - ) - +async def test_structured_key_probe_disambiguates_same_detector_type() -> None: evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) - result = await evaluator.evaluate({"ignored": "ignored"}) + + result = await evaluator.evaluate({"secret": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="}) assert result.matched is True assert result.metadata is not None - assert fake_runtime.requests[1].content == '"probe-one": null\n"probe-two": null' - assert len(fake_runtime.requests) == 2 + assert result.metadata["normalized_payload_type"] == "dict" assert result.metadata["findings"] == [ - {"type": "GitHub Token", "json_pointer": "/parent-one"}, - {"type": "GitHub Token", "json_pointer": "/safe-two"}, + {"type": "Hex High Entropy String"}, + {"type": "Hex High Entropy String", "json_pointer": "/secret"}, + {"type": "Secret Keyword", "json_pointer": "/secret"}, ] @@ -396,6 +346,11 @@ def test_invalid_regex_is_rejected() -> None: DetectSecretsEvaluatorConfig(exclude_lines_regex=["("]) +def test_blank_regex_is_rejected() -> None: + with pytest.raises(ValueError, match="exclude_lines_regex entries must be non-empty"): + DetectSecretsEvaluatorConfig(exclude_lines_regex=[""]) + + def test_unknown_plugin_is_rejected() -> None: with pytest.raises(ValueError, match="Unknown detect-secrets plugin"): DetectSecretsEvaluatorConfig(enabled_plugins=["NoSuchPlugin"]) From 4cbcf8a91850930b16667147494a3a01414a1e6a Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Tue, 21 Apr 2026 20:17:09 -0700 Subject: [PATCH 07/20] fix: reuse detect-secrets runtime settings --- .github/workflows/ci.yml | 5 +- Makefile | 6 +- evaluators/contrib/detect_secrets/Makefile | 2 +- .../contrib/detect_secrets/pyproject.toml | 5 - .../detect_secrets/evaluator.py | 147 +++++++++++------- .../tests/detect_secrets/test_evaluator.py | 46 +++++- 6 files changed, 140 insertions(+), 71 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 909b71b4..d5f5f64e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,10 +59,13 @@ jobs: - name: Test with coverage run: make test + - name: Test detect-secrets contrib evaluator + run: make detect-secrets-test + - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: - files: coverage-models.xml,coverage-engine.xml,coverage-telemetry.xml,coverage-server.xml,coverage-sdk.xml + files: coverage-models.xml,coverage-engine.xml,coverage-telemetry.xml,coverage-server.xml,coverage-sdk.xml,coverage-evaluators-detect-secrets.xml fail_ci_if_error: false token: ${{ secrets.CODECOV_TOKEN }} diff --git a/Makefile b/Makefile index e11ac6a0..dbd2ac9c 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build galileo-test galileo-lint galileo-lint-fix galileo-typecheck galileo-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish +.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build galileo-test galileo-lint galileo-lint-fix galileo-typecheck galileo-build detect-secrets-test sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish # Workspace package names PACK_MODELS := agent-control-models @@ -18,6 +18,7 @@ ENGINE_DIR := engine TELEMETRY_DIR := telemetry EVALUATORS_DIR := evaluators/builtin GALILEO_DIR := evaluators/contrib/galileo +DETECT_SECRETS_DIR := evaluators/contrib/detect_secrets UI_DIR := ui help: @@ -253,6 +254,9 @@ ui-%: galileo-test: $(MAKE) -C $(GALILEO_DIR) test +detect-secrets-test: + $(MAKE) -C $(DETECT_SECRETS_DIR) test + galileo-lint: $(MAKE) -C $(GALILEO_DIR) lint diff --git a/evaluators/contrib/detect_secrets/Makefile b/evaluators/contrib/detect_secrets/Makefile index 3d413341..a68934d4 100644 --- a/evaluators/contrib/detect_secrets/Makefile +++ b/evaluators/contrib/detect_secrets/Makefile @@ -4,7 +4,7 @@ sync: uv sync --group dev test: - uv run --group dev pytest + uv run --group dev pytest --cov=src --cov-report=xml:../../../coverage-evaluators-detect-secrets.xml -q lint: uv run --group dev ruff check . diff --git a/evaluators/contrib/detect_secrets/pyproject.toml b/evaluators/contrib/detect_secrets/pyproject.toml index 48a6a70a..5346601a 100644 --- a/evaluators/contrib/detect_secrets/pyproject.toml +++ b/evaluators/contrib/detect_secrets/pyproject.toml @@ -9,7 +9,6 @@ authors = [{ name = "Agent Control Team" }] dependencies = [ "agent-control-evaluators>=3.0.0", "agent-control-models>=3.0.0", - "detect-secrets==1.5.0", "detect-secrets-async>=0.2.0,<0.3.0", "google-re2>=1.1", "pydantic>=2.12.4", @@ -44,10 +43,6 @@ files = ["src", "tests"] module = "re2" ignore_missing_imports = true -[[tool.mypy.overrides]] -module = "detect_secrets.*" -ignore_missing_imports = true - [tool.uv.sources] agent-control-evaluators = { path = "../../builtin", editable = true } agent-control-models = { path = "../../../models", editable = true } diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index 0cb10a3f..dd23e6d8 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -2,20 +2,17 @@ from __future__ import annotations -import threading import time from collections import Counter, defaultdict, deque -from contextlib import AbstractContextManager -from dataclasses import dataclass from importlib.metadata import PackageNotFoundError, version from typing import Any import re2 from agent_control_evaluators import Evaluator, EvaluatorMetadata, register_evaluator from agent_control_models import EvaluatorResult -from detect_secrets.core.scan import scan_line -from detect_secrets.settings import default_settings, transient_settings from detect_secrets_async import ( + DetectSecretsRuntime, + RuntimeConfigConflictError, RuntimeScanError, ScanConfig, ScanFinding, @@ -52,15 +49,6 @@ "worker_protocol_error": "detect-secrets worker protocol error", "runtime_error": "detect-secrets runtime error", } -_LOCAL_SCAN_LOCK = threading.Lock() - - -@dataclass(frozen=True, slots=True) -class _LocalLineMatch: - """A direct detect-secrets match for a single rendered line.""" - - type: str - secret_value: str | None @register_evaluator @@ -122,8 +110,17 @@ async def evaluate(self, data: Any) -> EvaluatorResult: ) try: - runtime = get_runtime() + configured_runtime = runtime_info.configured_runtime + runtime = ( + get_runtime(configured_runtime) if configured_runtime is not None else get_runtime() + ) scan_result = await runtime.scan(request) + except RuntimeConfigConflictError: + return self._failure_result( + failure_mode="runtime_error", + normalized_payload_type=normalized.payload_type, + detect_secrets_version=runtime_info.detect_secrets_version, + ) except RuntimeScanError as exc: failure_mode = exc.code.value return self._failure_result( @@ -136,6 +133,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: normalized=normalized, scanned_text=filtered_text, findings=scan_result.findings, + runtime=runtime, scan_config=request.config, started_at=started_at, ) @@ -151,14 +149,16 @@ async def _map_findings( normalized: NormalizedPayload, scanned_text: str, findings: tuple[ScanFinding, ...], + runtime: DetectSecretsRuntime, scan_config: ScanConfig, started_at: float, ) -> list[dict[str, Any]]: mapped: list[dict[str, Any]] = [] - structured_pointer_assignments = self._build_structured_pointer_assignments( + structured_pointer_assignments = await self._build_structured_pointer_assignments( normalized=normalized, scanned_text=scanned_text, findings=findings, + runtime=runtime, scan_config=scan_config, started_at=started_at, ) @@ -203,12 +203,13 @@ def _safe_structured_pointer( return location.parent_pointer - def _build_structured_pointer_assignments( + async def _build_structured_pointer_assignments( self, *, normalized: NormalizedPayload, scanned_text: str, findings: tuple[ScanFinding, ...], + runtime: DetectSecretsRuntime, scan_config: ScanConfig, started_at: float, ) -> dict[int, deque[str | None]]: @@ -228,28 +229,52 @@ def _build_structured_pointer_assignments( return {} scanned_lines = scanned_text.splitlines() - assignments_by_line: dict[int, deque[str | None]] = {} - for line_number, line_findings in findings_by_line.items(): + candidate_lines: list[int] = [] + full_line_batch: list[str] = [] + probe_line_batch: list[str] = [] + + for line_number in sorted(findings_by_line): location = normalized.line_locations_by_line.get(line_number) if location is None or location.key_probe_text is None: continue - if self._remaining_timeout_ms(started_at) <= 0: - break - line_index = line_number - 1 if not 0 <= line_index < len(scanned_lines): continue - full_matches = self._scan_line_matches(scanned_lines[line_index], scan_config) - if not full_matches: - continue - probe_matches = self._scan_line_matches(location.key_probe_text, scan_config) + candidate_lines.append(line_number) + full_line_batch.append(scanned_lines[line_index]) + probe_line_batch.append(location.key_probe_text) + + if not candidate_lines: + return {} + + full_line_findings = await self._scan_line_batch( + runtime=runtime, + lines=full_line_batch, + scan_config=scan_config, + started_at=started_at, + ) + if full_line_findings is None: + return {} + + probe_line_findings = await self._scan_line_batch( + runtime=runtime, + lines=probe_line_batch, + scan_config=scan_config, + started_at=started_at, + ) + if probe_line_findings is None: + return {} + + assignments_by_line: dict[int, deque[str | None]] = {} + for batch_index, line_number in enumerate(candidate_lines, start=1): + location = normalized.line_locations_by_line[line_number] line_assignments = self._assign_structured_line_pointers( location=location, - line_findings=line_findings, - full_matches=full_matches, - probe_matches=probe_matches, + line_findings=findings_by_line[line_number], + full_line_findings=full_line_findings.get(batch_index, []), + probe_line_findings=probe_line_findings.get(batch_index, []), ) assignments_by_line[line_number] = deque(line_assignments) @@ -260,30 +285,29 @@ def _assign_structured_line_pointers( *, location: LineLocation, line_findings: list[ScanFinding], - full_matches: tuple[_LocalLineMatch, ...], - probe_matches: tuple[_LocalLineMatch, ...], + full_line_findings: list[ScanFinding], + probe_line_findings: list[ScanFinding], ) -> list[str | None]: - if len(full_matches) != len(line_findings): + if len(full_line_findings) != len(line_findings): return self._fallback_structured_line_pointers( location=location, line_findings=line_findings, - probe_matches=probe_matches, + probe_line_findings=probe_line_findings, ) - probe_match_counts = Counter((match.type, match.secret_value) for match in probe_matches) + probe_type_counts = Counter(finding.type for finding in probe_line_findings) line_pointers: list[str | None] = [] - for finding, full_match in zip(line_findings, full_matches, strict=True): - if finding.type != full_match.type: + for finding, full_line_finding in zip(line_findings, full_line_findings, strict=True): + if finding.type != full_line_finding.type: return self._fallback_structured_line_pointers( location=location, line_findings=line_findings, - probe_matches=probe_matches, + probe_line_findings=probe_line_findings, ) - match_key = (full_match.type, full_match.secret_value) - if probe_match_counts[match_key] > 0: + if probe_type_counts[full_line_finding.type] > 0: line_pointers.append(location.parent_pointer) - probe_match_counts[match_key] -= 1 + probe_type_counts[full_line_finding.type] -= 1 else: line_pointers.append(location.json_pointer) @@ -294,33 +318,44 @@ def _fallback_structured_line_pointers( *, location: LineLocation, line_findings: list[ScanFinding], - probe_matches: tuple[_LocalLineMatch, ...], + probe_line_findings: list[ScanFinding], ) -> list[str | None]: - probe_types = {match.type for match in probe_matches} + probe_types = {finding.type for finding in probe_line_findings} return [ location.parent_pointer if finding.type in probe_types else location.json_pointer for finding in line_findings ] - def _scan_line_matches( + async def _scan_line_batch( self, - line: str, + *, + runtime: DetectSecretsRuntime, + lines: list[str], scan_config: ScanConfig, - ) -> tuple[_LocalLineMatch, ...]: - with _LOCAL_SCAN_LOCK: - with self._scan_settings(scan_config): - return tuple( - _LocalLineMatch(type=secret.type, secret_value=secret.secret_value) - for secret in scan_line(line) + started_at: float, + ) -> dict[int, list[ScanFinding]] | None: + remaining_ms = self._remaining_timeout_ms(started_at) + if remaining_ms <= 0: + return None + + try: + result = await runtime.scan( + ScanRequest( + content="\n".join(lines), + timeout_ms=max(1, remaining_ms), + config=scan_config, ) + ) + except RuntimeScanError: + return None - def _scan_settings(self, scan_config: ScanConfig) -> AbstractContextManager[object]: - if scan_config.enabled_plugins is None: - return default_settings() + findings_by_line: dict[int, list[ScanFinding]] = defaultdict(list) + for finding in result.findings: + if finding.line_number is None: + continue + findings_by_line[finding.line_number].append(finding) - return transient_settings( - {"plugins_used": [{"name": plugin_name} for plugin_name in scan_config.enabled_plugins]} - ) + return findings_by_line def _remaining_timeout_ms(self, started_at: float) -> int: return int(self.config.timeout_ms - ((time.monotonic() - started_at) * 1000)) diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index 605bae29..e12fe723 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -5,6 +5,7 @@ import pytest from detect_secrets_async import ( + RuntimeConfig, RuntimeScanError, ScanFailureCode, ScanResult, @@ -213,7 +214,7 @@ async def scan(self, request: Any) -> Any: monkeypatch.setattr( "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", - lambda: FakeRuntime(), + lambda config=None: FakeRuntime(), ) evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig(on_error="deny")) @@ -235,7 +236,7 @@ async def scan(self, request: Any) -> Any: monkeypatch.setattr( "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", - lambda: FakeRuntime(), + lambda config=None: FakeRuntime(), ) evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) @@ -258,11 +259,42 @@ async def test_structured_key_probe_disambiguates_same_detector_type() -> None: assert result.matched is True assert result.metadata is not None assert result.metadata["normalized_payload_type"] == "dict" - assert result.metadata["findings"] == [ - {"type": "Hex High Entropy String"}, - {"type": "Hex High Entropy String", "json_pointer": "/secret"}, - {"type": "Secret Keyword", "json_pointer": "/secret"}, + assert result.metadata["findings_count"] == 3 + assert {"type": "Hex High Entropy String"} in result.metadata["findings"] + assert {"type": "Hex High Entropy String", "json_pointer": "/secret"} in result.metadata[ + "findings" ] + assert {"type": "Secret Keyword", "json_pointer": "/secret"} in result.metadata["findings"] + + +@pytest.mark.asyncio +async def test_preconfigured_runtime_is_reused(monkeypatch: pytest.MonkeyPatch) -> None: + class FakeRuntime: + async def scan(self, request: Any) -> ScanResult: + return ScanResult(findings=(), detect_secrets_version="1.5.0") + + runtime_config = RuntimeConfig(pool_size=2, max_queue_depth=6, max_requests_per_worker=40) + runtime_info = get_runtime_info().model_copy(update={"configured_runtime": runtime_config}) + runtime_calls: list[RuntimeConfig | None] = [] + + def fake_get_runtime(config: RuntimeConfig | None = None) -> FakeRuntime: + runtime_calls.append(config) + return FakeRuntime() + + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime_info", + lambda: runtime_info, + ) + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", + fake_get_runtime, + ) + + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + result = await evaluator.evaluate("safe content only") + + assert result.matched is False + assert runtime_calls == [runtime_config] @pytest.mark.asyncio @@ -287,7 +319,7 @@ def fake_monotonic() -> float: monkeypatch.setattr( "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", - lambda: fake_runtime, + lambda config=None: fake_runtime, ) monkeypatch.setattr( "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.time.monotonic", From aa7c61b18c24f480e29a7fe208090847039e14c5 Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Wed, 22 Apr 2026 13:35:33 -0700 Subject: [PATCH 08/20] fix: align detect-secrets contrib metadata --- evaluators/builtin/tests/test_contrib_packages.py | 1 + evaluators/contrib/detect_secrets/pyproject.toml | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/evaluators/builtin/tests/test_contrib_packages.py b/evaluators/builtin/tests/test_contrib_packages.py index 9f25186d..0ff5f3fc 100644 --- a/evaluators/builtin/tests/test_contrib_packages.py +++ b/evaluators/builtin/tests/test_contrib_packages.py @@ -39,6 +39,7 @@ def test_discover_contrib_packages_returns_expected_metadata() -> None: assert [(package.name, package.package, package.extra) for package in packages] == [ ("budget", "agent-control-evaluator-budget", "budget"), ("cisco", "agent-control-evaluator-cisco", "cisco"), + ("detect_secrets", "agent-control-evaluator-detect_secrets", "detect_secrets"), ("galileo", "agent-control-evaluator-galileo", "galileo"), ] diff --git a/evaluators/contrib/detect_secrets/pyproject.toml b/evaluators/contrib/detect_secrets/pyproject.toml index 54c90bda..29892b50 100644 --- a/evaluators/contrib/detect_secrets/pyproject.toml +++ b/evaluators/contrib/detect_secrets/pyproject.toml @@ -1,14 +1,14 @@ [project] name = "agent-control-evaluator-detect_secrets" -version = "0.1.0" +version = "7.6.0" description = "detect-secrets evaluator for agent-control" readme = "README.md" requires-python = ">=3.12" license = { text = "Apache-2.0" } authors = [{ name = "Agent Control Team" }] dependencies = [ - "agent-control-evaluators>=3.0.0", - "agent-control-models>=3.0.0", + "agent-control-evaluators>=7.5.0", + "agent-control-models>=7.5.0", "detect-secrets-async>=0.2.0,<0.3.0", "google-re2>=1.1", "pydantic>=2.12.4", From a4dc7e5e9041cad22b9661e8ab52855d22ba8b9d Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Wed, 22 Apr 2026 13:48:51 -0700 Subject: [PATCH 09/20] fix: tighten detect-secrets pointer fallback --- .../detect_secrets/evaluator.py | 2 +- .../detect_secrets/normalization.py | 2 +- .../tests/detect_secrets/test_evaluator.py | 50 +++++++++++++++++++ sdks/python/pyproject.toml | 2 + server/pyproject.toml | 2 + 5 files changed, 56 insertions(+), 2 deletions(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index 6b28560b..eba99061 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -201,7 +201,7 @@ def _safe_structured_pointer( if line_assignments: return line_assignments.popleft() - return location.parent_pointer + return None async def _build_structured_pointer_assignments( self, diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py index d506c1d5..5a1e255d 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py @@ -167,7 +167,7 @@ def _render_dict_lines( child=child, child_lines=child_lines, child_pointer=child_pointer, - parent_pointer=pointer or None, + parent_pointer=pointer if pointer else ("" if isinstance(child, dict | list) else None), key_literal=key_literal, ) child_lines[-1] = RenderedLine( diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index e12fe723..341519d6 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -120,6 +120,31 @@ async def test_dict_key_with_container_value_maps_findings_to_json_pointer() -> ] +@pytest.mark.asyncio +async def test_root_dict_key_with_container_value_maps_findings_to_root_pointer() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + result = await evaluator.evaluate( + { + "ghp_123456789012345678901234567890123456": { + "nested": "safe", + } + } + ) + + assert result.matched is True + assert result.metadata is not None + assert result.metadata["normalized_payload_type"] == "dict" + assert result.metadata["findings"] == [ + { + "type": "GitHub Token", + "json_pointer": "", + } + ] + + @pytest.mark.asyncio async def test_dict_key_with_scalar_value_omits_json_pointer() -> None: evaluator = DetectSecretsEvaluator( @@ -267,6 +292,31 @@ async def test_structured_key_probe_disambiguates_same_detector_type() -> None: assert {"type": "Secret Keyword", "json_pointer": "/secret"} in result.metadata["findings"] +@pytest.mark.asyncio +async def test_structured_pointer_fallback_omits_ambiguous_pointer( + monkeypatch: pytest.MonkeyPatch, +) -> None: + async def fail_scan_line_batch(self: DetectSecretsEvaluator, **kwargs: Any) -> None: + return None + + monkeypatch.setattr( + DetectSecretsEvaluator, + "_scan_line_batch", + fail_scan_line_batch, + ) + + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + result = await evaluator.evaluate( + {"outer": {"token": "ghp_123456789012345678901234567890123456"}} + ) + + assert result.matched is True + assert result.metadata is not None + assert result.metadata["findings"] == [{"type": "GitHub Token"}] + + @pytest.mark.asyncio async def test_preconfigured_runtime_is_reused(monkeypatch: pytest.MonkeyPatch) -> None: class FakeRuntime: diff --git a/sdks/python/pyproject.toml b/sdks/python/pyproject.toml index 036c54e4..844fa64f 100644 --- a/sdks/python/pyproject.toml +++ b/sdks/python/pyproject.toml @@ -39,6 +39,7 @@ Repository = "https://github.com/yourusername/agent-control" strands-agents = ["strands-agents>=1.26.0"] google-adk = ["google-adk>=1.0.0"] galileo = ["agent-control-evaluator-galileo>=7.5.0"] +detect_secrets = ["agent-control-evaluator-detect_secrets>=7.5.0"] [dependency-groups] dev = [ @@ -92,3 +93,4 @@ agent-control-telemetry = { workspace = true } agent-control-evaluators = { workspace = true } # For local dev: use local galileo package instead of PyPI agent-control-evaluator-galileo = { path = "../../evaluators/contrib/galileo", editable = true } +agent-control-evaluator-detect_secrets = { path = "../../evaluators/contrib/detect_secrets", editable = true } diff --git a/server/pyproject.toml b/server/pyproject.toml index 6c28a317..c7c80d0a 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -32,6 +32,7 @@ license = {text = "Apache-2.0"} [project.optional-dependencies] galileo = ["agent-control-evaluator-galileo>=7.5.0"] +detect_secrets = ["agent-control-evaluator-detect_secrets>=7.5.0"] [dependency-groups] dev = [ @@ -99,3 +100,4 @@ agent-control-telemetry = { workspace = true } agent-control-evaluators = { workspace = true } # For local dev: use local galileo package instead of PyPI agent-control-evaluator-galileo = { path = "../evaluators/contrib/galileo", editable = true } +agent-control-evaluator-detect_secrets = { path = "../evaluators/contrib/detect_secrets", editable = true } From 93146784f2bd41ba02920c74e9b7f461b8a96c5d Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Wed, 22 Apr 2026 13:59:58 -0700 Subject: [PATCH 10/20] fix: honor detect-secrets runtime failures --- .../detect_secrets/evaluator.py | 17 +++++++++++++- .../detect_secrets/normalization.py | 2 +- .../tests/detect_secrets/test_evaluator.py | 22 ++++++++++++++++++- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index eba99061..72f42394 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -72,7 +72,14 @@ def __init__(self, config: DetectSecretsEvaluatorConfig) -> None: async def evaluate(self, data: Any) -> EvaluatorResult: """Normalize selector output, run detect-secrets, and map results into EvaluatorResult.""" started_at = time.monotonic() - runtime_info = get_runtime_info() + try: + runtime_info = get_runtime_info() + except Exception: + return self._failure_result( + failure_mode="runtime_error", + normalized_payload_type=None, + detect_secrets_version="unknown", + ) try: normalized = normalize_payload(data) @@ -128,6 +135,12 @@ async def evaluate(self, data: Any) -> EvaluatorResult: normalized_payload_type=normalized.payload_type, detect_secrets_version=runtime_info.detect_secrets_version, ) + except Exception: + return self._failure_result( + failure_mode="runtime_error", + normalized_payload_type=normalized.payload_type, + detect_secrets_version=runtime_info.detect_secrets_version, + ) findings = await self._map_findings( normalized=normalized, @@ -348,6 +361,8 @@ async def _scan_line_batch( ) except RuntimeScanError: return None + except Exception: + return None findings_by_line: dict[int, list[ScanFinding]] = defaultdict(list) for finding in result.findings: diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py index 5a1e255d..d7b776c3 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py @@ -167,7 +167,7 @@ def _render_dict_lines( child=child, child_lines=child_lines, child_pointer=child_pointer, - parent_pointer=pointer if pointer else ("" if isinstance(child, dict | list) else None), + parent_pointer=pointer if pointer else "", key_literal=key_literal, ) child_lines[-1] = RenderedLine( diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index 341519d6..dbc39281 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -163,6 +163,7 @@ async def test_dict_key_with_scalar_value_omits_json_pointer() -> None: assert result.metadata["findings"] == [ { "type": "GitHub Token", + "json_pointer": "", } ] @@ -285,7 +286,7 @@ async def test_structured_key_probe_disambiguates_same_detector_type() -> None: assert result.metadata is not None assert result.metadata["normalized_payload_type"] == "dict" assert result.metadata["findings_count"] == 3 - assert {"type": "Hex High Entropy String"} in result.metadata["findings"] + assert {"type": "Hex High Entropy String", "json_pointer": ""} in result.metadata["findings"] assert {"type": "Hex High Entropy String", "json_pointer": "/secret"} in result.metadata[ "findings" ] @@ -347,6 +348,25 @@ def fake_get_runtime(config: RuntimeConfig | None = None) -> FakeRuntime: assert runtime_calls == [runtime_config] +@pytest.mark.asyncio +async def test_unexpected_runtime_errors_honor_on_error_deny( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", + lambda config=None: (_ for _ in ()).throw(ValueError("boom")), + ) + + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig(on_error="deny")) + result = await evaluator.evaluate("safe content only") + + assert result.matched is True + assert result.error is None + assert result.metadata is not None + assert result.metadata["failure_mode"] == "runtime_error" + assert result.metadata["fallback_action"] == "deny" + + @pytest.mark.asyncio async def test_initial_scan_uses_remaining_timeout_budget( monkeypatch: pytest.MonkeyPatch, From 21b8917ce8d2a01338222b6a2cff03d4d9780361 Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Wed, 22 Apr 2026 14:13:54 -0700 Subject: [PATCH 11/20] fix: canonicalize structured detect-secrets payloads --- .../detect_secrets/normalization.py | 36 +++++++++++--- .../tests/detect_secrets/test_evaluator.py | 48 +++++++++++++++++++ 2 files changed, 77 insertions(+), 7 deletions(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py index d7b776c3..87255306 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py @@ -50,7 +50,7 @@ def normalize_payload(data: Any) -> NormalizedPayload: if isinstance(data, dict): return _normalize_structured_payload(data, payload_type="dict") - if isinstance(data, list): + if isinstance(data, list | tuple): return _normalize_structured_payload(data, payload_type="list") if isinstance(data, bool | int | float): @@ -72,17 +72,25 @@ def apply_line_exclusions(text: str, patterns: tuple[Any, ...]) -> str: def _normalize_structured_payload( - data: dict[Any, Any] | list[Any], + data: dict[Any, Any] | list[Any] | tuple[Any, ...], *, payload_type: Literal["dict", "list"], ) -> NormalizedPayload: + normalized_data = _normalize_json_value(data) + try: - text = json.dumps(data, sort_keys=True, indent=2, ensure_ascii=False, allow_nan=False) + text = json.dumps( + normalized_data, + sort_keys=True, + indent=2, + ensure_ascii=False, + allow_nan=False, + ) except (TypeError, ValueError) as exc: raise NormalizationError(f"Failed to normalize structured payload: {exc}") from exc try: - rendered_lines = _render_json_lines(data) + rendered_lines = _render_json_lines(normalized_data) except (TypeError, ValueError) as exc: raise NormalizationError(f"Failed to map structured payload lines: {exc}") from exc @@ -111,6 +119,20 @@ def _normalize_primitive_payload(data: bool | int | float) -> NormalizedPayload: return NormalizedPayload(payload_type="primitive", text=text, line_locations_by_line={}) +def _normalize_json_value(value: Any) -> Any: + """Convert supported Python payloads into a deterministic JSON-compatible shape.""" + if isinstance(value, dict): + return { + _json_object_key_name(raw_key): _normalize_json_value(child) + for raw_key, child in value.items() + } + + if isinstance(value, list | tuple): + return [_normalize_json_value(child) for child in value] + + return value + + def _render_json_lines( value: Any, *, @@ -123,7 +145,7 @@ def _render_json_lines( if isinstance(value, dict): return _render_dict_lines(value, indent_level=indent_level, prefix=prefix, pointer=pointer) - if isinstance(value, list): + if isinstance(value, list | tuple): return _render_list_lines(value, indent_level=indent_level, prefix=prefix, pointer=pointer) scalar_text = json.dumps(value, ensure_ascii=False, allow_nan=False) @@ -148,7 +170,7 @@ def _render_dict_lines( return [RenderedLine(text=f"{indent}{prefix}{{}}")] lines = [RenderedLine(text=f"{indent}{prefix}{{")] - items = sorted(value.items(), key=lambda item: item[0]) + items = sorted(value.items(), key=lambda item: _json_object_key_name(item[0])) last_index = len(items) - 1 for index, (raw_key, child) in enumerate(items): @@ -181,7 +203,7 @@ def _render_dict_lines( def _render_list_lines( - value: list[Any], + value: list[Any] | tuple[Any, ...], *, indent_level: int, prefix: str, diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index dbc39281..23af08c0 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -192,6 +192,54 @@ async def test_list_payload_maps_findings_to_json_pointer() -> None: ] +@pytest.mark.asyncio +async def test_tuple_payload_maps_findings_like_a_list() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + result = await evaluator.evaluate( + ( + {"token": "ghp_123456789012345678901234567890123456"}, + {"kind": "safe"}, + ) + ) + + assert result.matched is True + assert result.metadata is not None + assert result.metadata["normalized_payload_type"] == "list" + assert result.metadata["findings"] == [ + { + "type": "GitHub Token", + "json_pointer": "/0/token", + } + ] + + +@pytest.mark.asyncio +async def test_mixed_key_types_still_normalize_and_scan() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + result = await evaluator.evaluate( + { + 1: "ghp_123456789012345678901234567890123456", + "kind": "safe", + } + ) + + assert result.matched is True + assert result.metadata is not None + assert result.metadata["normalized_payload_type"] == "dict" + assert result.metadata["findings"] == [ + { + "type": "GitHub Token", + "json_pointer": "/1", + } + ] + + @pytest.mark.asyncio async def test_primitive_payload_is_normalized_and_omits_line_numbers() -> None: evaluator = DetectSecretsEvaluator( From 56bbc415fb4a3c9cb0e6c32bd0c4740b4822213e Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Wed, 22 Apr 2026 14:19:51 -0700 Subject: [PATCH 12/20] fix: reject colliding detect-secrets keys --- .../detect_secrets/normalization.py | 16 +++++++++++----- .../tests/detect_secrets/test_evaluator.py | 19 +++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py index 87255306..c3455bc3 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py @@ -76,7 +76,10 @@ def _normalize_structured_payload( *, payload_type: Literal["dict", "list"], ) -> NormalizedPayload: - normalized_data = _normalize_json_value(data) + try: + normalized_data = _normalize_json_value(data) + except (TypeError, ValueError) as exc: + raise NormalizationError(f"Failed to normalize structured payload: {exc}") from exc try: text = json.dumps( @@ -122,10 +125,13 @@ def _normalize_primitive_payload(data: bool | int | float) -> NormalizedPayload: def _normalize_json_value(value: Any) -> Any: """Convert supported Python payloads into a deterministic JSON-compatible shape.""" if isinstance(value, dict): - return { - _json_object_key_name(raw_key): _normalize_json_value(child) - for raw_key, child in value.items() - } + normalized_object: dict[str, Any] = {} + for raw_key, child in value.items(): + normalized_key = _json_object_key_name(raw_key) + if normalized_key in normalized_object: + raise ValueError(f"JSON key collision after normalization: {normalized_key!r}") + normalized_object[normalized_key] = _normalize_json_value(child) + return normalized_object if isinstance(value, list | tuple): return [_normalize_json_value(child) for child in value] diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index 23af08c0..bb89a778 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -240,6 +240,25 @@ async def test_mixed_key_types_still_normalize_and_scan() -> None: ] +@pytest.mark.asyncio +async def test_colliding_normalized_keys_route_through_normalization_error() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + result = await evaluator.evaluate( + { + 1: "ghp_123456789012345678901234567890123456", + "1": "safe", + } + ) + + assert result.matched is False + assert result.error == "detect-secrets evaluator failure: normalization_error" + assert result.metadata is not None + assert result.metadata["failure_mode"] == "normalization_error" + + @pytest.mark.asyncio async def test_primitive_payload_is_normalized_and_omits_line_numbers() -> None: evaluator = DetectSecretsEvaluator( From 1f65d22cb9bfc73c8d8640760db8457745dc0673 Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Wed, 22 Apr 2026 14:55:29 -0700 Subject: [PATCH 13/20] fix: harden detect-secrets result mapping --- .../detect_secrets/evaluator.py | 136 +++++++++++++----- .../detect_secrets/normalization.py | 4 + .../tests/detect_secrets/test_evaluator.py | 41 ++++++ 3 files changed, 146 insertions(+), 35 deletions(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index 72f42394..44fea128 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -4,6 +4,7 @@ import time from collections import Counter, defaultdict, deque +from dataclasses import dataclass from importlib.metadata import PackageNotFoundError, version from typing import Any @@ -50,6 +51,16 @@ "runtime_error": "detect-secrets runtime error", } +IDENTIFIER_LIKE_KEY_PATTERN = re2.compile(r"^[A-Za-z_][A-Za-z0-9_.:-]{0,127}$") + + +@dataclass(frozen=True, slots=True) +class StructuredPointerAssignments: + """Structured-line pointer assignments plus secret-bearing key ancestry.""" + + by_line: dict[int, deque[str | None]] + secret_key_pointers: set[str] + @register_evaluator class DetectSecretsEvaluator(Evaluator[DetectSecretsEvaluatorConfig]): @@ -72,31 +83,31 @@ def __init__(self, config: DetectSecretsEvaluatorConfig) -> None: async def evaluate(self, data: Any) -> EvaluatorResult: """Normalize selector output, run detect-secrets, and map results into EvaluatorResult.""" started_at = time.monotonic() - try: - runtime_info = get_runtime_info() - except Exception: - return self._failure_result( - failure_mode="runtime_error", - normalized_payload_type=None, - detect_secrets_version="unknown", - ) - try: normalized = normalize_payload(data) except NormalizationError: return self._failure_result( failure_mode="normalization_error", normalized_payload_type=None, - detect_secrets_version=runtime_info.detect_secrets_version, + detect_secrets_version=self._runtime_version_or_unknown(), ) if normalized.payload_type == "none": return self._success_result( normalized=normalized, - detect_secrets_version=runtime_info.detect_secrets_version, + detect_secrets_version=self._runtime_version_or_unknown(), findings=[], ) + try: + runtime_info = get_runtime_info() + except Exception: + return self._failure_result( + failure_mode="runtime_error", + normalized_payload_type=None, + detect_secrets_version="unknown", + ) + assert normalized.text is not None filtered_text = apply_line_exclusions(normalized.text, self._exclude_line_patterns) if len(filtered_text.encode("utf-8")) > self.config.max_bytes: @@ -187,7 +198,8 @@ async def _map_findings( json_pointer = self._safe_structured_pointer( location=location, finding=finding, - structured_pointer_assignments=structured_pointer_assignments, + structured_pointer_assignments=structured_pointer_assignments.by_line, + secret_key_pointers=structured_pointer_assignments.secret_key_pointers, ) if json_pointer is not None: finding_metadata["json_pointer"] = json_pointer @@ -202,19 +214,24 @@ def _safe_structured_pointer( location: LineLocation | None, finding: ScanFinding, structured_pointer_assignments: dict[int, deque[str | None]], + secret_key_pointers: set[str], ) -> str | None: if location is None: return None if location.key_probe_text is None: - return location.json_pointer - - assert finding.line_number is not None - line_assignments = structured_pointer_assignments.get(finding.line_number) - if line_assignments: - return line_assignments.popleft() + pointer = location.json_pointer + else: + assert finding.line_number is not None + line_assignments = structured_pointer_assignments.get(finding.line_number) + if line_assignments: + pointer = line_assignments.popleft() + else: + return None - return None + if self._pointer_traverses_secret_key(pointer, secret_key_pointers): + return None + return pointer async def _build_structured_pointer_assignments( self, @@ -225,12 +242,12 @@ async def _build_structured_pointer_assignments( runtime: DetectSecretsRuntime, scan_config: ScanConfig, started_at: float, - ) -> dict[int, deque[str | None]]: + ) -> StructuredPointerAssignments: if normalized.payload_type not in {"dict", "list"}: - return {} + return StructuredPointerAssignments(by_line={}, secret_key_pointers=set()) if self._remaining_timeout_ms(started_at) <= 0: - return {} + return StructuredPointerAssignments(by_line={}, secret_key_pointers=set()) findings_by_line: dict[int, list[ScanFinding]] = defaultdict(list) for finding in findings: @@ -239,7 +256,7 @@ async def _build_structured_pointer_assignments( findings_by_line[finding.line_number].append(finding) if not findings_by_line: - return {} + return StructuredPointerAssignments(by_line={}, secret_key_pointers=set()) scanned_lines = scanned_text.splitlines() candidate_lines: list[int] = [] @@ -260,7 +277,7 @@ async def _build_structured_pointer_assignments( probe_line_batch.append(location.key_probe_text) if not candidate_lines: - return {} + return StructuredPointerAssignments(by_line={}, secret_key_pointers=set()) full_line_findings = await self._scan_line_batch( runtime=runtime, @@ -269,7 +286,7 @@ async def _build_structured_pointer_assignments( started_at=started_at, ) if full_line_findings is None: - return {} + return StructuredPointerAssignments(by_line={}, secret_key_pointers=set()) probe_line_findings = await self._scan_line_batch( runtime=runtime, @@ -278,20 +295,25 @@ async def _build_structured_pointer_assignments( started_at=started_at, ) if probe_line_findings is None: - return {} + return StructuredPointerAssignments(by_line={}, secret_key_pointers=set()) assignments_by_line: dict[int, deque[str | None]] = {} + secret_key_pointers: set[str] = set() for batch_index, line_number in enumerate(candidate_lines, start=1): location = normalized.line_locations_by_line[line_number] - line_assignments = self._assign_structured_line_pointers( + line_assignments, line_secret_key_pointers = self._assign_structured_line_pointers( location=location, line_findings=findings_by_line[line_number], full_line_findings=full_line_findings.get(batch_index, []), probe_line_findings=probe_line_findings.get(batch_index, []), ) assignments_by_line[line_number] = deque(line_assignments) + secret_key_pointers.update(line_secret_key_pointers) - return assignments_by_line + return StructuredPointerAssignments( + by_line=assignments_by_line, + secret_key_pointers=secret_key_pointers, + ) def _assign_structured_line_pointers( self, @@ -300,7 +322,7 @@ def _assign_structured_line_pointers( line_findings: list[ScanFinding], full_line_findings: list[ScanFinding], probe_line_findings: list[ScanFinding], - ) -> list[str | None]: + ) -> tuple[list[str | None], set[str]]: if len(full_line_findings) != len(line_findings): return self._fallback_structured_line_pointers( location=location, @@ -310,6 +332,7 @@ def _assign_structured_line_pointers( probe_type_counts = Counter(finding.type for finding in probe_line_findings) line_pointers: list[str | None] = [] + secret_key_pointers: set[str] = set() for finding, full_line_finding in zip(line_findings, full_line_findings, strict=True): if finding.type != full_line_finding.type: return self._fallback_structured_line_pointers( @@ -320,11 +343,15 @@ def _assign_structured_line_pointers( if probe_type_counts[full_line_finding.type] > 0: line_pointers.append(location.parent_pointer) + if location.json_pointer is not None and self._key_name_is_secret_like( + location.key_name + ): + secret_key_pointers.add(location.json_pointer) probe_type_counts[full_line_finding.type] -= 1 else: line_pointers.append(location.json_pointer) - return line_pointers + return line_pointers, secret_key_pointers def _fallback_structured_line_pointers( self, @@ -332,12 +359,51 @@ def _fallback_structured_line_pointers( location: LineLocation, line_findings: list[ScanFinding], probe_line_findings: list[ScanFinding], - ) -> list[str | None]: + ) -> tuple[list[str | None], set[str]]: probe_types = {finding.type for finding in probe_line_findings} - return [ - location.parent_pointer if finding.type in probe_types else location.json_pointer - for finding in line_findings - ] + secret_key_pointers = ( + {location.json_pointer} + if location.json_pointer + and probe_types + and self._key_name_is_secret_like(location.key_name) + else set() + ) + return ( + [ + location.parent_pointer if finding.type in probe_types else location.json_pointer + for finding in line_findings + ], + secret_key_pointers, + ) + + def _pointer_traverses_secret_key( + self, pointer: str | None, secret_key_pointers: set[str] + ) -> bool: + if pointer is None: + return False + + return any( + pointer == secret_key_pointer or pointer.startswith(f"{secret_key_pointer}/") + for secret_key_pointer in secret_key_pointers + ) + + def _key_name_is_secret_like(self, key_name: str | None) -> bool: + if key_name is None: + return False + + if not IDENTIFIER_LIKE_KEY_PATTERN.fullmatch(key_name): + return True + + has_alpha = any(character.isalpha() for character in key_name) + has_digit = any(character.isdigit() for character in key_name) + has_token_separator = any(character in "._:-" for character in key_name) + return len(key_name) >= 20 and has_alpha and (has_digit or has_token_separator) + + def _runtime_version_or_unknown(self) -> str: + try: + return get_runtime_info().detect_secrets_version + except Exception: + return "unknown" async def _scan_line_batch( self, diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py index c3455bc3..592bf0c2 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py @@ -29,6 +29,7 @@ class LineLocation: json_pointer: str | None parent_pointer: str | None = None key_probe_text: str | None = None + key_name: str | None = None @dataclass(frozen=True, slots=True) @@ -197,6 +198,7 @@ def _render_dict_lines( child_pointer=child_pointer, parent_pointer=pointer if pointer else "", key_literal=key_literal, + key_name=key_name, ) child_lines[-1] = RenderedLine( text=f"{child_lines[-1].text}{suffix}", @@ -262,6 +264,7 @@ def _attach_dict_child_location( child_pointer: str, parent_pointer: str | None, key_literal: str, + key_name: str, ) -> list[RenderedLine]: if child_lines: first_line = child_lines[0] @@ -271,6 +274,7 @@ def _attach_dict_child_location( json_pointer=child_pointer, parent_pointer=parent_pointer, key_probe_text=_build_key_probe_text(key_literal, child), + key_name=key_name, ), ) return child_lines diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index bb89a778..587446a5 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -36,6 +36,28 @@ async def test_none_input_returns_no_match() -> None: } +@pytest.mark.asyncio +async def test_none_input_short_circuits_runtime_failures( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime_info", + lambda: (_ for _ in ()).throw(RuntimeError("boom")), + ) + + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig(on_error="deny")) + result = await evaluator.evaluate(None) + + assert result.matched is False + assert result.error is None + assert result.metadata == { + "findings_count": 0, + "findings": [], + "normalized_payload_type": "none", + "detect_secrets_version": "unknown", + } + + @pytest.mark.asyncio async def test_string_secret_matches() -> None: evaluator = DetectSecretsEvaluator( @@ -192,6 +214,25 @@ async def test_list_payload_maps_findings_to_json_pointer() -> None: ] +@pytest.mark.asyncio +async def test_secret_bearing_object_keys_do_not_leak_through_json_pointer() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + key_secret = "ghp_123456789012345678901234567890123456" + value_secret = "ghp_abcdefabcdefabcdefabcdefabcdefabcdef" + result = await evaluator.evaluate({key_secret: {"nested": value_secret}}) + + assert result.matched is True + assert result.metadata is not None + assert {"type": "GitHub Token", "json_pointer": ""} in result.metadata["findings"] + assert {"type": "GitHub Token"} in result.metadata["findings"] + assert all( + key_secret not in finding.get("json_pointer", "") for finding in result.metadata["findings"] + ) + + @pytest.mark.asyncio async def test_tuple_payload_maps_findings_like_a_list() -> None: evaluator = DetectSecretsEvaluator( From 03ef785cd53ff41ce0555f769423021239525514 Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Wed, 22 Apr 2026 15:02:46 -0700 Subject: [PATCH 14/20] fix: honor detect-secrets fail-open semantics --- .../detect_secrets/config.py | 7 ++++- .../detect_secrets/evaluator.py | 2 +- .../tests/detect_secrets/test_evaluator.py | 27 ++++++++++++++++--- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/config.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/config.py index 5fe02f2f..7b36dec1 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/config.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/config.py @@ -46,7 +46,12 @@ def validate_enabled_plugins(cls, value: list[str] | None) -> list[str] | None: if value is None: return None - available = set(get_runtime_info().available_plugin_names) + try: + available = set(get_runtime_info().available_plugin_names) + except Exception as exc: + raise ValueError( + "Unable to validate detect-secrets plugins because runtime introspection failed" + ) from exc normalized: list[str] = [] seen: set[str] = set() diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index 44fea128..2d1eb518 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -494,10 +494,10 @@ def _failure_result( metadata=metadata, ) + metadata["fallback_action"] = "allow" return EvaluatorResult( matched=False, confidence=0.0, message=f"Secret scan failed ({failure_mode}): {detail}; allowing request", metadata=metadata, - error=f"detect-secrets evaluator failure: {failure_mode}", ) diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index 587446a5..a43eb65f 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -295,9 +295,10 @@ async def test_colliding_normalized_keys_route_through_normalization_error() -> ) assert result.matched is False - assert result.error == "detect-secrets evaluator failure: normalization_error" + assert result.error is None assert result.metadata is not None assert result.metadata["failure_mode"] == "normalization_error" + assert result.metadata["fallback_action"] == "allow" @pytest.mark.asyncio @@ -322,9 +323,10 @@ async def test_non_json_serializable_payload_routes_through_on_error_allow() -> assert result.matched is False assert result.confidence == 0.0 - assert result.error == "detect-secrets evaluator failure: normalization_error" + assert result.error is None assert result.metadata is not None assert result.metadata["failure_mode"] == "normalization_error" + assert result.metadata["fallback_action"] == "allow" @pytest.mark.asyncio @@ -334,10 +336,11 @@ async def test_oversized_payload_routes_through_on_error_allow() -> None: result = await evaluator.evaluate("0123456789") assert result.matched is False - assert result.error == "detect-secrets evaluator failure: payload_too_large" + assert result.error is None assert result.metadata is not None assert result.metadata["failure_mode"] == "payload_too_large" assert result.metadata["normalized_payload_type"] == "str" + assert result.metadata["fallback_action"] == "allow" @pytest.mark.asyncio @@ -377,9 +380,10 @@ async def scan(self, request: Any) -> Any: result = await evaluator.evaluate("hello") assert result.matched is False - assert result.error == "detect-secrets evaluator failure: worker_crash" + assert result.error is None assert result.metadata is not None assert result.metadata["failure_mode"] == "worker_crash" + assert result.metadata["fallback_action"] == "allow" assert result.message is not None assert FAILURE_MESSAGES["worker_crash"] in result.message @@ -566,6 +570,21 @@ def test_unknown_plugin_is_rejected() -> None: DetectSecretsEvaluatorConfig(enabled_plugins=["NoSuchPlugin"]) +def test_plugin_validation_runtime_failures_are_wrapped( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.config.get_runtime_info", + lambda: (_ for _ in ()).throw(RuntimeError("boom")), + ) + + with pytest.raises( + ValueError, + match="Unable to validate detect-secrets plugins because runtime introspection failed", + ): + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + + @pytest.mark.asyncio async def test_omitted_enabled_plugins_uses_upstream_defaults() -> None: evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) From dba17d58b4b3b88e4e4968387a6f3838723350ba Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Wed, 22 Apr 2026 16:25:20 -0700 Subject: [PATCH 15/20] refactor: simplify detect-secrets pointer mapping --- evaluators/contrib/detect_secrets/README.md | 3 +- .../detect_secrets/evaluator.py | 275 +++--------------- .../tests/detect_secrets/test_evaluator.py | 72 ++--- 3 files changed, 78 insertions(+), 272 deletions(-) diff --git a/evaluators/contrib/detect_secrets/README.md b/evaluators/contrib/detect_secrets/README.md index 9fb31780..1611cd4f 100644 --- a/evaluators/contrib/detect_secrets/README.md +++ b/evaluators/contrib/detect_secrets/README.md @@ -54,7 +54,8 @@ Safe metadata: - `findings[]` with `type`, plus: - `line_number` for plain selected strings - `json_pointer` for normalized `dict` / `list` payloads when a finding maps back to a structural - location + location; pointers are conservatively truncated to the nearest safe ancestor when a key segment + looks secret-like - `normalized_payload_type` - `detect_secrets_version` - `failure_mode` on evaluator failures diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index 2d1eb518..6191cfda 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -3,8 +3,6 @@ from __future__ import annotations import time -from collections import Counter, defaultdict, deque -from dataclasses import dataclass from importlib.metadata import PackageNotFoundError, version from typing import Any @@ -12,7 +10,6 @@ from agent_control_evaluators import Evaluator, EvaluatorMetadata, register_evaluator from agent_control_models import EvaluatorResult from detect_secrets_async import ( - DetectSecretsRuntime, RuntimeConfigConflictError, RuntimeScanError, ScanConfig, @@ -52,14 +49,9 @@ } IDENTIFIER_LIKE_KEY_PATTERN = re2.compile(r"^[A-Za-z_][A-Za-z0-9_.:-]{0,127}$") - - -@dataclass(frozen=True, slots=True) -class StructuredPointerAssignments: - """Structured-line pointer assignments plus secret-bearing key ancestry.""" - - by_line: dict[int, deque[str | None]] - secret_key_pointers: set[str] +JSON_SCALAR_LIKE_KEY_PATTERN = re2.compile( + r"^(?:-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?|true|false|null)$" +) @register_evaluator @@ -153,39 +145,20 @@ async def evaluate(self, data: Any) -> EvaluatorResult: detect_secrets_version=runtime_info.detect_secrets_version, ) - findings = await self._map_findings( - normalized=normalized, - scanned_text=filtered_text, - findings=scan_result.findings, - runtime=runtime, - scan_config=request.config, - started_at=started_at, - ) + findings = self._map_findings(normalized=normalized, findings=scan_result.findings) return self._success_result( normalized=normalized, detect_secrets_version=scan_result.detect_secrets_version, findings=findings, ) - async def _map_findings( + def _map_findings( self, *, normalized: NormalizedPayload, - scanned_text: str, findings: tuple[ScanFinding, ...], - runtime: DetectSecretsRuntime, - scan_config: ScanConfig, - started_at: float, ) -> list[dict[str, Any]]: mapped: list[dict[str, Any]] = [] - structured_pointer_assignments = await self._build_structured_pointer_assignments( - normalized=normalized, - scanned_text=scanned_text, - findings=findings, - runtime=runtime, - scan_config=scan_config, - started_at=started_at, - ) for finding in findings: finding_metadata: dict[str, Any] = {"type": finding.type} @@ -195,12 +168,7 @@ async def _map_findings( elif normalized.payload_type in {"dict", "list"}: if finding.line_number is not None: location = normalized.line_locations_by_line.get(finding.line_number) - json_pointer = self._safe_structured_pointer( - location=location, - finding=finding, - structured_pointer_assignments=structured_pointer_assignments.by_line, - secret_key_pointers=structured_pointer_assignments.secret_key_pointers, - ) + json_pointer = self._safe_structured_pointer(location=location) if json_pointer is not None: finding_metadata["json_pointer"] = json_pointer @@ -208,189 +176,53 @@ async def _map_findings( return mapped - def _safe_structured_pointer( - self, - *, - location: LineLocation | None, - finding: ScanFinding, - structured_pointer_assignments: dict[int, deque[str | None]], - secret_key_pointers: set[str], - ) -> str | None: + def _safe_structured_pointer(self, *, location: LineLocation | None) -> str | None: if location is None: return None - if location.key_probe_text is None: - pointer = location.json_pointer - else: - assert finding.line_number is not None - line_assignments = structured_pointer_assignments.get(finding.line_number) - if line_assignments: - pointer = line_assignments.popleft() - else: - return None - - if self._pointer_traverses_secret_key(pointer, secret_key_pointers): - return None - return pointer + pointer = location.json_pointer + if location.key_name is not None and self._key_name_is_secret_like(location.key_name): + pointer = location.parent_pointer - async def _build_structured_pointer_assignments( - self, - *, - normalized: NormalizedPayload, - scanned_text: str, - findings: tuple[ScanFinding, ...], - runtime: DetectSecretsRuntime, - scan_config: ScanConfig, - started_at: float, - ) -> StructuredPointerAssignments: - if normalized.payload_type not in {"dict", "list"}: - return StructuredPointerAssignments(by_line={}, secret_key_pointers=set()) + return self._truncate_pointer_at_secret_like_segment(pointer) - if self._remaining_timeout_ms(started_at) <= 0: - return StructuredPointerAssignments(by_line={}, secret_key_pointers=set()) + def _truncate_pointer_at_secret_like_segment(self, pointer: str | None) -> str | None: + if pointer is None or pointer == "": + return pointer - findings_by_line: dict[int, list[ScanFinding]] = defaultdict(list) - for finding in findings: - if finding.line_number is None: - continue - findings_by_line[finding.line_number].append(finding) - - if not findings_by_line: - return StructuredPointerAssignments(by_line={}, secret_key_pointers=set()) - - scanned_lines = scanned_text.splitlines() - candidate_lines: list[int] = [] - full_line_batch: list[str] = [] - probe_line_batch: list[str] = [] - - for line_number in sorted(findings_by_line): - location = normalized.line_locations_by_line.get(line_number) - if location is None or location.key_probe_text is None: - continue - - line_index = line_number - 1 - if not 0 <= line_index < len(scanned_lines): - continue - - candidate_lines.append(line_number) - full_line_batch.append(scanned_lines[line_index]) - probe_line_batch.append(location.key_probe_text) - - if not candidate_lines: - return StructuredPointerAssignments(by_line={}, secret_key_pointers=set()) - - full_line_findings = await self._scan_line_batch( - runtime=runtime, - lines=full_line_batch, - scan_config=scan_config, - started_at=started_at, - ) - if full_line_findings is None: - return StructuredPointerAssignments(by_line={}, secret_key_pointers=set()) - - probe_line_findings = await self._scan_line_batch( - runtime=runtime, - lines=probe_line_batch, - scan_config=scan_config, - started_at=started_at, - ) - if probe_line_findings is None: - return StructuredPointerAssignments(by_line={}, secret_key_pointers=set()) - - assignments_by_line: dict[int, deque[str | None]] = {} - secret_key_pointers: set[str] = set() - for batch_index, line_number in enumerate(candidate_lines, start=1): - location = normalized.line_locations_by_line[line_number] - line_assignments, line_secret_key_pointers = self._assign_structured_line_pointers( - location=location, - line_findings=findings_by_line[line_number], - full_line_findings=full_line_findings.get(batch_index, []), - probe_line_findings=probe_line_findings.get(batch_index, []), - ) - assignments_by_line[line_number] = deque(line_assignments) - secret_key_pointers.update(line_secret_key_pointers) + safe_segments: list[str] = [] + for encoded_segment in pointer.split("/")[1:]: + segment = self._decode_json_pointer_segment(encoded_segment) + if self._pointer_segment_is_secret_like(segment): + break + safe_segments.append(segment) + else: + return pointer - return StructuredPointerAssignments( - by_line=assignments_by_line, - secret_key_pointers=secret_key_pointers, - ) + if not safe_segments: + return "" - def _assign_structured_line_pointers( - self, - *, - location: LineLocation, - line_findings: list[ScanFinding], - full_line_findings: list[ScanFinding], - probe_line_findings: list[ScanFinding], - ) -> tuple[list[str | None], set[str]]: - if len(full_line_findings) != len(line_findings): - return self._fallback_structured_line_pointers( - location=location, - line_findings=line_findings, - probe_line_findings=probe_line_findings, - ) - - probe_type_counts = Counter(finding.type for finding in probe_line_findings) - line_pointers: list[str | None] = [] - secret_key_pointers: set[str] = set() - for finding, full_line_finding in zip(line_findings, full_line_findings, strict=True): - if finding.type != full_line_finding.type: - return self._fallback_structured_line_pointers( - location=location, - line_findings=line_findings, - probe_line_findings=probe_line_findings, - ) - - if probe_type_counts[full_line_finding.type] > 0: - line_pointers.append(location.parent_pointer) - if location.json_pointer is not None and self._key_name_is_secret_like( - location.key_name - ): - secret_key_pointers.add(location.json_pointer) - probe_type_counts[full_line_finding.type] -= 1 - else: - line_pointers.append(location.json_pointer) - - return line_pointers, secret_key_pointers - - def _fallback_structured_line_pointers( - self, - *, - location: LineLocation, - line_findings: list[ScanFinding], - probe_line_findings: list[ScanFinding], - ) -> tuple[list[str | None], set[str]]: - probe_types = {finding.type for finding in probe_line_findings} - secret_key_pointers = ( - {location.json_pointer} - if location.json_pointer - and probe_types - and self._key_name_is_secret_like(location.key_name) - else set() - ) - return ( - [ - location.parent_pointer if finding.type in probe_types else location.json_pointer - for finding in line_findings - ], - secret_key_pointers, - ) + encoded_segments = [self._encode_json_pointer_segment(segment) for segment in safe_segments] + return "/" + "/".join(encoded_segments) - def _pointer_traverses_secret_key( - self, pointer: str | None, secret_key_pointers: set[str] - ) -> bool: - if pointer is None: + def _pointer_segment_is_secret_like(self, segment: str) -> bool: + if segment.isdigit(): return False + return self._key_name_is_secret_like(segment) - return any( - pointer == secret_key_pointer or pointer.startswith(f"{secret_key_pointer}/") - for secret_key_pointer in secret_key_pointers - ) + def _decode_json_pointer_segment(self, segment: str) -> str: + return segment.replace("~1", "/").replace("~0", "~") + + def _encode_json_pointer_segment(self, segment: str) -> str: + return segment.replace("~", "~0").replace("/", "~1") def _key_name_is_secret_like(self, key_name: str | None) -> bool: if key_name is None: return False + if JSON_SCALAR_LIKE_KEY_PATTERN.fullmatch(key_name): + return False + if not IDENTIFIER_LIKE_KEY_PATTERN.fullmatch(key_name): return True @@ -405,39 +237,6 @@ def _runtime_version_or_unknown(self) -> str: except Exception: return "unknown" - async def _scan_line_batch( - self, - *, - runtime: DetectSecretsRuntime, - lines: list[str], - scan_config: ScanConfig, - started_at: float, - ) -> dict[int, list[ScanFinding]] | None: - remaining_ms = self._remaining_timeout_ms(started_at) - if remaining_ms <= 0: - return None - - try: - result = await runtime.scan( - ScanRequest( - content="\n".join(lines), - timeout_ms=max(1, remaining_ms), - config=scan_config, - ) - ) - except RuntimeScanError: - return None - except Exception: - return None - - findings_by_line: dict[int, list[ScanFinding]] = defaultdict(list) - for finding in result.findings: - if finding.line_number is None: - continue - findings_by_line[finding.line_number].append(finding) - - return findings_by_line - def _remaining_timeout_ms(self, started_at: float) -> int: return int(self.config.timeout_ms - ((time.monotonic() - started_at) * 1000)) diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index a43eb65f..4892cb94 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -226,8 +226,27 @@ async def test_secret_bearing_object_keys_do_not_leak_through_json_pointer() -> assert result.matched is True assert result.metadata is not None - assert {"type": "GitHub Token", "json_pointer": ""} in result.metadata["findings"] - assert {"type": "GitHub Token"} in result.metadata["findings"] + assert all(finding.get("json_pointer", "") == "" for finding in result.metadata["findings"]) + assert all( + key_secret not in finding.get("json_pointer", "") for finding in result.metadata["findings"] + ) + + +@pytest.mark.asyncio +async def test_nested_findings_under_secret_like_key_truncate_to_safe_ancestor() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + key_secret = "ghp_123456789012345678901234567890123456" + value_secret = "ghp_abcdefabcdefabcdefabcdefabcdefabcdef" + result = await evaluator.evaluate({"outer": {key_secret: {"nested": value_secret}}}) + + assert result.matched is True + assert result.metadata is not None + assert all( + finding.get("json_pointer", "") == "/outer" for finding in result.metadata["findings"] + ) assert all( key_secret not in finding.get("json_pointer", "") for finding in result.metadata["findings"] ) @@ -389,7 +408,7 @@ async def scan(self, request: Any) -> Any: @pytest.mark.asyncio -async def test_structured_key_probe_disambiguates_same_detector_type() -> None: +async def test_structured_same_line_findings_map_to_field_pointer_without_probing() -> None: evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) result = await evaluator.evaluate({"secret": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="}) @@ -397,39 +416,10 @@ async def test_structured_key_probe_disambiguates_same_detector_type() -> None: assert result.matched is True assert result.metadata is not None assert result.metadata["normalized_payload_type"] == "dict" - assert result.metadata["findings_count"] == 3 - assert {"type": "Hex High Entropy String", "json_pointer": ""} in result.metadata["findings"] - assert {"type": "Hex High Entropy String", "json_pointer": "/secret"} in result.metadata[ - "findings" - ] + assert all(finding.get("json_pointer") == "/secret" for finding in result.metadata["findings"]) assert {"type": "Secret Keyword", "json_pointer": "/secret"} in result.metadata["findings"] -@pytest.mark.asyncio -async def test_structured_pointer_fallback_omits_ambiguous_pointer( - monkeypatch: pytest.MonkeyPatch, -) -> None: - async def fail_scan_line_batch(self: DetectSecretsEvaluator, **kwargs: Any) -> None: - return None - - monkeypatch.setattr( - DetectSecretsEvaluator, - "_scan_line_batch", - fail_scan_line_batch, - ) - - evaluator = DetectSecretsEvaluator( - DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) - ) - result = await evaluator.evaluate( - {"outer": {"token": "ghp_123456789012345678901234567890123456"}} - ) - - assert result.matched is True - assert result.metadata is not None - assert result.metadata["findings"] == [{"type": "GitHub Token"}] - - @pytest.mark.asyncio async def test_preconfigured_runtime_is_reused(monkeypatch: pytest.MonkeyPatch) -> None: class FakeRuntime: @@ -605,6 +595,22 @@ def test_normalize_payload_renders_expected_json_pointer_lines() -> None: assert normalized.line_locations_by_line[4].json_pointer == "/outer/0/inner" +@pytest.mark.parametrize( + ("key_name", "expected"), + [ + ("github_token_key_name", True), + ("MyVeryLongFunctionName", False), + ("api_key_v2", False), + ("github_pat_11ABCDEFG1234567890123", True), + ("0", False), + ], +) +def test_key_name_is_secret_like_heuristic(key_name: str, expected: bool) -> None: + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + assert evaluator._key_name_is_secret_like(key_name) is expected + + def test_entry_point_is_registered() -> None: evaluator_entry_points = { entry_point.name: entry_point.value From 8035455a4ddf5aab5130235f5464304dc966ac41 Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Wed, 22 Apr 2026 16:42:17 -0700 Subject: [PATCH 16/20] fix: tighten detect-secrets error handling --- .../detect_secrets/evaluator.py | 9 +++- .../tests/detect_secrets/test_evaluator.py | 45 +++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index 6191cfda..db237d55 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -77,7 +77,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: started_at = time.monotonic() try: normalized = normalize_payload(data) - except NormalizationError: + except (NormalizationError, RecursionError): return self._failure_result( failure_mode="normalization_error", normalized_payload_type=None, @@ -109,6 +109,13 @@ async def evaluate(self, data: Any) -> EvaluatorResult: detect_secrets_version=runtime_info.detect_secrets_version, ) + if self._remaining_timeout_ms(started_at) <= 0: + return self._failure_result( + failure_mode="queue_timeout", + normalized_payload_type=normalized.payload_type, + detect_secrets_version=runtime_info.detect_secrets_version, + ) + request = ScanRequest( content=filtered_text, timeout_ms=self._bounded_remaining_timeout_ms(started_at), diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index 4892cb94..ccb5cc1e 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -348,6 +348,21 @@ async def test_non_json_serializable_payload_routes_through_on_error_allow() -> assert result.metadata["fallback_action"] == "allow" +@pytest.mark.asyncio +async def test_recursive_payload_routes_through_normalization_error() -> None: + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + payload: dict[str, Any] = {} + payload["self"] = payload + + result = await evaluator.evaluate(payload) + + assert result.matched is False + assert result.error is None + assert result.metadata is not None + assert result.metadata["failure_mode"] == "normalization_error" + assert result.metadata["fallback_action"] == "allow" + + @pytest.mark.asyncio async def test_oversized_payload_routes_through_on_error_allow() -> None: evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig(max_bytes=8)) @@ -362,6 +377,36 @@ async def test_oversized_payload_routes_through_on_error_allow() -> None: assert result.metadata["fallback_action"] == "allow" +@pytest.mark.asyncio +async def test_exhausted_timeout_budget_short_circuits_before_runtime( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monotonic_values = [100.0, 100.02] + + def fake_monotonic() -> float: + if monotonic_values: + return monotonic_values.pop(0) + return 100.02 + + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.time.monotonic", + fake_monotonic, + ) + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", + lambda config=None: pytest.fail("runtime should not be invoked"), + ) + + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig(timeout_ms=10)) + result = await evaluator.evaluate("safe content only") + + assert result.matched is False + assert result.error is None + assert result.metadata is not None + assert result.metadata["failure_mode"] == "queue_timeout" + assert result.metadata["fallback_action"] == "allow" + + @pytest.mark.asyncio async def test_on_error_deny_fails_closed(monkeypatch: pytest.MonkeyPatch) -> None: class FakeRuntime: From 7434836aad5576fd407c2bb02d4e36186108d361 Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Wed, 22 Apr 2026 17:11:48 -0700 Subject: [PATCH 17/20] test: close detect-secrets evaluator testing gaps Adds the 10 gap categories flagged in review, with given/when/then behavioral style: - parametric failure-mode matrix: every ScanFailureCode x {allow, deny} plus evaluator-layer failures (normalization_error, payload_too_large) - FAILURE_MESSAGES drift pin against ScanFailureCode enum - normalization edge cases: top-level set, NaN/+-inf primitives, empty dict / list, boolean/None dict keys, tuple dict keys - runtime-error paths: get_runtime_info failure during non-None evaluate (previously only reached via None short-circuit), RuntimeConfigConflictError from get_runtime - exclude_lines_regex on structured payloads: blanking suppresses findings on matched lines and preserves pointers for unmatched ones - max_bytes boundary: exactly-at-limit accepted, one-byte-over rejected - multi-line string with distinct findings preserves line numbers - list with scalar element maps pointer to index - concurrent evaluate() on one cached instance stays correct - _safe_structured_pointer returns None for missing location - _key_name_is_secret_like for None and non-identifier/scalar-like keys - entry-point .load() round-trips to DetectSecretsEvaluator - config validator edges: explicit None enabled_plugins, whitespace-only entry rejected, whitespace strip + dedup, positive-int bounds on timeout_ms / max_bytes, Literal validation on on_error Coverage: 93% -> 98% (config 96 -> 100, evaluator 94 -> 98, normalization 92 -> 98). 39 -> 90 passing tests. --- .../tests/detect_secrets/test_evaluator.py | 538 +++++++++++++++++- 1 file changed, 537 insertions(+), 1 deletion(-) diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index ccb5cc1e..09771ed9 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -1,11 +1,13 @@ from __future__ import annotations +import asyncio from importlib.metadata import entry_points -from typing import Any +from typing import Any, Literal import pytest from detect_secrets_async import ( RuntimeConfig, + RuntimeConfigConflictError, RuntimeScanError, ScanFailureCode, ScanResult, @@ -665,3 +667,537 @@ def test_entry_point_is_registered() -> None: assert evaluator_entry_points["yelp.detect_secrets"] == ( "agent_control_evaluator_detect_secrets.detect_secrets:DetectSecretsEvaluator" ) + + +def test_entry_point_load_returns_evaluator_class() -> None: + # Given: the registered yelp.detect_secrets entry point + evaluator_entry_points = { + entry_point.name: entry_point + for entry_point in entry_points(group="agent_control.evaluators") + } + entry_point = evaluator_entry_points["yelp.detect_secrets"] + + # When: the entry point is loaded + loaded_class = entry_point.load() + + # Then: it resolves to the DetectSecretsEvaluator class + assert loaded_class is DetectSecretsEvaluator + + +# --------------------------------------------------------------------------- +# Failure-mode matrix: every ScanFailureCode x {allow, deny} combination, +# plus evaluator-layer failures, plus a drift pin for FAILURE_MESSAGES. +# --------------------------------------------------------------------------- + +_RUNTIME_FAILURE_CODES: tuple[ScanFailureCode, ...] = ( + ScanFailureCode.INVALID_CONFIG, + ScanFailureCode.QUEUE_FULL, + ScanFailureCode.QUEUE_TIMEOUT, + ScanFailureCode.WORKER_STARTUP_ERROR, + ScanFailureCode.WORKER_TIMEOUT, + ScanFailureCode.WORKER_CRASH, + ScanFailureCode.WORKER_PROTOCOL_ERROR, + ScanFailureCode.RUNTIME_ERROR, +) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("failure_code", _RUNTIME_FAILURE_CODES) +@pytest.mark.parametrize("on_error", ["allow", "deny"]) +async def test_runtime_failure_routes_through_on_error_for_each_code( + failure_code: ScanFailureCode, + on_error: Literal["allow", "deny"], + monkeypatch: pytest.MonkeyPatch, +) -> None: + # Given: a runtime that raises the given ScanFailureCode on every scan + class FakeRuntime: + async def scan(self, request: Any) -> Any: + raise RuntimeScanError(failure_code) + + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", + lambda config=None: FakeRuntime(), + ) + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig(on_error=on_error)) + + # When: a valid string payload is evaluated + result = await evaluator.evaluate("safe content only") + + # Then: the failure_mode reflects the code and fallback_action mirrors on_error + assert result.error is None + assert result.confidence == 0.0 + assert result.metadata is not None + assert result.metadata["failure_mode"] == failure_code.value + assert result.metadata["fallback_action"] == on_error + assert result.message is not None + assert FAILURE_MESSAGES[failure_code.value] in result.message + assert result.matched is (on_error == "deny") + + +@pytest.mark.asyncio +@pytest.mark.parametrize("on_error", ["allow", "deny"]) +@pytest.mark.parametrize( + ("failure_mode", "config_kwargs", "payload"), + [ + ("normalization_error", {}, {"bad": {1, 2, 3}}), + ("payload_too_large", {"max_bytes": 8}, "0123456789"), + ], + ids=["normalization_error", "payload_too_large"], +) +async def test_evaluator_layer_failure_routes_through_on_error( + failure_mode: str, + config_kwargs: dict[str, Any], + payload: Any, + on_error: Literal["allow", "deny"], +) -> None: + # Given: an evaluator configured to hit the given evaluator-layer failure + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(on_error=on_error, **config_kwargs) + ) + + # When: the triggering payload is evaluated + result = await evaluator.evaluate(payload) + + # Then: metadata carries the expected failure_mode and fallback_action for both modes + assert result.error is None + assert result.confidence == 0.0 + assert result.metadata is not None + assert result.metadata["failure_mode"] == failure_mode + assert result.metadata["fallback_action"] == on_error + assert result.matched is (on_error == "deny") + + +def test_failure_messages_cover_all_runtime_scan_failure_codes() -> None: + # Given: the detect-secrets-async ScanFailureCode enum and the evaluator's two + # evaluator-layer failure modes + runtime_code_values = {code.value for code in ScanFailureCode} + evaluator_layer_codes = {"normalization_error", "payload_too_large"} + + # When: the expected key set is formed + expected_keys = runtime_code_values | evaluator_layer_codes + + # Then: FAILURE_MESSAGES has exactly that set, with a non-empty message for each + assert set(FAILURE_MESSAGES) == expected_keys + assert all(message for message in FAILURE_MESSAGES.values()) + + +# --------------------------------------------------------------------------- +# Normalization edge cases (top-level types, NaN, empty containers, +# non-string dict keys). +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_top_level_set_routes_through_normalization_error() -> None: + # Given: a payload whose top-level type is not supported (a plain set) + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + # When: the evaluator evaluates it + result = await evaluator.evaluate({"abc", "def"}) + + # Then: the failure is classified as normalization_error + assert result.matched is False + assert result.error is None + assert result.metadata is not None + assert result.metadata["failure_mode"] == "normalization_error" + assert result.metadata["fallback_action"] == "allow" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("bad_scalar", [float("nan"), float("inf"), float("-inf")]) +async def test_nan_or_infinity_primitive_routes_through_normalization_error( + bad_scalar: float, +) -> None: + # Given: a non-finite float which json.dumps(allow_nan=False) rejects + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + # When: the evaluator evaluates it + result = await evaluator.evaluate(bad_scalar) + + # Then: normalization fails safely through on_error=allow + assert result.matched is False + assert result.metadata is not None + assert result.metadata["failure_mode"] == "normalization_error" + assert result.metadata["fallback_action"] == "allow" + + +@pytest.mark.asyncio +async def test_empty_dict_payload_yields_no_findings() -> None: + # Given: an empty dict + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + # When: the evaluator evaluates it + result = await evaluator.evaluate({}) + + # Then: it scans successfully with no findings + assert result.matched is False + assert result.error is None + assert result.metadata is not None + assert result.metadata["normalized_payload_type"] == "dict" + assert result.metadata["findings"] == [] + assert result.metadata["findings_count"] == 0 + + +@pytest.mark.asyncio +async def test_empty_list_payload_yields_no_findings() -> None: + # Given: an empty list + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + # When: the evaluator evaluates it + result = await evaluator.evaluate([]) + + # Then: it scans successfully with no findings + assert result.matched is False + assert result.error is None + assert result.metadata is not None + assert result.metadata["normalized_payload_type"] == "list" + assert result.metadata["findings"] == [] + assert result.metadata["findings_count"] == 0 + + +@pytest.mark.asyncio +async def test_boolean_and_none_dict_keys_are_normalized_as_scalar_strings() -> None: + # Given: a dict keyed by True/False/None alongside a GitHub token under the True key + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + # When: the evaluator normalizes and scans + result = await evaluator.evaluate( + { + True: "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + False: "safe value", + None: "also safe", + } + ) + + # Then: the True key normalizes to "true" and the finding resolves to /true + assert result.matched is True + assert result.metadata is not None + assert result.metadata["normalized_payload_type"] == "dict" + assert result.metadata["findings"] == [{"type": "GitHub Token", "json_pointer": "/true"}] + + +@pytest.mark.asyncio +async def test_unsupported_dict_key_type_routes_through_normalization_error() -> None: + # Given: a dict keyed by a tuple (unsupported JSON key type) + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + # When: the evaluator evaluates it + result = await evaluator.evaluate({("a", "b"): "value"}) + + # Then: normalization fails safely + assert result.matched is False + assert result.metadata is not None + assert result.metadata["failure_mode"] == "normalization_error" + assert result.metadata["fallback_action"] == "allow" + + +# --------------------------------------------------------------------------- +# Runtime-side failure paths that aren't exercised elsewhere. +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_runtime_info_failure_during_non_none_evaluate_returns_runtime_error( + monkeypatch: pytest.MonkeyPatch, +) -> None: + # Given: a get_runtime_info reference inside the evaluator module that always raises + def raise_runtime_error() -> Any: + raise RuntimeError("boom") + + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime_info", + raise_runtime_error, + ) + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + # When: a non-None payload is evaluated (the None short-circuit doesn't apply) + result = await evaluator.evaluate("safe content only") + + # Then: the evaluator returns runtime_error with unknown detect-secrets version + assert result.matched is False + assert result.error is None + assert result.metadata is not None + assert result.metadata["failure_mode"] == "runtime_error" + assert result.metadata["fallback_action"] == "allow" + assert result.metadata["detect_secrets_version"] == "unknown" + assert "normalized_payload_type" not in result.metadata + + +@pytest.mark.asyncio +async def test_runtime_config_conflict_routes_through_runtime_error( + monkeypatch: pytest.MonkeyPatch, +) -> None: + # Given: get_runtime that raises RuntimeConfigConflictError + def raise_conflict(config: Any = None) -> Any: + raise RuntimeConfigConflictError("conflict") + + monkeypatch.setattr( + "agent_control_evaluator_detect_secrets.detect_secrets.evaluator.get_runtime", + raise_conflict, + ) + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + # When: a valid payload is evaluated + result = await evaluator.evaluate("safe content only") + + # Then: the conflict is sanitized to a runtime_error failure + assert result.matched is False + assert result.error is None + assert result.metadata is not None + assert result.metadata["failure_mode"] == "runtime_error" + assert result.metadata["fallback_action"] == "allow" + assert result.metadata["normalized_payload_type"] == "str" + + +# --------------------------------------------------------------------------- +# exclude_lines_regex on structured payloads. +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_exclude_lines_regex_on_dict_payload_blanks_matching_line() -> None: + # Given: an evaluator configured to exclude JSON lines that contain "authorization" + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig( + enabled_plugins=["GitHubTokenDetector"], + exclude_lines_regex=[r'"authorization"'], + ) + ) + + # When: a dict carries the secret on the excluded line + result = await evaluator.evaluate( + { + "authorization": "ghp_123456789012345678901234567890123456", + "other": "safe", + } + ) + + # Then: the excluded line is blanked and no finding is surfaced + assert result.matched is False + assert result.metadata is not None + assert result.metadata["findings"] == [] + + +@pytest.mark.asyncio +async def test_exclude_lines_regex_on_dict_payload_preserves_pointers_for_other_findings() -> None: + # Given: a dict where one line matches the exclusion and a DIFFERENT line has a secret + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig( + enabled_plugins=["GitHubTokenDetector"], + exclude_lines_regex=[r'"skip"'], + ) + ) + + # When: the evaluator scans + result = await evaluator.evaluate( + { + "skip": "ghp_abcdefabcdefabcdefabcdefabcdefabcdef", + "keep": "ghp_111111111111111111111111111111111111", + } + ) + + # Then: line-number blanking does not disturb the surviving finding's pointer + assert result.matched is True + assert result.metadata is not None + assert result.metadata["findings"] == [{"type": "GitHub Token", "json_pointer": "/keep"}] + + +# --------------------------------------------------------------------------- +# max_bytes boundary behavior. +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_payload_exactly_at_max_bytes_is_accepted() -> None: + # Given: a payload whose UTF-8 byte length exactly equals max_bytes + payload = "a" * 64 + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig(max_bytes=64)) + + # When: the evaluator evaluates it + result = await evaluator.evaluate(payload) + + # Then: the scan proceeds without tripping payload_too_large + assert result.metadata is not None + assert result.metadata.get("failure_mode") is None + assert result.metadata["normalized_payload_type"] == "str" + + +@pytest.mark.asyncio +async def test_payload_one_byte_over_max_bytes_is_rejected() -> None: + # Given: a payload one byte over the configured max_bytes + payload = "a" * 65 + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig(max_bytes=64)) + + # When: the evaluator evaluates it + result = await evaluator.evaluate(payload) + + # Then: failure_mode is payload_too_large + assert result.matched is False + assert result.metadata is not None + assert result.metadata["failure_mode"] == "payload_too_large" + assert result.metadata["fallback_action"] == "allow" + + +# --------------------------------------------------------------------------- +# Scan-mapping edge cases and concurrency. +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_multi_line_string_preserves_distinct_line_numbers() -> None: + # Given: a multi-line string with GitHub tokens on lines 1 and 3 + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + content = "\n".join( + [ + "first = 'ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'", + "safe middle line", + "third = 'ghp_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'", + ] + ) + + # When: the evaluator evaluates it + result = await evaluator.evaluate(content) + + # Then: findings carry their respective original line numbers + assert result.matched is True + assert result.metadata is not None + line_numbers = sorted(finding["line_number"] for finding in result.metadata["findings"]) + assert line_numbers == [1, 3] + + +@pytest.mark.asyncio +async def test_list_with_scalar_elements_maps_pointer_to_index() -> None: + # Given: a list whose element at index 1 is a bare secret string + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + # When: the evaluator scans it + result = await evaluator.evaluate(["safe", "ghp_123456789012345678901234567890123456"]) + + # Then: the finding pointer names the list index + assert result.matched is True + assert result.metadata is not None + assert result.metadata["normalized_payload_type"] == "list" + assert result.metadata["findings"] == [{"type": "GitHub Token", "json_pointer": "/1"}] + + +@pytest.mark.asyncio +async def test_evaluate_is_safe_under_concurrent_calls() -> None: + # Given: a single evaluator instance and several distinct secret-bearing payloads + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + payloads = [ + f"github_token_{index} = 'ghp_{str(index).zfill(2)}3456789012345678901234567890123456'" + for index in range(5) + ] + + # When: many evaluate() calls run in parallel on the cached instance + results = await asyncio.gather(*(evaluator.evaluate(payload) for payload in payloads)) + + # Then: every call produces the correct finding with line_number=1 and no cross-talk + assert all(result.matched for result in results) + assert all( + result.metadata is not None + and result.metadata["findings"] == [{"type": "GitHub Token", "line_number": 1}] + for result in results + ) + + +def test_safe_structured_pointer_returns_none_for_missing_location() -> None: + # Given: an evaluator instance and no location metadata + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + # When: the pointer helper is invoked without a location + pointer = evaluator._safe_structured_pointer(location=None) + + # Then: the helper returns None so the finding is emitted without a pointer + assert pointer is None + + +# --------------------------------------------------------------------------- +# Additional _key_name_is_secret_like coverage (None, non-identifier keys). +# --------------------------------------------------------------------------- + + +def test_key_name_is_secret_like_returns_false_for_none() -> None: + # Given: an evaluator instance + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + # When/Then: a None key name is treated as not secret-like + assert evaluator._key_name_is_secret_like(None) is False + + +@pytest.mark.parametrize( + ("key_name", "expected"), + [ + # Starts with digit then letters -> fails IDENTIFIER_LIKE_KEY_PATTERN -> secret-like. + ("12abcd", True), + # Starts with symbol -> fails IDENTIFIER_LIKE_KEY_PATTERN -> secret-like. + ("!bang", True), + # Matches JSON_SCALAR_LIKE_KEY_PATTERN -> not secret-like. + ("true", False), + ("-1.5e10", False), + ], +) +def test_key_name_is_secret_like_for_non_identifier_and_scalar_keys( + key_name: str, expected: bool +) -> None: + # Given: an evaluator instance + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) + + # When/Then: the heuristic honors the non-identifier and JSON-scalar branches + assert evaluator._key_name_is_secret_like(key_name) is expected + + +# --------------------------------------------------------------------------- +# Config validator edge cases. +# --------------------------------------------------------------------------- + + +def test_explicit_none_enabled_plugins_is_accepted() -> None: + # Given: enabled_plugins explicitly set to None + config = DetectSecretsEvaluatorConfig(enabled_plugins=None) + + # Then: the config is accepted and enabled_plugins stays None + assert config.enabled_plugins is None + + +def test_whitespace_only_enabled_plugin_name_is_rejected() -> None: + # Given: a plugin list containing only whitespace + # When/Then: construction raises a non-empty validation error + with pytest.raises(ValueError, match="non-empty"): + DetectSecretsEvaluatorConfig(enabled_plugins=[" "]) + + +def test_enabled_plugins_strips_whitespace_and_dedups() -> None: + # Given: duplicate and whitespace-padded plugin names + config = DetectSecretsEvaluatorConfig( + enabled_plugins=[" GitHubTokenDetector ", "GitHubTokenDetector"] + ) + + # Then: names are stripped and duplicates removed in first-seen order + assert config.enabled_plugins == ["GitHubTokenDetector"] + + +def test_zero_timeout_ms_is_rejected() -> None: + # Given/When/Then: timeout_ms must be strictly positive + with pytest.raises(ValueError): + DetectSecretsEvaluatorConfig(timeout_ms=0) + + +def test_zero_max_bytes_is_rejected() -> None: + # Given/When/Then: max_bytes must be strictly positive + with pytest.raises(ValueError): + DetectSecretsEvaluatorConfig(max_bytes=0) + + +def test_invalid_on_error_value_is_rejected() -> None: + # Given/When/Then: on_error only accepts "allow" or "deny" + with pytest.raises(ValueError): + DetectSecretsEvaluatorConfig(on_error="maybe") # type: ignore[arg-type] From 655e43924899c2e39e38abaa9bf7837da70c6376 Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Fri, 24 Apr 2026 21:40:22 -0700 Subject: [PATCH 18/20] fix: handle invalid unicode in detect-secrets evaluator --- .../detect_secrets/evaluator.py | 11 ++++++++++- .../tests/detect_secrets/test_evaluator.py | 13 +++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index db237d55..756e5fea 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -102,7 +102,16 @@ async def evaluate(self, data: Any) -> EvaluatorResult: assert normalized.text is not None filtered_text = apply_line_exclusions(normalized.text, self._exclude_line_patterns) - if len(filtered_text.encode("utf-8")) > self.config.max_bytes: + try: + filtered_bytes = filtered_text.encode("utf-8") + except UnicodeError: + return self._failure_result( + failure_mode="normalization_error", + normalized_payload_type=normalized.payload_type, + detect_secrets_version=runtime_info.detect_secrets_version, + ) + + if len(filtered_bytes) > self.config.max_bytes: return self._failure_result( failure_mode="payload_too_large", normalized_payload_type=normalized.payload_type, diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index 09771ed9..380f7f46 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -350,6 +350,19 @@ async def test_non_json_serializable_payload_routes_through_on_error_allow() -> assert result.metadata["fallback_action"] == "allow" +@pytest.mark.asyncio +async def test_invalid_unicode_payload_routes_through_on_error_deny() -> None: + evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig(on_error="deny")) + + result = await evaluator.evaluate("\ud800") + + assert result.matched is True + assert result.error is None + assert result.metadata is not None + assert result.metadata["failure_mode"] == "normalization_error" + assert result.metadata["fallback_action"] == "deny" + + @pytest.mark.asyncio async def test_recursive_payload_routes_through_normalization_error() -> None: evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) From 2584c788d79bb2c3ea5bdf3adb93add5c24332a8 Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Fri, 24 Apr 2026 21:56:03 -0700 Subject: [PATCH 19/20] fix: harden detect-secrets unicode handling --- .../detect_secrets/evaluator.py | 7 ++--- .../detect_secrets/normalization.py | 16 +++++++--- .../tests/detect_secrets/test_evaluator.py | 31 ++++++++++++++++++- 3 files changed, 43 insertions(+), 11 deletions(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py index 756e5fea..e3f1c430 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/evaluator.py @@ -101,8 +101,8 @@ async def evaluate(self, data: Any) -> EvaluatorResult: ) assert normalized.text is not None - filtered_text = apply_line_exclusions(normalized.text, self._exclude_line_patterns) try: + filtered_text = apply_line_exclusions(normalized.text, self._exclude_line_patterns) filtered_bytes = filtered_text.encode("utf-8") except UnicodeError: return self._failure_result( @@ -242,10 +242,7 @@ def _key_name_is_secret_like(self, key_name: str | None) -> bool: if not IDENTIFIER_LIKE_KEY_PATTERN.fullmatch(key_name): return True - has_alpha = any(character.isalpha() for character in key_name) - has_digit = any(character.isdigit() for character in key_name) - has_token_separator = any(character in "._:-" for character in key_name) - return len(key_name) >= 20 and has_alpha and (has_digit or has_token_separator) + return len(key_name) >= 20 def _runtime_version_or_unknown(self) -> str: try: diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py index 592bf0c2..b3a3a590 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py @@ -40,6 +40,12 @@ class RenderedLine: location: LineLocation | None = None +def _json_dumps(value: Any, **kwargs: Any) -> str: + """Serialize JSON while keeping Unicode line separators escaped on one logical line.""" + dumped = json.dumps(value, **kwargs) + return dumped.replace("\u2028", "\\u2028").replace("\u2029", "\\u2029") + + def normalize_payload(data: Any) -> NormalizedPayload: """Normalize selector output to deterministic text for detect-secrets scanning.""" if data is None: @@ -83,7 +89,7 @@ def _normalize_structured_payload( raise NormalizationError(f"Failed to normalize structured payload: {exc}") from exc try: - text = json.dumps( + text = _json_dumps( normalized_data, sort_keys=True, indent=2, @@ -116,7 +122,7 @@ def _normalize_structured_payload( def _normalize_primitive_payload(data: bool | int | float) -> NormalizedPayload: try: - text = json.dumps(data, ensure_ascii=False, allow_nan=False) + text = _json_dumps(data, ensure_ascii=False, allow_nan=False) except (TypeError, ValueError) as exc: raise NormalizationError(f"Failed to normalize scalar payload: {exc}") from exc @@ -155,7 +161,7 @@ def _render_json_lines( if isinstance(value, list | tuple): return _render_list_lines(value, indent_level=indent_level, prefix=prefix, pointer=pointer) - scalar_text = json.dumps(value, ensure_ascii=False, allow_nan=False) + scalar_text = _json_dumps(value, ensure_ascii=False, allow_nan=False) scalar_pointer = pointer or None return [ RenderedLine( @@ -183,7 +189,7 @@ def _render_dict_lines( for index, (raw_key, child) in enumerate(items): suffix = "," if index < last_index else "" key_name = _json_object_key_name(raw_key) - key_literal = json.dumps(key_name, ensure_ascii=False, allow_nan=False) + key_literal = _json_dumps(key_name, ensure_ascii=False, allow_nan=False) child_prefix = f"{key_literal}: " child_pointer = _append_json_pointer(pointer, key_name) child_lines = _render_json_lines( @@ -254,7 +260,7 @@ def _json_object_key_name(key: Any) -> str: if key is None: return "null" if isinstance(key, int | float): - return json.dumps(key, ensure_ascii=False, allow_nan=False) + return _json_dumps(key, ensure_ascii=False, allow_nan=False) raise TypeError(f"Unsupported JSON object key type: {type(key).__name__}") diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index 380f7f46..076fd334 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -216,6 +216,19 @@ async def test_list_payload_maps_findings_to_json_pointer() -> None: ] +@pytest.mark.asyncio +async def test_structured_unicode_line_separator_preserves_json_pointer() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(enabled_plugins=["GitHubTokenDetector"]) + ) + + result = await evaluator.evaluate({"x": "prefix\u2028ghp_123456789012345678901234567890123456"}) + + assert result.matched is True + assert result.metadata is not None + assert result.metadata["findings"] == [{"type": "GitHub Token", "json_pointer": "/x"}] + + @pytest.mark.asyncio async def test_secret_bearing_object_keys_do_not_leak_through_json_pointer() -> None: evaluator = DetectSecretsEvaluator( @@ -363,6 +376,21 @@ async def test_invalid_unicode_payload_routes_through_on_error_deny() -> None: assert result.metadata["fallback_action"] == "deny" +@pytest.mark.asyncio +async def test_invalid_unicode_with_exclusions_routes_through_on_error_deny() -> None: + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig(on_error="deny", exclude_lines_regex=["x"]) + ) + + result = await evaluator.evaluate("\ud800") + + assert result.matched is True + assert result.error is None + assert result.metadata is not None + assert result.metadata["failure_mode"] == "normalization_error" + assert result.metadata["fallback_action"] == "deny" + + @pytest.mark.asyncio async def test_recursive_payload_routes_through_normalization_error() -> None: evaluator = DetectSecretsEvaluator(DetectSecretsEvaluatorConfig()) @@ -659,10 +687,11 @@ def test_normalize_payload_renders_expected_json_pointer_lines() -> None: ("key_name", "expected"), [ ("github_token_key_name", True), - ("MyVeryLongFunctionName", False), + ("MyVeryLongFunctionName", True), ("api_key_v2", False), ("github_pat_11ABCDEFG1234567890123", True), ("0", False), + ("abcdefghijklmnopqrstuvwxyzabcdef", True), ], ) def test_key_name_is_secret_like_heuristic(key_name: str, expected: bool) -> None: From abfd49ddbe0f55d1432c5de3871deee73507c93d Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Fri, 24 Apr 2026 22:31:31 -0700 Subject: [PATCH 20/20] fix: preserve detect-secrets line mapping under exclusions --- .../detect_secrets/normalization.py | 8 +++++-- .../tests/detect_secrets/test_evaluator.py | 24 +++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py index b3a3a590..ba7f317c 100644 --- a/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py +++ b/evaluators/contrib/detect_secrets/src/agent_control_evaluator_detect_secrets/detect_secrets/normalization.py @@ -43,7 +43,11 @@ class RenderedLine: def _json_dumps(value: Any, **kwargs: Any) -> str: """Serialize JSON while keeping Unicode line separators escaped on one logical line.""" dumped = json.dumps(value, **kwargs) - return dumped.replace("\u2028", "\\u2028").replace("\u2029", "\\u2029") + return ( + dumped.replace("\u0085", "\\u0085") + .replace("\u2028", "\\u2028") + .replace("\u2029", "\\u2029") + ) def normalize_payload(data: Any) -> NormalizedPayload: @@ -73,7 +77,7 @@ def apply_line_exclusions(text: str, patterns: tuple[Any, ...]) -> str: filtered_lines = [ "" if any(pattern.search(line) for pattern in patterns) else line - for line in text.splitlines() + for line in text.split("\n") ] return "\n".join(filtered_lines) diff --git a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py index 076fd334..e64846f7 100644 --- a/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py +++ b/evaluators/contrib/detect_secrets/tests/detect_secrets/test_evaluator.py @@ -1046,6 +1046,30 @@ async def test_exclude_lines_regex_on_dict_payload_preserves_pointers_for_other_ assert result.metadata["findings"] == [{"type": "GitHub Token", "json_pointer": "/keep"}] +@pytest.mark.asyncio +async def test_exclude_lines_regex_does_not_treat_unicode_nel_as_line_break() -> None: + # Given: a structured payload with a secret after U+0085 and an unrelated excluded line + evaluator = DetectSecretsEvaluator( + DetectSecretsEvaluatorConfig( + enabled_plugins=["GitHubTokenDetector"], + exclude_lines_regex=[r'"skip"'], + ) + ) + + # When: exclusions are applied before scanning + result = await evaluator.evaluate( + { + "skip": "safe", + "keep": "prefix\u0085ghp_123456789012345678901234567890123456", + } + ) + + # Then: only actual JSON newlines affect line numbering, so the pointer stays on /keep + assert result.matched is True + assert result.metadata is not None + assert result.metadata["findings"] == [{"type": "GitHub Token", "json_pointer": "/keep"}] + + # --------------------------------------------------------------------------- # max_bytes boundary behavior. # ---------------------------------------------------------------------------