Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,8 @@ test-rbac-lint: ## Lint the RBAC matrix + realm-config-extras (T009/T011/T012).
fi
@echo "[test-rbac-lint] running keycloak init-script symlink guard…"
@bash scripts/check-keycloak-init-symlinks.sh
@echo "[test-rbac-lint] running FGA create-path ownership linter (spec 2026-06-04 Layer 3)…"
@PYTHONPATH=. uv run python scripts/validate-fga-create-paths.py

test-rbac-up: ## Boot the e2e stack (Keycloak + UI + supervisor + agents + mongo) and seed personas via init-idp.sh.
@echo "[test-rbac-up] starting stack with profiles: $(E2E_PROFILES)"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
"""

import logging
from functools import lru_cache
from pathlib import Path

import yaml
from pymongo import ASCENDING, MongoClient
from pymongo.collection import Collection
from pymongo.database import Database
Expand All @@ -20,6 +23,58 @@

logger = logging.getLogger(__name__)

# Packaged seed config: the authoritative declaration of how built-in MCP
# servers authenticate upstream (see services/config.yaml, same directory).
_SEED_CONFIG_PATH = Path(__file__).with_name("config.yaml")


@lru_cache(maxsize=1)
def _builtin_credential_sources() -> dict[str, list[dict]]:
"""Built-in MCP ``credential_sources`` keyed by server id.

Read once from the packaged ``config.yaml``. AgentGateway discovery (the
UI's MCP-server provisioning path) historically persisted ``mcp_servers``
documents *without* ``credential_sources``; transform-based gateway routes
then emitted an empty Bearer and the upstream returned 401 (most visibly
``knowledge-base``/RAG). Using the seed config as the source of truth keeps
this in sync with the runtime declaration the gateway transforms rely on.

Best-effort: a missing/unreadable config yields an empty map (no injection),
so a packaging hiccup can never break reads.
"""
try:
with _SEED_CONFIG_PATH.open() as fh:
data = yaml.safe_load(fh) or {}
except (OSError, yaml.YAMLError) as exc: # pragma: no cover - defensive
logger.warning(
"Could not load built-in MCP credential sources from %s: %s",
_SEED_CONFIG_PATH,
exc,
)
return {}
result: dict[str, list[dict]] = {}
for server in data.get("mcp_servers") or []:
server_id = server.get("id")
sources = server.get("credential_sources")
if server_id and sources:
result[server_id] = sources
return result


def _inject_builtin_credential_sources(doc: dict) -> dict:
"""Self-heal: fill ``credential_sources`` for known built-in MCP servers.

Read-time defense-in-depth for documents persisted before discovery
attached ``credential_sources``. Only fills when the stored value is
absent/empty, so an operator-customized list is never overwritten.
"""
if doc.get("credential_sources"):
return doc
builtin = _builtin_credential_sources().get(doc.get("_id"))
if builtin:
doc = {**doc, "credential_sources": builtin}
return doc


def _strip_nulls(doc: dict) -> dict:
"""Strip None values from a MongoDB document before pydantic construction.
Expand Down Expand Up @@ -119,13 +174,16 @@ def get_server(self, server_id: str) -> MCPServerConfig | None:
"""Get an MCP server config by ID."""
doc = self._get_servers_collection().find_one({"_id": server_id})
if doc:
return MCPServerConfig(**_strip_nulls(doc))
return MCPServerConfig(**_strip_nulls(_inject_builtin_credential_sources(doc)))
return None

def get_servers_by_ids(self, server_ids: list[str]) -> list[MCPServerConfig]:
"""Get multiple MCP servers by their IDs."""
docs = self._get_servers_collection().find({"_id": {"$in": server_ids}})
return [MCPServerConfig(**_strip_nulls(doc)) for doc in docs]
return [
MCPServerConfig(**_strip_nulls(_inject_builtin_credential_sources(doc)))
for doc in docs
]

def get_agent_mcp_servers(self, agent: DynamicAgentConfig) -> list[MCPServerConfig]:
"""Get MCP servers for an agent AND all its subagents.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# Copyright 2025 CNOE Contributors
# SPDX-License-Identifier: Apache-2.0
"""Tests for read-time ``credential_sources`` self-heal in MongoDBService.

AgentGateway discovery historically persisted ``mcp_servers`` documents
without ``credential_sources``; transform-based gateway routes then emitted an
empty Bearer and the upstream returned 401 (most visibly ``knowledge-base``).
``get_server`` / ``get_servers_by_ids`` fill the built-in sources at read time
for known servers when the stored value is absent/empty, without overwriting an
operator-customized list.
"""

from __future__ import annotations

from unittest.mock import MagicMock

from dynamic_agents.services.mongo import (
MongoDBService,
_builtin_credential_sources,
)


def _make_service() -> MongoDBService:
service = MongoDBService.__new__(MongoDBService)
service.settings = MagicMock()
service._db = MagicMock()
return service


def _mock_servers(service: MongoDBService, *, find_one=None, find=None) -> None:
collection = MagicMock()
collection.find_one.return_value = find_one
collection.find.return_value = find or []
service._get_servers_collection = MagicMock(return_value=collection)


def _kb_doc(**overrides) -> dict:
base = {
"_id": "knowledge-base",
"name": "Knowledge Base",
"transport": "http",
"endpoint": "http://rag-server:9446/mcp",
"enabled": True,
}
base.update(overrides)
return base


def test_builtin_map_includes_knowledge_base_caller_token():
"""The packaged seed config must declare the KB caller_token source."""
builtin = _builtin_credential_sources()
assert "knowledge-base" in builtin
kb = builtin["knowledge-base"]
assert kb == [
{
"kind": "caller_token",
"name": "X-CAIPE-Provider-Token",
"target": "header",
"fallback_client_credentials": True,
}
]


def test_get_server_injects_builtin_when_missing():
"""A built-in server stored without credential_sources is self-healed."""
service = _make_service()
_mock_servers(service, find_one=_kb_doc())

server = service.get_server("knowledge-base")

assert server is not None
assert server.credential_sources is not None
assert server.credential_sources[0].kind == "caller_token"
assert server.credential_sources[0].name == "X-CAIPE-Provider-Token"
assert server.credential_sources[0].fallback_client_credentials is True


def test_get_server_injects_builtin_when_empty_list():
"""An explicit empty credential_sources is treated as missing and healed."""
service = _make_service()
_mock_servers(service, find_one=_kb_doc(credential_sources=[]))

server = service.get_server("knowledge-base")

assert server.credential_sources
assert server.credential_sources[0].kind == "caller_token"


def test_get_server_preserves_operator_customized_sources():
"""A non-empty stored list is never overwritten by the built-in default."""
custom = [
{"kind": "secret_ref", "name": "Authorization", "target": "header", "secret_ref": "my-secret"}
]
service = _make_service()
_mock_servers(service, find_one=_kb_doc(credential_sources=custom))

server = service.get_server("knowledge-base")

assert len(server.credential_sources) == 1
assert server.credential_sources[0].kind == "secret_ref"
assert server.credential_sources[0].secret_ref == "my-secret"


def test_get_server_unknown_id_left_untouched():
"""A non-built-in server without sources stays without sources."""
service = _make_service()
_mock_servers(
service,
find_one={
"_id": "custom-thing",
"name": "Custom",
"transport": "http",
"endpoint": "http://example:1234/mcp",
"enabled": True,
},
)

server = service.get_server("custom-thing")

assert server is not None
assert server.credential_sources is None


def test_get_servers_by_ids_injects_per_document():
"""Batch reads self-heal each known built-in independently."""
service = _make_service()
_mock_servers(
service,
find=[
_kb_doc(),
{
"_id": "argocd",
"name": "ArgoCD",
"transport": "http",
"endpoint": "http://argocd:18002/mcp",
"enabled": True,
},
],
)

servers = {s.id: s for s in service.get_servers_by_ids(["knowledge-base", "argocd"])}

# knowledge-base is healed; argocd has no built-in sources in config.yaml.
assert servers["knowledge-base"].credential_sources[0].kind == "caller_token"
assert servers["argocd"].credential_sources is None
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ class UrlIngestRequest(BaseModel):
description: str = Field("", description="Description for this data source")
settings: ScrapySettings = Field(default_factory=lambda: ScrapySettings(), description="Scraping configuration (crawl mode, JS rendering, rate limiting, etc.)")
reload_interval: Optional[int] = Field(None, description="Auto-reload interval in seconds. If not specified, uses global WEBLOADER_RELOAD_INTERVAL (default 24h). Minimum: 60 seconds.")
# Owning team for the new data source (spec 2026-06-03-explicit-ingest-capability).
# Required for non-org-admin authors; the server authorizes creation against
# the org `can_ingest` capability + owning-team membership and writes ownership
# tuples so the team gets read/ingest. None means personal/admin-owned.
owner_team_slug: Optional[str] = Field(None, description="Slug of the team that will own this new data source. Required for non-org-admin authors.")

# DEPRECATED fields - will be removed in a future version.
# Use 'settings' object instead.
Expand Down Expand Up @@ -124,6 +129,10 @@ class ConfluenceIngestRequest(BaseModel):
get_child_pages: bool = Field(False, description="Whether to ingest direct child pages of this page")
allowed_title_patterns: Optional[List[str]] = Field(None, description="Regex patterns for page titles to include (whitelist). If set, only pages whose title matches at least one pattern are ingested.")
denied_title_patterns: Optional[List[str]] = Field(None, description="Regex patterns for page titles to exclude (blacklist). Pages whose title matches any pattern are skipped. Checked after allowed_title_patterns.")
# Owning team for a NEW Confluence space data source (spec 2026-06-03).
# Required for non-org-admin authors when the space is created for the first
# time; ignored when appending pages to an existing space. None = personal.
owner_team_slug: Optional[str] = Field(None, description="Slug of the team that will own this new data source. Required for non-org-admin authors creating a new Confluence space.")


class ConfluenceReloadRequest(BaseModel):
Expand Down
Loading
Loading