Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/basic_memory/mcp/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@
# Schema tools
from basic_memory.mcp.tools.schema import schema_validate, schema_infer, schema_diff

# Diagnostics tool
from basic_memory.mcp.tools.basic_memory_diagnostics import basic_memory_diagnostics

__all__ = [
"basic_memory_diagnostics",
"build_context",
"canvas",
"cloud_info",
Expand Down
129 changes: 129 additions & 0 deletions src/basic_memory/mcp/tools/basic_memory_diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""Diagnostic tool for Basic Memory version and system information."""

import json
import platform
import sys
from urllib.parse import urlparse, urlunparse

import basic_memory
from basic_memory.config import CONFIG_FILE_NAME, ConfigManager
from basic_memory.mcp.server import mcp

# Fields in BasicMemoryConfig that contain secrets and must never be surfaced.
_SECRET_FIELDS = frozenset({"cloud_api_key"})

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Redact database credentials from diagnostics

When config.json contains database_url for a Postgres deployment, BasicMemoryConfig allows URLs with user:pass@host, but this tool only removes cloud_api_key and then dumps the remaining config. In that scenario, calling basic_memory_diagnostics exposes the database password to any MCP client or copied support transcript; redact database_url or at least strip URI userinfo before serializing the config.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — this was a real gap. Fixed in cbc2d5f.

What changed:

  • Added _redact_url() helper that strips the user:password userinfo component from a URL using urlparse/urlunparse, leaving host, port, and database name intact for diagnostic value.
  • Added _URL_FIELDS = frozenset({"database_url"}) alongside the existing _SECRET_FIELDS, so future credential-bearing URL fields are easy to add.
  • _redact_config() now routes URL fields through _redact_url() rather than dropping them entirely.

Tests added: _redact_url unit tests (password stripped, port/host preserved, no-credential URL unchanged, non-URL path unchanged), _redact_config tests for database_url, and an end-to-end test_diagnostics_redacts_database_url_password that confirms a Postgres password never surfaces in the full diagnostic output.


# Fields whose values are URLs that may embed user:password credentials.
# The userinfo component is stripped before surfacing.
_URL_FIELDS = frozenset({"database_url"})


def _redact_url(url: str) -> str:
"""Strip the userinfo (user:password) from a URL string.

Replaces any credentials with *** so the host/path remain visible for
diagnostics (e.g. ``postgresql://***@localhost/mydb``). If the value
cannot be parsed as a URL it is returned unchanged.
"""
try:
parsed = urlparse(url)
except Exception: # pragma: no cover — urlparse is very permissive
return url

if not parsed.hostname:
# Not a meaningful URL (e.g. a bare file path); leave it alone.
return url

if parsed.username or parsed.password:
# Rebuild with credentials replaced by a placeholder.
netloc = f"***@{parsed.hostname}"
if parsed.port:
netloc = f"{netloc}:{parsed.port}"
redacted = parsed._replace(netloc=netloc)
return urlunparse(redacted)
Comment on lines +36 to +42

return url


def _redact_config(raw: dict) -> dict:
"""Return a copy of the raw config dict with secret fields removed.

- Keys in ``_SECRET_FIELDS`` are dropped entirely.
- Keys in ``_URL_FIELDS`` have their userinfo component stripped so that
host and database name remain visible for diagnostics.

Only top-level keys are processed. Nested keys within project entries are
not currently credential-bearing, but the two sets make the pattern easy
to extend.
"""
result: dict = {}
for k, v in raw.items():
if k in _SECRET_FIELDS:
# Drop entirely — value has no diagnostic value.
continue
if k in _URL_FIELDS and isinstance(v, str):
result[k] = _redact_url(v)
else:
result[k] = v
return result


@mcp.tool(
"basic_memory_diagnostics",
annotations={"readOnlyHint": True, "openWorldHint": False},
)
def basic_memory_diagnostics() -> str:
"""Return version, system, and configuration diagnostics for Basic Memory.

Provides:
- Basic Memory package version and API version
- Python version and platform details
- Config file path and its contents (secrets redacted)

Useful for troubleshooting installations and gathering information for
support requests. Read-only; never emits secrets or API keys.
"""
# --- Version information ---
bm_version = basic_memory.__version__
api_version = basic_memory.__api_version__

# --- System information ---
python_version = sys.version
platform_info = platform.platform()
machine = platform.machine()

# --- Configuration ---
manager = ConfigManager()
config_file = manager.config_dir / CONFIG_FILE_NAME
config_exists = config_file.exists()
Comment on lines +98 to +101

if config_exists:
try:
raw_config = json.loads(config_file.read_text(encoding="utf-8"))
safe_config = _redact_config(raw_config)
config_dump = json.dumps(safe_config, indent=2, default=str)
except Exception as exc: # pragma: no cover
config_dump = f"<error reading config: {exc}>"
else:
config_dump = "<config file not found>"

lines = [
"# Basic Memory Diagnostics",
"",
"## Version",
f"- basic-memory: {bm_version}",
f"- API version: {api_version}",
"",
Comment on lines +116 to +118
"## System",
f"- Python: {python_version}",
f"- Platform: {platform_info}",
f"- Architecture: {machine}",
"",
"## Configuration",
f"- Config path: {config_file}",
f"- Config exists: {config_exists}",
"",
"```json",
config_dump,
"```",
]
return "\n".join(lines)
256 changes: 256 additions & 0 deletions tests/mcp/test_tool_basic_memory_diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
"""Tests for the basic_memory_diagnostics MCP tool."""

import json
import platform
import sys
from unittest.mock import MagicMock, patch

import basic_memory
from basic_memory.mcp.tools.basic_memory_diagnostics import (
_redact_config,
_redact_url,
basic_memory_diagnostics,
)
Comment on lines +3 to +13


# ---------------------------------------------------------------------------
# Unit tests for _redact_config helper
# ---------------------------------------------------------------------------


def test_redact_config_removes_cloud_api_key():
raw = {"cloud_api_key": "bmc_secret", "default_project": "main", "projects": {}}
result = _redact_config(raw)
assert "cloud_api_key" not in result
assert result["default_project"] == "main"
assert "projects" in result


def test_redact_config_passes_through_safe_fields():
raw = {"default_project": "main", "log_level": "INFO", "env": "dev"}
result = _redact_config(raw)
assert result == raw


def test_redact_config_empty_dict():
assert _redact_config({}) == {}


# ---------------------------------------------------------------------------
# Tests for the basic_memory_diagnostics tool
# ---------------------------------------------------------------------------


def test_diagnostics_returns_string():
result = basic_memory_diagnostics()
assert isinstance(result, str)


def test_diagnostics_includes_version():
result = basic_memory_diagnostics()
assert basic_memory.__version__ in result
assert basic_memory.__api_version__ in result


def test_diagnostics_includes_python_version():
result = basic_memory_diagnostics()
# sys.version can be multi-line; just check the version tuple prefix
major_minor = f"{sys.version_info.major}.{sys.version_info.minor}"
assert major_minor in result


def test_diagnostics_includes_platform():
result = basic_memory_diagnostics()
assert platform.machine() in result


def test_diagnostics_includes_config_path(tmp_path):
"""Config path section should appear in output."""
with patch("basic_memory.mcp.tools.basic_memory_diagnostics.ConfigManager") as MockMgr:
mock_mgr = MagicMock()
mock_mgr.config_dir = tmp_path
MockMgr.return_value = mock_mgr

config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"default_project": "main", "projects": {}}))

result = basic_memory_diagnostics()

assert str(tmp_path) in result
assert "Config path:" in result


def test_diagnostics_config_exists_with_valid_json(tmp_path):
"""When config file exists, its safe contents should appear as JSON."""
config_data = {
"default_project": "research",
"projects": {"research": {"path": str(tmp_path / "research")}},
}
with patch("basic_memory.mcp.tools.basic_memory_diagnostics.ConfigManager") as MockMgr:
mock_mgr = MagicMock()
mock_mgr.config_dir = tmp_path
MockMgr.return_value = mock_mgr

config_file = tmp_path / "config.json"
config_file.write_text(json.dumps(config_data))

result = basic_memory_diagnostics()

assert "research" in result
assert "```json" in result


def test_diagnostics_redacts_cloud_api_key(tmp_path):
"""cloud_api_key must never appear in diagnostic output."""
config_data = {
"default_project": "main",
"cloud_api_key": "bmc_super_secret_token",
"projects": {},
}
with patch("basic_memory.mcp.tools.basic_memory_diagnostics.ConfigManager") as MockMgr:
mock_mgr = MagicMock()
mock_mgr.config_dir = tmp_path
MockMgr.return_value = mock_mgr

config_file = tmp_path / "config.json"
config_file.write_text(json.dumps(config_data))

result = basic_memory_diagnostics()

assert "bmc_super_secret_token" not in result
assert "cloud_api_key" not in result


def test_diagnostics_config_missing(tmp_path):
"""When config file does not exist, output should say so."""
with patch("basic_memory.mcp.tools.basic_memory_diagnostics.ConfigManager") as MockMgr:
mock_mgr = MagicMock()
mock_mgr.config_dir = tmp_path
MockMgr.return_value = mock_mgr

# Ensure no config.json is present
config_file = tmp_path / "config.json"
assert not config_file.exists()

result = basic_memory_diagnostics()

assert "Config exists: False" in result
assert "<config file not found>" in result


def test_diagnostics_output_sections():
"""All expected section headers should be present."""
result = basic_memory_diagnostics()
assert "# Basic Memory Diagnostics" in result
assert "## Version" in result
assert "## System" in result
assert "## Configuration" in result


# ---------------------------------------------------------------------------
# Unit tests for _redact_url helper
# ---------------------------------------------------------------------------


def test_redact_url_strips_password():
url = "postgresql://user:secret@localhost/mydb"
result = _redact_url(url)
assert "secret" not in result
assert "user" not in result
assert "localhost" in result
assert "mydb" in result
assert "***" in result


def test_redact_url_strips_only_password_when_no_username():
# password-only userinfo (unusual but valid per RFC)
url = "postgresql://:secret@db.example.com/app"
result = _redact_url(url)
assert "secret" not in result
assert "db.example.com" in result

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization High test

The string
db.example.com
may be at an arbitrary position in the sanitized URL.
Comment thread
github-advanced-security[bot] marked this conversation as resolved.
Fixed


def test_redact_url_preserves_port():
url = "postgresql://admin:pw@db.internal:5432/prod"
result = _redact_url(url)
assert "pw" not in result
assert "5432" in result
assert "db.internal" in result


def test_redact_url_no_credentials_unchanged():
url = "postgresql://db.internal:5432/prod"
assert _redact_url(url) == url


def test_redact_url_non_url_string_unchanged():
# Bare file paths / non-URL values must not be mangled.
path = "/home/user/.local/share/basic-memory/main.db"
assert _redact_url(path) == path


# ---------------------------------------------------------------------------
# _redact_config tests for database_url
# ---------------------------------------------------------------------------


def test_redact_config_scrubs_database_url_credentials():
raw = {
"default_project": "main",
"database_url": "postgresql://dbuser:dbpass@host.example.com:5432/bm",
"projects": {},
}
result = _redact_config(raw)
assert "dbpass" not in result["database_url"]
assert "dbuser" not in result["database_url"]
# Host and db should still be present for diagnostic value.
assert "host.example.com" in result["database_url"]

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization High test

The string
host.example.com
may be at an arbitrary position in the sanitized URL.
Comment thread
github-advanced-security[bot] marked this conversation as resolved.
Fixed
assert "bm" in result["database_url"]


def test_redact_config_leaves_database_url_without_credentials():
raw = {"database_url": "sqlite:////tmp/basic-memory/main.db"}
result = _redact_config(raw)
assert result["database_url"] == "sqlite:////tmp/basic-memory/main.db"


def test_redact_config_drops_secret_fields_independently():
raw = {
"cloud_api_key": "bmc_top_secret",
"database_url": "postgresql://dbuser:dbpassword@host/db",
"default_project": "main",
}
result = _redact_config(raw)
assert "cloud_api_key" not in result
assert "dbpassword" not in result["database_url"]
assert "dbuser" not in result["database_url"]
assert "main" == result["default_project"]


# ---------------------------------------------------------------------------
# Integration: database_url redaction surfaces in diagnostic output
# ---------------------------------------------------------------------------


def test_diagnostics_redacts_database_url_password(tmp_path):
"""Postgres password in database_url must not appear in diagnostic output."""
config_data = {
"default_project": "main",
"database_url": "postgresql://pguser:supersecret@db.internal:5432/basicmemory",
"projects": {},
}
with patch("basic_memory.mcp.tools.basic_memory_diagnostics.ConfigManager") as MockMgr:
mock_mgr = MagicMock()
mock_mgr.config_dir = tmp_path
MockMgr.return_value = mock_mgr

config_file = tmp_path / "config.json"
config_file.write_text(json.dumps(config_data))

result = basic_memory_diagnostics()

assert "supersecret" not in result
assert "pguser" not in result
# Host and port remain visible for diagnostics.
assert "db.internal" in result
assert "5432" in result
Loading