Skip to content

Commit ead8b6c

Browse files
committed
Merge remote-tracking branch 'origin/enterprise' into feat/TG-1029-mcp-hygiene-issues
2 parents 3c6d005 + 27a43d3 commit ead8b6c

35 files changed

Lines changed: 463 additions & 440 deletions

deploy/build_mcp_docs.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
"""Export the TestGen MCP server as a Markdown reference page.
2+
3+
Usage:
4+
python deploy/build_mcp_docs.py [--output PATH]
5+
6+
Introspects the FastMCP instance built by ``build_mcp_server()`` and emits
7+
a single Markdown page listing prompts, tools, and resources. Tools are
8+
grouped by the ``_DOC_GROUP`` constant defined on each tool module — when
9+
adding a new tool module, declare ``_DOC_GROUP = "..."`` so the new tools
10+
land under the right heading automatically.
11+
"""
12+
13+
import argparse
14+
import re
15+
import sys
16+
import textwrap
17+
from pathlib import Path
18+
from typing import Any
19+
20+
from testgen.mcp.server import build_mcp_server
21+
from testgen.mcp.tools.common import DocGroup
22+
23+
# Default destination of the generated page (a relative path, resolved against the cwd).
_DEFAULT_OUTPUT = Path("docs/mcp/supported-tools.md")
# Matches a line that consists solely of an ``Args:`` header, optionally indented.
_ARGS_HEADER_RE = re.compile(r"^\s*Args:\s*$", re.MULTILINE)

# Order in which tool groups appear on the page. Each entry is a ``DocGroup``
# member; tools whose module declares a ``_DOC_GROUP`` not in this list are
# appended after these in the order they are first seen.
_GROUP_ORDER: list[DocGroup] = [
    DocGroup.DISCOVER,
    DocGroup.INVESTIGATE,
    DocGroup.BROWSE_PROFILING,
    DocGroup.TRIGGER,
]
# Heading used for tools whose defining module has no ``_DOC_GROUP`` attribute.
_FALLBACK_GROUP = "Other tools"
36+
37+
38+
def _short_description(docstring: str) -> str:
    """Collapse the leading paragraph of ``docstring`` onto a single line.

    Everything from the first ``Args:`` header line onwards is dropped before
    the paragraph is selected, so a summary that runs straight into its
    argument list still yields only the summary.

    Args:
        docstring: Raw description text; may be empty.

    Returns:
        The first blank-line-delimited paragraph with each line stripped and
        the lines joined by single spaces, or ``""`` for an empty input.
    """
    if not docstring:
        return ""

    body = textwrap.dedent(docstring).strip()
    args_header = _ARGS_HEADER_RE.search(body)
    if args_header is not None:
        body = body[: args_header.start()].rstrip()

    # Only the text before the first blank line counts as the summary.
    lead, _, _ = body.partition("\n\n")
    return " ".join(map(str.strip, lead.splitlines()))
48+
49+
50+
def _entry_name(item: Any) -> str:
51+
"""Display name for a tool, resource, or prompt."""
52+
return str(getattr(item, "uri", None) or item.name)
53+
54+
55+
def _render_entry(item: Any) -> str:
    """Format one tool, resource, or prompt as a Markdown bullet.

    The bullet shows the entry's display name in bold code formatting,
    followed by an em dash and the one-line summary of its description.
    A missing (falsy) description is treated as the empty string.
    """
    summary = _short_description(item.description or "")
    label = _entry_name(item)
    return f"- **`{label}`** — {summary}"
58+
59+
60+
def _group_for_tool(tool: Any) -> str:
61+
"""Resolve a tool's display group via its module's ``_DOC_GROUP`` constant."""
62+
module = sys.modules.get(tool.fn.__module__)
63+
group = getattr(module, "_DOC_GROUP", None)
64+
return str(group) if group is not None else _FALLBACK_GROUP
65+
66+
67+
def _group_tools(tools: list[Any]) -> list[tuple[str, list[Any]]]:
    """Split ``tools`` into headed groups for the page.

    Each tool goes under the heading reported by ``_group_for_tool``. Groups
    named in ``_GROUP_ORDER`` come first, in that order; any remaining groups
    follow in the order their first tool was encountered. Within every group
    the tools are sorted by ``name``.

    Returns:
        A list of ``(heading, tools)`` pairs; headings with no tools are omitted.
    """

    def _by_name(tool: Any) -> Any:
        return tool.name

    by_heading: dict[str, list[Any]] = {}
    for tool in tools:
        heading = _group_for_tool(tool)
        if heading not in by_heading:
            by_heading[heading] = []
        by_heading[heading].append(tool)

    result: list[tuple[str, list[Any]]] = []
    # Known groups first; popping leaves only the unlisted groups behind.
    for heading in map(str, _GROUP_ORDER):
        members = by_heading.pop(heading, None)
        if members is not None:
            result.append((heading, sorted(members, key=_by_name)))
    result.extend((heading, sorted(members, key=_by_name)) for heading, members in by_heading.items())
    return result
81+
82+
83+
def _build_markdown(mcp: Any) -> str:
    """Render the complete "Supported Tools" Markdown page for ``mcp``.

    Tools, resources, and prompts are read from the server's private
    ``_tool_manager``, ``_resource_manager``, and ``_prompt_manager``
    attributes. Prompts are listed by name and resources by URI string;
    tools are split into headed groups by ``_group_tools``.

    Returns:
        The page text with trailing whitespace removed and a single final newline.
    """
    tools = mcp._tool_manager.list_tools()
    resources = sorted(mcp._resource_manager.list_resources(), key=lambda res: str(res.uri))
    prompts = sorted(mcp._prompt_manager.list_prompts(), key=lambda prm: prm.name)
    tool_sections = _group_tools(list(tools))

    prompts_blurb = (
        "Prompts are pre-built workflows you can invoke directly through your AI client — typically "
        "as a slash command (for example, `/testgen:table_health` in Claude Code) or "
        "from a quick-action menu. They orchestrate several tool calls behind the scenes for common "
        "investigations. Exact UX varies by client."
    )

    intro = [
        "# Supported Tools",
        "",
        "The TestGen MCP server exposes the prompts, tools, and resources listed below.",
        "",
        "For setup instructions, see [Set up the MCP Server](setup.md).",
        "For example questions to ask an assistant, see [MCP Server](index.md#what-you-can-ask).",
        "",
    ]

    prompt_section = ["## Prompts", "", prompts_blurb, "", *map(_render_entry, prompts), ""]

    tool_section = [
        "## Tools",
        "",
        "Tools are operations the assistant calls during a conversation, picked based on what you ask.",
        "",
    ]
    for heading, members in tool_sections:
        tool_section += [f"### {heading}", "", *map(_render_entry, members), ""]

    resource_section = [
        "## Resources",
        "",
        "Resources are static reference documents that AI clients can fetch by URI.",
        "",
        *map(_render_entry, resources),
    ]

    page = "\n".join([*intro, *prompt_section, *tool_section, *resource_section])
    # Trailing blank lines (e.g. when there are no resources) collapse to one newline.
    return page.rstrip() + "\n"
128+
129+
130+
def main() -> None:
    """Command-line entry point: build the page and write it to disk.

    Parses ``--output`` (defaulting to ``_DEFAULT_OUTPUT``), builds the MCP
    server with a placeholder API base URL, renders the Markdown, and writes
    it as UTF-8 behind a small front-matter header. Missing parent
    directories of the output path are created.
    """
    cli = argparse.ArgumentParser(description="Export the TestGen MCP server as a Markdown reference.")
    cli.add_argument(
        "--output",
        type=Path,
        default=_DEFAULT_OUTPUT,
        help=f"Output Markdown file path (default: {_DEFAULT_OUTPUT}, relative to cwd)",
    )
    options = cli.parse_args()

    server = build_mcp_server(api_base_url="https://testgen.example.com")
    page = _build_markdown(server)

    target: Path = options.output
    target.parent.mkdir(parents=True, exist_ok=True)
    # Front matter prepended to the page (sets a ``search.boost`` value).
    front_matter = "---\nsearch:\n boost: 0.5\n---\n"
    target.write_text(front_matter + page, encoding="utf-8")
    print(f"Exported MCP supported tools -> {target}")
148+
149+
150+
if __name__ == "__main__":
151+
main()

invocations/dev.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__all__ = ["build_api_docs", "build_public_image", "clean", "install", "lint"]
1+
__all__ = ["build_api_docs", "build_mcp_docs", "build_public_image", "clean", "install", "lint"]
22

33
import re
44
from os.path import exists, join
@@ -83,6 +83,15 @@ def build_api_docs(ctx: Context, version: str = "", output: str = "") -> None:
8383
ctx.run(f"python deploy/build_api_docs.py {' '.join(args)}")
8484

8585

86+
@task(name="build-mcp-docs", pre=(install,))
def build_mcp_docs(ctx: Context, output: str = "") -> None:
    """Exports the MCP supported-tools page from the FastMCP server.

    When ``output`` is given it is forwarded to the export script as
    ``--output``; otherwise the script's own default path is used.
    """
    import shlex

    command = ["python", "deploy/build_mcp_docs.py"]
    if output:
        # The command is passed to ``ctx.run`` as a single string, so quote the
        # path: a value containing spaces or shell metacharacters must reach
        # the script as one argument rather than being split or interpreted.
        command += ["--output", shlex.quote(output)]
    ctx.run(" ".join(command))
93+
94+
8695
@task(
8796
pre=(required_tools, prep_dk_builer),
8897
iterable=["label"],

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
88

99
[project]
1010
name = "dataops-testgen"
11-
version = "5.9.5"
11+
version = "5.32.2"
1212
description = "DataKitchen's Data Quality DataOps TestGen"
1313
authors = [
1414
{ "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },

testgen/__main__.py

Lines changed: 80 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,45 @@
1+
# Silence streamlit's "missing ScriptRunContext" / "No runtime found" /
2+
# "Session state does not function" warnings, which fire whenever streamlit-
3+
# decorated code runs outside an active script run (our CLI, scheduler, server,
4+
# and any import that touches @st.cache_data). Must run before the first
5+
# streamlit-using import, so it sits at the top of the module.
6+
#
7+
# We replace ``set_log_level`` itself, after seeding it to "error". Streamlit's
8+
# own ``_update_logger`` callback fires on config parse and would otherwise
9+
# downgrade us back to "info"; the cap floors any later call at ERROR.
10+
def _silence_streamlit_logs() -> None:
11+
import logging as _logging
12+
13+
try:
14+
from streamlit import logger as _st_logger
15+
except ImportError:
16+
return
17+
18+
_original = _st_logger.set_log_level
19+
_original("error")
20+
21+
def _capped(level):
22+
if isinstance(level, str):
23+
try:
24+
level_num = getattr(_logging, level.upper())
25+
except AttributeError:
26+
_original(level)
27+
return
28+
else:
29+
level_num = level
30+
_original(max(level_num, _logging.ERROR))
31+
32+
_st_logger.set_log_level = _capped
33+
34+
35+
_silence_streamlit_logs()
36+
37+
138
import base64
239
import importlib
340
import logging
441
import os
42+
import pathlib
543
import platform
644
import secrets
745
import signal
@@ -77,6 +115,16 @@
77115
VERSION_DATA = version_service.get_version()
78116
CHILDREN_POLL_INTERVAL = 10
79117

118+
119+
def _forward_signal_to_child(child: subprocess.Popen, signum: int) -> None:
120+
# On POSIX, forward the signal verbatim. On Windows, subprocess.send_signal
121+
# rejects everything except SIGTERM / CTRL_C_EVENT / CTRL_BREAK_EVENT, so
122+
# fall back to terminate() — equivalent to TerminateProcess().
123+
if sys.platform == "win32":
124+
child.terminate()
125+
else:
126+
child.send_signal(signum)
127+
80128
@dataclass
81129
class Configuration:
82130
verbose: bool = field(default=False)
@@ -94,6 +142,7 @@ def invoke(self, ctx: Context):
94142
raise
95143
except Exception:
96144
LOG.exception("There was an unexpected error")
145+
sys.exit(1)
97146

98147
def format_epilog(self, _ctx: Context, formatter: click.HelpFormatter) -> None:
99148
# Schema revision is a DB round-trip; defer until `--help` is actually
@@ -551,9 +600,24 @@ def generate_secret(length: int = 12) -> str:
551600
"TG_TARGET_DB_TRUST_SERVER_CERTIFICATE=yes",
552601
"TG_EXPORT_TO_OBSERVABILITY_VERIFY_SSL=no",
553602
]
603+
604+
# Persist caller-supplied runtime overrides (ports, TLS) so they apply to
605+
# subsequent `testgen run-app` invocations.
606+
persisted_env_vars = ("TG_UI_PORT", "TG_API_PORT", "TESTGEN_LOG_FILE_PATH", "SSL_CERT_FILE", "SSL_KEY_FILE")
607+
persisted_lines = [f"{name}={os.environ[name]}" for name in persisted_env_vars if os.environ.get(name)]
608+
if persisted_lines:
609+
config_lines.extend(["", "# Runtime overrides from installer", *persisted_lines])
610+
554611
config_path.write_text("\n".join(config_lines) + "\n")
555612
click.echo(f"Config written to {config_path}")
556613

614+
# `getenv` resolves env vars before config.env, so a pre-existing
615+
# TESTGEN_USERNAME / TESTGEN_PASSWORD in the shell would override the
616+
# CLI-supplied values and get seeded into the DB. Force the CLI args
617+
# to win for the rest of this process.
618+
os.environ["TESTGEN_USERNAME"] = username
619+
os.environ["TESTGEN_PASSWORD"] = password
620+
557621
# Reload settings — the module was already evaluated at import time
558622
# before the config file existed. Reloading re-reads the new file
559623
# and re-evaluates all module-level variables.
@@ -564,6 +628,14 @@ def generate_secret(length: int = 12) -> str:
564628
from testgen.ui.scripts.patch_streamlit import patch as patch_streamlit
565629
patch_streamlit(dev=True)
566630

631+
# Seed Streamlit's first-run credentials file so `run-app` doesn't block
632+
# on the interactive email prompt. We don't care about the value — just
633+
# that the file exists so Streamlit skips the prompt.
634+
streamlit_creds = pathlib.Path.home() / ".streamlit" / "credentials.toml"
635+
if not streamlit_creds.exists():
636+
streamlit_creds.parent.mkdir(parents=True, exist_ok=True)
637+
streamlit_creds.write_text('[general]\nemail = ""\n')
638+
567639
# Start embedded PostgreSQL (standalone mode is now active via config)
568640
start_standalone_postgres()
569641

@@ -860,14 +932,18 @@ def init_ui():
860932
child_env = {**os.environ, "TG_JOB_SOURCE": "UI", STANDALONE_URI_ENV_VAR: server_uri}
861933

862934
process= subprocess.Popen(
863-
[ # noqa: S607
935+
[
936+
sys.executable,
937+
"-m",
864938
"streamlit",
865939
"run",
866940
app_file,
867941
"--browser.gatherUsageStats=false",
942+
f"--logger.level={'debug' if settings.IS_DEBUG else 'error'}",
868943
"--client.showErrorDetails=none",
869944
"--client.toolbarMode=minimal",
870945
"--server.enableStaticServing=true",
946+
f"--server.port={settings.UI_PORT}",
871947
f"--server.sslCertFile={settings.SSL_CERT_FILE}" if use_ssl else "",
872948
f"--server.sslKeyFile={settings.SSL_KEY_FILE}" if use_ssl else "",
873949
"--",
@@ -877,7 +953,7 @@ def init_ui():
877953
)
878954
def term_ui(signum, _):
879955
LOG.info(f"Sending termination signal {signum} to Testgen UI")
880-
process.send_signal(signum)
956+
_forward_signal_to_child(process, signum)
881957
signal.signal(signal.SIGINT, term_ui)
882958
signal.signal(signal.SIGTERM, term_ui)
883959
status_code = process.wait()
@@ -905,13 +981,13 @@ def run_app(module):
905981

906982
case "all":
907983
children = [
908-
subprocess.Popen([sys.executable, sys.argv[0], "run-app", m], start_new_session=True)
984+
subprocess.Popen([sys.executable, "-m", "testgen", "run-app", m], start_new_session=True)
909985
for m in APP_MODULES
910986
]
911987

912988
def term_children(signum, _):
913989
for child in children:
914-
child.send_signal(signum)
990+
_forward_signal_to_child(child, signum)
915991

916992
signal.signal(signal.SIGINT, term_children)
917993
signal.signal(signal.SIGTERM, term_children)

testgen/api/oauth/metadata.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""RFC 8414 — OAuth 2.0 Authorization Server Metadata."""
1+
"""OAuth 2.1 authorization server metadata endpoint, per RFC 8414 ("OAuth 2.0 Authorization Server Metadata")."""
22

33
from fastapi import APIRouter
44
from fastapi.responses import JSONResponse
@@ -10,7 +10,7 @@
1010

1111
@router.get("/.well-known/oauth-authorization-server")
1212
def authorization_server_metadata():
13-
"""Return OAuth 2.0 Authorization Server Metadata per RFC 8414.
13+
"""Return OAuth 2.1 Authorization Server Metadata per RFC 8414.
1414
1515
MCP clients use this for server discovery.
1616
"""

testgen/commands/run_launch_db_config.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from testgen.common.models import with_database_session
1010
from testgen.common.read_file import get_template_files
1111
from testgen.common.read_yaml_metadata_records import import_metadata_records_from_yaml
12-
from testgen.common.standalone_postgres import get_home_dir, is_standalone_mode
12+
from testgen.common.standalone_postgres import get_target_host_port, is_standalone_mode
1313

1414
LOG = logging.getLogger("testgen")
1515

@@ -24,10 +24,13 @@ def _get_params_mapping() -> dict:
2424
ui_user_encrypted_password = encrypt_ui_password(settings.PASSWORD)
2525

2626
project_host = settings.PROJECT_DATABASE_HOST
27+
project_port = settings.PROJECT_DATABASE_PORT
2728
project_user = settings.PROJECT_DATABASE_USER
2829
project_password = settings.PROJECT_DATABASE_PASSWORD
2930
if is_standalone_mode():
30-
project_host = str(get_home_dir() / "pgdata")
31+
project_host, server_port = get_target_host_port()
32+
if server_port:
33+
project_port = server_port
3134
project_user = "postgres"
3235
project_password = ""
3336

@@ -43,7 +46,7 @@ def _get_params_mapping() -> dict:
4346
"PROJECT_NAME": settings.PROJECT_NAME,
4447
"PROJECT_DB": settings.PROJECT_DATABASE_NAME,
4548
"PROJECT_USER": project_user,
46-
"PROJECT_PORT": settings.PROJECT_DATABASE_PORT,
49+
"PROJECT_PORT": project_port,
4750
"PROJECT_HOST": project_host,
4851
"PROJECT_PW_ENCRYPTED": EncryptText(project_password),
4952
"PROJECT_HTTP_PATH": "",

0 commit comments

Comments
 (0)