Skip to content

Commit a6f871f

Browse files
make an mcp app
Signed-off-by: Matthew Khouzam <matthew.khouzam@ericsson.com>
1 parent 5d65cb0 commit a6f871f

4 files changed

Lines changed: 254 additions & 36 deletions

File tree

readme.md

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,17 @@ TMLL provides an MCP (Model Context Protocol) server that exposes trace analysis
114114
./tracecompass-server -vmargs -Dtraceserver.port=8080
115115
```
116116

117-
3. Configure in your MCP client (e.g., `~/.config/kiro-cli/mcp.json`):
117+
3. Configure in your MCP client (e.g., `~/.config/kiro-cli/mcp.json`). Point `command` at the Python interpreter of the environment where TMLL is installed, and set `PYTHONPATH` so the `tmll` package is importable:
118118
```json
119119
{
120120
"mcpServers": {
121121
"tmll": {
122-
"command": "python3",
123-
"args": ["/path/to/tmll/mcp_server_cli.py"]
122+
"type": "stdio",
123+
"command": "/path/to/tmll/venv/bin/python",
124+
"args": ["-m", "tmll.mcp.server"],
125+
"env": {
126+
"PYTHONPATH": "/path/to/tmll"
127+
}
124128
}
125129
}
126130
}
@@ -138,7 +142,6 @@ TMLL provides an MCP (Model Context Protocol) server that exposes trace analysis
138142
- `analyze_correlation`: Perform root cause correlation analysis
139143
- `detect_idle_resources`: Identify underutilized resources
140144
- `plan_capacity`: Run capacity planning predictions
141-
- `cluster_data`: Perform clustering analysis
142145

143146
## CLI Usage
144147

tmll/mcp/server.py

Lines changed: 205 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,25 @@
11
#!/usr/bin/env python3
22
"""MCP server for TMLL CLI - exposes all CLI commands as MCP tools."""
33

4+
import base64
5+
import contextlib
6+
import functools
7+
import io
8+
import json
49
import subprocess
510
import sys
11+
import traceback as _tb
612
import urllib.request
713
from pathlib import Path
814
from typing import Optional
915

16+
import matplotlib
17+
matplotlib.use("Agg")
18+
import matplotlib.pyplot as plt
19+
import pandas as pd
20+
1021
from mcp.server.fastmcp import FastMCP
22+
from mcp.types import ImageContent, TextContent
1123

1224
mcp = FastMCP("tmll-cli-mcp-server")
1325

@@ -17,12 +29,57 @@
1729
DEFAULT_PORT = 8080
1830

1931

32+
# ---------------------------------------------------------------------------
33+
# Debug helpers
34+
# ---------------------------------------------------------------------------
35+
36+
def _log(msg: str) -> None:
    """Emit one debug line on stderr.

    stdout is left untouched because it carries the MCP stdio transport.
    """
    # print() joins its positional arguments with a single space, which
    # yields the same "[tmll-mcp-debug] <msg>" line as an f-string would.
    print("[tmll-mcp-debug]", msg, file=sys.stderr, flush=True)
39+
40+
41+
@contextlib.contextmanager
def _protect_stdout():
    """Point ``sys.stdout`` at ``sys.stderr`` while the ``with`` body runs.

    Any stray ``print()`` issued inside the block therefore lands on stderr
    instead of corrupting the MCP stdio transport. The previous stdout is
    restored on exit, including when the body raises.
    """
    # redirect_stdout swaps sys.stdout on entry and puts the old object back
    # on exit, which is exactly the manual save/try/finally dance.
    with contextlib.redirect_stdout(sys.stderr):
        yield
50+
51+
52+
def _safe_tool(fn):
    """Wrap a tool function with stdout protection and call tracing.

    The wrapper logs the call arguments, runs ``fn`` with stdout redirected
    to stderr, logs a truncated repr of the result on success, and logs the
    exception plus traceback before re-raising on failure.
    """
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        tool = fn.__name__
        _log(f">>> TOOL CALL {tool} args={args!r} kwargs={kwargs!r}")
        with _protect_stdout():
            try:
                outcome = fn(*args, **kwargs)
                # Cap the preview so large payloads (e.g. base64 images)
                # do not flood the debug log.
                _log(f"<<< TOOL OK {tool} result_preview={repr(outcome)[:300]}")
            except Exception as exc:
                _log(f"!!! TOOL ERROR {tool} {type(exc).__name__}: {exc}\n{_tb.format_exc()}")
                raise
            return outcome
    return wrapper
69+
70+
71+
# ---------------------------------------------------------------------------
72+
# Server health
73+
# ---------------------------------------------------------------------------
74+
2075
def _server_is_running(host: str = DEFAULT_HOST, port: int = DEFAULT_PORT) -> bool:
    """Check if the trace server is reachable.

    Issues a GET against ``/tsp/api/health`` with a 3 second timeout.

    Args:
        host: Trace server host name or address.
        port: Trace server TCP port.

    Returns:
        True when the request completes without raising, False otherwise.
        Every failure is logged to stderr and reported as False because this
        is a best-effort probe.
    """
    url = f"http://{host}:{port}/tsp/api/health"
    try:
        # urlopen returns an open HTTP response; close it deterministically so
        # repeated health polling does not leak sockets.
        with urllib.request.urlopen(url, timeout=3):
            return True
    except Exception as exc:
        _log(f"Server health check failed ({url}): {exc}")
        return False
2784

2885

@@ -45,14 +102,46 @@ def ensure_server(host: str = DEFAULT_HOST, port: int = DEFAULT_PORT) -> str:
45102
return "Trace server was launched but is not yet responding. It may need more time to start."
46103

47104

105+
# ---------------------------------------------------------------------------
106+
# CLI runner
107+
# ---------------------------------------------------------------------------
108+
48109
def run_cli(*args: str, timeout: float = 120) -> str:
    """Run a tmll_cli.py command and return output.

    Args:
        *args: Command-line arguments appended after ``--log-stderr``.
        timeout: Seconds to wait for the CLI before giving up (keyword-only,
            defaults to the previous hard-coded 120).

    Returns:
        The CLI's stdout with surrounding whitespace stripped.

    Raises:
        RuntimeError: If the CLI times out, cannot be launched, or exits with
            a non-zero status. The original exception, when there is one, is
            attached as ``__cause__``.
    """
    cmd = [sys.executable, CLI_PATH, "--log-stderr", *args]
    cmd_str = " ".join(cmd)  # built once; reused by every log/error message
    _log(f"run_cli: executing {cmd_str}")
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired as exc:
        msg = (
            f"CLI timed out after {timeout}s\n"
            f" command: {cmd_str}\n"
            f" partial stdout: {exc.stdout!r}\n"
            f" partial stderr: {exc.stderr!r}"
        )
        _log(f"run_cli TIMEOUT: {msg}")
        raise RuntimeError(msg) from exc
    except Exception as exc:
        msg = (
            f"Failed to launch CLI: {type(exc).__name__}: {exc}\n"
            f" command: {cmd_str}"
        )
        _log(f"run_cli LAUNCH ERROR: {msg}")
        raise RuntimeError(msg) from exc

    stdout = result.stdout.strip()
    stderr = result.stderr.strip()
    _log(f"run_cli: exit_code={result.returncode} stdout_len={len(result.stdout)} stderr_len={len(result.stderr)}")
    if stderr:
        _log(f"run_cli stderr:\n{stderr}")

    if result.returncode != 0:
        msg = (
            f"CLI exited with code {result.returncode}\n"
            f" command: {cmd_str}\n"
            f" stdout: {stdout}\n"
            f" stderr: {stderr}"
        )
        _log(f"run_cli FAILED: {msg}")
        raise RuntimeError(msg)
    return stdout
57146

58147

@@ -81,33 +170,42 @@ def _global_args(host: Optional[str], port: Optional[int]) -> list[str]:
81170
return args
82171

83172

173+
# ---------------------------------------------------------------------------
174+
# Tools
175+
# ---------------------------------------------------------------------------
176+
84177
@mcp.tool()
@_safe_tool
def create_experiment(traces: list[str], experiment_name: str, host: Optional[str] = None, port: Optional[int] = None) -> str:
    """Create a trace experiment from LTTng trace files or directories."""
    # Global options (host/port) precede the sub-command on the CLI.
    cli_args = [*_global_args(host, port), "create", *traces, "-n", experiment_name]
    return run_cli(*cli_args)
88182

89183

90184
@mcp.tool()
@_safe_tool
def list_experiments(host: Optional[str] = None, port: Optional[int] = None) -> str:
    """List all open experiments.

    Args:
        host: Trace server host; omitted from the CLI call when None.
        port: Trace server port; omitted from the CLI call when None.
    """
    # Forward host/port the same way create_experiment does, so experiments
    # created on a non-default server can also be listed.
    return run_cli(*_global_args(host, port), "list")
94189

95190

96191
@mcp.tool()
@_safe_tool
def list_outputs(experiment_id: str, keywords: Optional[list[str]] = None, host: Optional[str] = None, port: Optional[int] = None) -> str:
    """List available outputs for an experiment.

    Args:
        experiment_id: Identifier of the experiment to inspect.
        keywords: Optional keyword filter passed to the CLI via ``-k``.
        host: Trace server host; omitted from the CLI call when None.
        port: Trace server port; omitted from the CLI call when None.
    """
    args = build_args({"keywords": ("-k", keywords)})
    # Global options go before the sub-command, matching create_experiment.
    return run_cli(*_global_args(host, port), "list-outputs", experiment_id, *args)
101197

102198

103199
@mcp.tool()
@_safe_tool
def fetch_data(experiment_id: str, keywords: Optional[list[str]] = None, output_file: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None) -> str:
    """Fetch data from experiment outputs.

    Args:
        experiment_id: Identifier of the experiment to read from.
        keywords: Output keywords (``-k``); defaults to ``["cpu usage"]``.
        output_file: Optional destination path passed via ``-o``.
        host: Trace server host; omitted from the CLI call when None.
        port: Trace server port; omitted from the CLI call when None.
    """
    args = build_args({"keywords": ("-k", keywords or ["cpu usage"]), "output_file": ("-o", output_file)})
    # Global options go before the sub-command, matching create_experiment.
    return run_cli(*_global_args(host, port), "fetch-data", experiment_id, *args)
108205

109206

110207
@mcp.tool()
@_safe_tool
def delete_experiment(experiment_id: str, host: Optional[str] = None, port: Optional[int] = None) -> str:
    """Delete an experiment.

    Args:
        experiment_id: Identifier of the experiment to delete.
        host: Trace server host; omitted from the CLI call when None.
        port: Trace server port; omitted from the CLI call when None.
    """
    # Global options go before the sub-command, matching create_experiment.
    return run_cli(*_global_args(host, port), "delete", experiment_id)
@@ -121,6 +219,7 @@ def detect_anomalies(experiment_id: str, keywords: Optional[list[str]] = None, m
121219

122220

123221
@mcp.tool()
222+
@_safe_tool
124223
def detect_memory_leak(experiment_id: str, keywords: Optional[list[str]] = None) -> str:
125224
"""Detect memory leaks in trace data."""
126225
args = build_args({"keywords": ("-k", keywords or ["memory"])})
@@ -135,6 +234,7 @@ def detect_changepoints(experiment_id: str, keywords: Optional[list[str]] = None
135234

136235

137236
@mcp.tool()
237+
@_safe_tool
138238
def analyze_correlation(experiment_id: str, keywords: Optional[list[str]] = None, method: Optional[str] = None) -> str:
139239
"""Analyze correlation between outputs for root cause analysis (pearson, kendall, spearman)."""
140240
args = build_args({"keywords": ("-k", keywords or ["cpu", "memory"]), "method": ("-m", method or "pearson")})
@@ -157,11 +257,109 @@ def detect_idle_resources(experiment_id: str, keywords: Optional[list[str]] = No
157257

158258

159259
@mcp.tool()
@_safe_tool
def plan_capacity(experiment_id: str, keywords: Optional[list[str]] = None, horizon: Optional[int] = None, host: Optional[str] = None, port: Optional[int] = None) -> str:
    """Perform capacity planning with predictive models.

    Args:
        experiment_id: Identifier of the experiment to analyse.
        keywords: Output keywords (``-k``); defaults to ``["cpu usage"]``.
        horizon: Value passed via ``-H``; defaults to 100 when omitted.
        host: Trace server host; omitted from the CLI call when None.
        port: Trace server port; omitted from the CLI call when None.
    """
    args = build_args({"keywords": ("-k", keywords or ["cpu usage"]), "horizon": ("-H", horizon or 100)})
    # Global options go before the sub-command, matching create_experiment.
    return run_cli(*_global_args(host, port), "capacity", experiment_id, *args)
164265

165266

267+
@mcp.tool()
@_safe_tool
def plot_xy_with_anomalies(
    experiment_id: str,
    keywords: Optional[list[str]] = None,
    method: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[int] = None,
    resample_freq: Optional[str] = None,
) -> list[TextContent | ImageContent]:
    """Fetch XY data from an experiment, run anomaly detection, and return an annotated plot image with a text summary.

    Args:
        experiment_id: Identifier of the experiment to analyse.
        keywords: Output keywords to match; defaults to ``["cpu usage"]``.
        method: Anomaly detection method name; defaults to ``"iforest"``.
        host: Trace server host; defaults to ``DEFAULT_HOST``.
        port: Trace server port; defaults to ``DEFAULT_PORT``.
        resample_freq: Forwarded to ``AnomalyDetection`` only when provided.

    Returns:
        A text summary followed by one PNG image per analysed output, or a
        single text item explaining why nothing could be plotted.
    """
    # Imported lazily, as in the original, so merely loading this module does
    # not pull in the TMLL client / ML stack.
    from tmll.tmll_client import TMLLClient
    from tmll.common.models.experiment import Experiment
    from tmll.ml.modules.anomaly_detection.anomaly_detection_module import AnomalyDetection

    h = host or DEFAULT_HOST
    p = port or DEFAULT_PORT
    keywords = keywords or ["cpu usage"]
    method = method or "iforest"

    client = TMLLClient(h, p)

    resp = client.tsp_client.fetch_experiment(experiment_id)
    if resp.status_code != 200:
        return [TextContent(type="text", text=f"Experiment {experiment_id} not found (status={resp.status_code}).")]
    experiment = Experiment.from_tsp_experiment(resp.model)
    experiment.assign_outputs(client._fetch_outputs(experiment))

    outputs = experiment.find_outputs(keyword=keywords, type=["xy"])
    if not outputs:
        return [TextContent(type="text", text="No XY outputs found matching keywords.")]

    ad_kwargs = {}
    if resample_freq:
        ad_kwargs["resample_freq"] = resample_freq
    ad = AnomalyDetection(client, experiment, outputs, **ad_kwargs)
    result = ad.find_anomalies(method=method)
    if not result or not result.anomalies:
        return [TextContent(type="text", text="Anomaly detection returned no results.")]

    colors = plt.colormaps.get_cmap("tab10")
    contents: list[TextContent | ImageContent] = []
    total_anomalies = 0

    for idx, (name, dataframe) in enumerate(ad.dataframes.items()):
        anomaly_df = result.anomalies.get(name, pd.DataFrame())
        periods = result.anomaly_periods.get(name, [])

        fig, ax = plt.subplots(figsize=(14, 4), dpi=120)
        try:
            # Cycle through the palette; an out-of-range integer index would
            # otherwise clamp every later series to the last colour.
            line_color = colors(idx % colors.N)
            ax.plot(dataframe.index, dataframe.iloc[:, 0], color=line_color, linewidth=1.2, label=name)

            # Label only the first span so the legend has a single entry.
            for i, (start, end) in enumerate(periods):
                ax.axvspan(start, end, color="red", alpha=0.2, label="Anomaly Period" if i == 0 else None)

            if not anomaly_df.empty:
                is_anomaly_cols = anomaly_df.filter(regex="_is_anomaly$")
                if not is_anomaly_cols.empty:
                    is_anomaly = is_anomaly_cols.any(axis=1)
                else:
                    is_anomaly = anomaly_df.any(axis=1)
                n_anomaly_points = int(is_anomaly.sum())
                total_anomalies += n_anomaly_points

                # Scatter points not already inside a shaded period
                for point in anomaly_df[is_anomaly].index:
                    if any(s <= point <= e for s, e in periods):
                        continue
                    if point in dataframe.index:
                        ax.scatter(point, dataframe.loc[point].values[0], color="red", s=40, zorder=5)

            ax.set_title(f"Anomaly Detection: {name} ({method})")
            ax.set_xlabel("Time")
            ax.set_ylabel(name)
            ax.legend(loc="upper right", fontsize=8)
            fig.tight_layout()

            buf = io.BytesIO()
            fig.savefig(buf, format="png")
        finally:
            # pyplot keeps every figure alive until closed; release it even
            # when plotting fails so a long-running server does not leak.
            plt.close(fig)
        contents.append(ImageContent(type="image", data=base64.b64encode(buf.getvalue()).decode(), mimeType="image/png"))

    period_summary = []
    for name, periods in result.anomaly_periods.items():
        for start, end in periods:
            # Separate the bounds; they were previously concatenated and
            # unreadable in the summary.
            period_summary.append(f" {name}: {start} → {end}")

    summary = f"Found {total_anomalies} anomalies across {len(ad.dataframes)} outputs using '{method}'."
    if period_summary:
        summary += "\n\nAnomaly periods:\n" + "\n".join(period_summary)

    contents.insert(0, TextContent(type="text", text=summary))
    return contents
360+
361+
362+
166363
if __name__ == "__main__":
    # The startup banner goes through _log (stderr) so stdout stays reserved
    # for the MCP stdio transport.
    startup_banner = f"MCP server starting — CLI_PATH={CLI_PATH} python={sys.executable}"
    _log(startup_banner)
    mcp.run()

tmll/ml/modules/anomaly_detection/anomaly_detection_module.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,9 @@ def __init__(self, client: TMLLClient, experiment: Experiment,
8080
self._process(outputs, **kwargs)
8181

8282
def _process(self, outputs: Optional[List[Output]] = None, **kwargs) -> None:
    """Delegate to the parent ``_process`` with normalization disabled.

    ``min_size`` falls back to ``MINIMUM_REQUIRED_DATAPOINTS`` unless the
    caller supplied one; all other keyword arguments pass through as given.
    """
    # Later keys win in a dict display, so a caller-provided min_size
    # overrides the default while the keyword is still passed exactly once.
    options = {"min_size": MINIMUM_REQUIRED_DATAPOINTS, **kwargs}
    super()._process(outputs=outputs, normalize=False, **options)
8787

8888
def _post_process(self, **kwargs) -> None:

0 commit comments

Comments
 (0)