Skip to content

Commit a409cf7

Browse files
committed
Run MCP smoke tests with Gemini CLI
1 parent 36c6d14 commit a409cf7

7 files changed

Lines changed: 345 additions & 74 deletions

File tree

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# MCP AI Smoke Harness
2+
3+
This harness powers `.github/workflows/mcp-ai-smoke.yml`.
4+
5+
## Stages
6+
7+
1. `setup-matomo-omnifixture.sh`
8+
- Imports `tests/resources/OmniFixture-dump.sql` into CI MySQL.
9+
- Writes Matomo config with empty DB prefix (matching OmniFixture tables).
10+
- Enables MCP tool-call logging.
11+
- Starts a local PHP server.
12+
- Creates a superuser app token and discovers fixture IDs used by prompts.
13+
- Emits `skip_cases` in state when fixture-backed entities are missing.
14+
15+
2. `run-gemini-smoke.sh`
16+
- Executes one Gemini prompt per configured tool case.
17+
- Determines pass/fail from tool-call evidence in the Matomo MCP call logs; a Gemini timeout or non-zero exit always fails the case.
18+
- Supports a per-case timeout in seconds via `CASE_TIMEOUT_SECONDS` (default `120`; `0` disables the timeout).
19+
- Additional MCP tool calls are accepted if the expected tool succeeds at least once.
20+
- Handles empty `cases.json` safely and emits an empty `results.json`.
21+
22+
## Files
23+
24+
- `cases.json`: prototype smoke cases (`site_get`, `site_list`, `report_processed`).
25+
- `.state.json`: runtime discovery state from setup, including optional `skip_cases`.
26+
- `prompts/*.txt`: prompt templates used by configured cases.
27+
- `artifacts/`: generated at runtime in CI.
28+
29+
## Notes
30+
31+
- The Gemini invocation is intentionally configurable via `GEMINI_CLI_CMD` and `GEMINI_EXTRA_ARGS`.
32+
- MCP connection is configured through a temporary Gemini `settings.json` (`mcpServers`) generated at runtime outside the uploaded artifact tree.
33+
- The workflow is intentionally report-first/non-blocking for internal prototype usage.
34+
- The summary table is produced in the workflow from `artifacts/gemini/results.json`.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[
2+
{"id":"site_get","tool":"matomo_site_get","expected_mode":"success","prompt_file":"site_get.txt"},
3+
{"id":"site_list","tool":"matomo_site_list","expected_mode":"success","prompt_file":"site_list.txt"},
4+
{"id":"report_processed","tool":"matomo_report_processed","expected_mode":"success","prompt_file":"report_processed.txt"}
5+
]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Use exactly one MCP tool call, and it must be matomo_report_processed. Do not call matomo_report_metadata. Use exactly these arguments and no other keys: {"idSite": {{idSite}}, "period": "day", "date": "2012-08-09", "reportUniqueId": "{{reportUniqueId}}", "filter_limit": 2, "filter_offset": 0}. Return the first row label only.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Use exactly one MCP tool call: matomo_site_get with arguments {"idSite": {{idSite}}}. Return a one-line summary only.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Use exactly one MCP tool call: matomo_site_list with arguments {"limit": 5}. Return only the count and first site name.
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
#!/usr/bin/env bash
#
# Runs the configured MCP smoke cases through the Gemini CLI and records
# per-case artifacts under "$ARTIFACT_DIR/gemini".
#
# Required environment:
#   GEMINI_API_KEY, BASE_URL, STATE_FILE, CASES_FILE, PROMPTS_DIR,
#   ARTIFACT_DIR, MATOMO_LOG_FILE
# Optional environment:
#   GEMINI_MODEL          (default: gemini-2.5-flash)
#   GEMINI_CLI_CMD        (default: gemini)
#   CASE_TIMEOUT_SECONDS  (default: 120; values <= 0 disable the timeout)
#   GEMINI_EXTRA_ARGS     (extra CLI flags, word-split on whitespace)
set -euo pipefail

# GEMINI_API_KEY is only checked for presence here.
# NOTE(review): presumably the Gemini CLI reads it from the environment - confirm.
GEMINI_API_KEY=${GEMINI_API_KEY:?GEMINI_API_KEY is required}
GEMINI_MODEL=${GEMINI_MODEL:-gemini-2.5-flash}
BASE_URL=${BASE_URL:?BASE_URL is required}
STATE_FILE=${STATE_FILE:?STATE_FILE is required}
CASES_FILE=${CASES_FILE:?CASES_FILE is required}
PROMPTS_DIR=${PROMPTS_DIR:?PROMPTS_DIR is required}
ARTIFACT_DIR=${ARTIFACT_DIR:?ARTIFACT_DIR is required}
MATOMO_LOG_FILE=${MATOMO_LOG_FILE:?MATOMO_LOG_FILE is required}
GEMINI_CLI_CMD=${GEMINI_CLI_CMD:-gemini}
CASE_TIMEOUT_SECONDS=${CASE_TIMEOUT_SECONDS:-120}

# A non-integer value (e.g. "120s") would make every later `[ ... -gt 0 ]`
# test error out and evaluate as false, silently disabling the timeout.
if ! [[ "$CASE_TIMEOUT_SECONDS" =~ ^-?[0-9]+$ ]]; then
  echo "CASE_TIMEOUT_SECONDS must be an integer number of seconds, got: $CASE_TIMEOUT_SECONDS" >&2
  exit 1
fi

MCP_URL="${BASE_URL}/index.php?module=API&method=McpServer.mcp&format=mcp"

# `// empty` turns a missing/null token into an empty string instead of the
# literal text "null", which would otherwise be sent as "Bearer null".
TOKEN_AUTH=$(jq -r '.token_auth // empty' "$STATE_FILE")
if [ -z "$TOKEN_AUTH" ]; then
  echo "token_auth is missing or empty in state file: $STATE_FILE" >&2
  exit 1
fi

mkdir -p "$ARTIFACT_DIR/gemini/transcripts" "$ARTIFACT_DIR/gemini/logs" "$ARTIFACT_DIR/gemini/results"

# The Gemini settings (which embed the auth token) live in a throwaway HOME
# outside the artifact tree and are removed on any exit path.
GEMINI_RUNTIME_DIR=$(mktemp -d "${TMPDIR:-/tmp}/mcp-gemini-runtime.XXXXXX")
GEMINI_HOME_DIR="$GEMINI_RUNTIME_DIR/home"
GEMINI_SETTINGS_DIR="$GEMINI_HOME_DIR/.gemini"
GEMINI_SETTINGS_FILE="$GEMINI_SETTINGS_DIR/settings.json"
trap 'rm -rf "$GEMINI_RUNTIME_DIR"' EXIT

if ! command -v "$GEMINI_CLI_CMD" >/dev/null 2>&1; then
  echo "Gemini CLI command not found: $GEMINI_CLI_CMD" >&2
  exit 1
fi

# `timeout` is only needed when a positive per-case timeout is configured.
if [ "$CASE_TIMEOUT_SECONDS" -gt 0 ] && ! command -v timeout >/dev/null 2>&1; then
  echo "timeout command not found but CASE_TIMEOUT_SECONDS=$CASE_TIMEOUT_SECONDS is configured" >&2
  exit 1
fi
32+
33+
# Generates the Gemini CLI settings file that registers the Matomo MCP server.
# Globals read: GEMINI_SETTINGS_DIR, GEMINI_SETTINGS_FILE, MCP_URL, TOKEN_AUTH
# Outputs:      writes JSON to "$GEMINI_SETTINGS_FILE"
setup_gemini_settings() {
  local bearer_header="Bearer $TOKEN_AUTH"

  mkdir -p "$GEMINI_SETTINGS_DIR"

  # Values are passed with --arg so jq handles all JSON string escaping.
  jq -n --arg url "$MCP_URL" --arg auth "$bearer_header" \
    '{"mcpServers": {"matomo": {"httpUrl": $url, "headers": {"Authorization": $auth}, "trust": true}}}' \
    > "$GEMINI_SETTINGS_FILE"
}

setup_gemini_settings
52+
53+
# Escapes a string for use as the replacement part of `sed "s|...|...|"`.
# A backslash is inserted before each backslash, '&' and '|' (the delimiter
# used by the caller); all other characters are emitted unchanged.
# Arguments: $1 - raw replacement text
# Outputs:   escaped text on stdout, without an added trailing newline
escape_sed_replacement() {
  local raw=$1
  local escaped=''
  local ch
  local i

  for ((i = 0; i < ${#raw}; i++)); do
    ch=${raw:i:1}
    case "$ch" in
      '\' | '&' | '|') escaped+="\\$ch" ;;
      *) escaped+=$ch ;;
    esac
  done

  printf '%s' "$escaped"
}
56+
57+
# Renders a prompt template by replacing every {{key}} placeholder with the
# value of the same-named top-level entry in "$STATE_FILE".
# Globals read: STATE_FILE
# Arguments:    $1 - template file, $2 - output file (overwritten)
# Notes:
#   - Uses `sed -i` in its GNU form (no backup-suffix argument).
#   - Non-string state values are substituted using jq's string
#     interpolation of the value.
render_prompt() {
  local input_file="$1"
  local output_file="$2"
  local line
  local key
  local value
  local key_pattern
  local escaped_value

  cp "$input_file" "$output_file"

  while IFS= read -r line; do
    # Each entry arrives as "key=value". Lines without '=' can only be
    # continuation lines of a multi-line value, so they are not treated as keys.
    case "$line" in
      *=*) ;;
      *) continue ;;
    esac

    # Split on the FIRST '=' only. `IFS='=' read -r key value` would drop a
    # single trailing '=' from the value (e.g. base64 padding).
    key=${line%%=*}
    value=${line#*=}

    # The key is matched literally: escape regex metacharacters and the '|'
    # delimiter so a key like "a.b" cannot match "{{aXb}}".
    key_pattern=$(printf '%s' "$key" | sed -e 's/[][\\.*^$|]/\\&/g')
    escaped_value=$(escape_sed_replacement "$value")
    sed -i "s|{{${key_pattern}}}|${escaped_value}|g" "$output_file"
  done < <(jq -r 'to_entries[] | "\(.key)=\(.value)"' "$STATE_FILE")
}
68+
69+
# Runs the Gemini CLI once for a rendered prompt.
# Globals read: CASE_TIMEOUT_SECONDS, GEMINI_CLI_CMD, GEMINI_MODEL,
#               GEMINI_HOME_DIR, GEMINI_EXTRA_ARGS (optional)
# Arguments:    $1 - file holding the prompt text
#               $2 - file receiving the CLI's combined stdout and stderr
# Returns:      the exit status of the CLI, or of `timeout` when a positive
#               CASE_TIMEOUT_SECONDS wraps the call
run_gemini() {
  local prompt_path="$1"
  local output_path="$2"
  local prompt_text
  local -a invocation=()

  # A failed read is tolerated: the CLI is then invoked with whatever text
  # was captured.
  prompt_text=$(cat "$prompt_path") || true

  # Only wrap the call in `timeout` when a positive limit is configured.
  if [ "$CASE_TIMEOUT_SECONDS" -gt 0 ]; then
    invocation+=(timeout "${CASE_TIMEOUT_SECONDS}s")
  fi

  # shellcheck disable=SC2206
  # Intentionally allow word splitting so GEMINI_EXTRA_ARGS can pass multiple CLI flags.
  invocation+=("$GEMINI_CLI_CMD" --model "$GEMINI_MODEL" --prompt "$prompt_text" ${GEMINI_EXTRA_ARGS:-})

  # HOME points at the runtime directory for this one command; stdin is
  # detached from the caller by reading /dev/null.
  HOME="$GEMINI_HOME_DIR" "${invocation[@]}" > "$output_path" 2>&1 < /dev/null
}
91+
92+
# Writes the JSON result record for one smoke case.
# Arguments: $1 - case id, $2 - expected tool, $3 - status, $4 - reason,
#            $5 - output file
# Note: observed_tool is always written as an empty string.
write_case_result() {
  local case_id="$1"
  local expected_tool="$2"
  local status="$3"
  local reason="$4"
  local result_file="$5"

  jq -n \
    --arg stage "gemini_case" \
    --arg case_id "$case_id" \
    --arg expected_tool "$expected_tool" \
    --arg observed_tool "" \
    --arg status "$status" \
    --arg reason "$reason" \
    --arg transcript_path "gemini/transcripts/${case_id}.txt" \
    --arg log_path "gemini/logs/${case_id}.log" \
    '{stage:$stage,case_id:$case_id,expected_tool:$expected_tool,observed_tool:$observed_tool,status:$status,reason:$reason,transcript_path:$transcript_path,log_path:$log_path}' \
    > "$result_file"
}

# Copies the log lines between a case's start and end markers into a slice file.
# Arguments: $1 - start marker, $2 - end marker, $3 - log file, $4 - slice file
# Markers are compared literally against the END of each line, so a case id
# that is a prefix of another id (or contains regex metacharacters) cannot
# match the wrong marker. Only the most recent marked range is kept, so
# markers left in the log by earlier runs do not leak stale evidence.
extract_case_log_slice() {
  local start_marker="$1"
  local end_marker="$2"
  local log_file="$3"
  local slice_file="$4"

  awk -v s="$start_marker" -v e="$end_marker" '
    function ends_with(line, marker,    pos) {
      pos = length(line) - length(marker) + 1
      return pos >= 1 && substr(line, pos) == marker
    }
    ends_with($0, s) { in_range = 1; n = 0; next }
    ends_with($0, e) { in_range = 0; next }
    in_range { buf[n++] = $0 }
    END { for (i = 0; i < n; i++) print buf[i] }
  ' "$log_file" > "$slice_file"
}

result_count=0
pass_count=0
skip_count=0

# One iteration per entry of the cases file (one compact JSON object per line).
while IFS= read -r case_json; do
  case_id=$(jq -r '.id' <<<"$case_json")
  tool=$(jq -r '.tool' <<<"$case_json")
  prompt_file=$(jq -r '.prompt_file' <<<"$case_json")

  rendered_prompt="$ARTIFACT_DIR/gemini/transcripts/${case_id}.prompt.txt"
  transcript_file="$ARTIFACT_DIR/gemini/transcripts/${case_id}.txt"
  log_slice_file="$ARTIFACT_DIR/gemini/logs/${case_id}.log"
  result_file="$ARTIFACT_DIR/gemini/results/${case_id}.json"

  # Cases listed in the state file's skip_cases are recorded as skipped, with
  # empty transcript and log placeholders, and never reach Gemini.
  if jq -e --arg case_id "$case_id" '.skip_cases // [] | index($case_id) != null' "$STATE_FILE" >/dev/null 2>&1; then
    write_case_result "$case_id" "$tool" "skip" "missing_fixture_dependency" "$result_file"
    : > "$transcript_file"
    : > "$log_slice_file"
    result_count=$((result_count + 1))
    skip_count=$((skip_count + 1))
    continue
  fi

  # Render the template, then prefix it with a blank line and a case tag.
  render_prompt "$PROMPTS_DIR/$prompt_file" "$rendered_prompt"
  {
    echo
    echo "[mcp-smoke-case:${case_id}]"
    cat "$rendered_prompt"
  } > "${rendered_prompt}.tmp"
  mv "${rendered_prompt}.tmp" "$rendered_prompt"

  # Bracket this case's section of the Matomo log with marker lines so it can
  # be sliced out afterwards. Nothing is written if the log does not exist yet.
  case_start_marker="MCP_SMOKE_CASE_START:${case_id}"
  case_end_marker="MCP_SMOKE_CASE_END:${case_id}"
  if [ -f "$MATOMO_LOG_FILE" ]; then
    printf '%s\n' "$case_start_marker" >> "$MATOMO_LOG_FILE"
  fi

  # A failing Gemini run must not abort the whole script; capture its status.
  cmd_exit=0
  run_gemini "$rendered_prompt" "$transcript_file" || cmd_exit=$?

  if [ -f "$MATOMO_LOG_FILE" ]; then
    printf '%s\n' "$case_end_marker" >> "$MATOMO_LOG_FILE"
    extract_case_log_slice "$case_start_marker" "$case_end_marker" "$MATOMO_LOG_FILE" "$log_slice_file"
  else
    : > "$log_slice_file"
  fi

  # Classify the outcome. Exit status 124 is what `timeout` reports when the
  # time limit was hit.
  # NOTE(review): success evidence and the tool name are checked independently
  # across the slice, so a success line for another tool plus any mention of
  # the expected tool counts as a pass - confirm against the log format.
  status="fail"
  reason=""
  if [ "$cmd_exit" -eq 124 ]; then
    reason="gemini_timeout"
  elif [ "$cmd_exit" -ne 0 ]; then
    reason="gemini_command_failed"
  elif grep -Eqi "MCP.*Call.*(successful|succeeded|success)" "$log_slice_file" \
    && grep -Fq "$tool" "$log_slice_file"; then
    status="pass"
    reason="tool success evidence found"
  elif grep -Eqi "MCP.*Call" "$log_slice_file"; then
    reason="tool success evidence missing"
  else
    reason="missing_mcp_call_evidence"
  fi

  write_case_result "$case_id" "$tool" "$status" "$reason" "$result_file"

  result_count=$((result_count + 1))
  if [ "$status" = "pass" ]; then
    pass_count=$((pass_count + 1))
  fi

done < <(jq -c '.[]' "$CASES_FILE")
193+
194+
# Merge the per-case result files into a single JSON array. The files are
# collected with a glob instead of `find | grep -q .`: under `pipefail` that
# pipeline can report failure when grep exits early and find receives SIGPIPE,
# which would wrongly produce an empty results.json.
result_files=()
for candidate in "$ARTIFACT_DIR"/gemini/results/*.json; do
  # With no matches the glob stays literal; the -f test filters that out.
  if [ -f "$candidate" ]; then
    result_files+=("$candidate")
  fi
done

if [ "${#result_files[@]}" -gt 0 ]; then
  jq -s '.' "${result_files[@]}" > "$ARTIFACT_DIR/gemini/results.json"
else
  # No cases ran (e.g. empty cases file): still emit a valid, empty array.
  echo '[]' > "$ARTIFACT_DIR/gemini/results.json"
fi

# Every result that neither passed nor was skipped counts as a failure, and
# any failure makes the script exit non-zero.
failure_count=$((result_count - pass_count - skip_count))
echo "Gemini smoke: $pass_count passed, $skip_count skipped, $failure_count failed ($result_count total)"
if [ "$failure_count" -ne 0 ]; then
  exit 1
fi

0 commit comments

Comments
 (0)