Skip to content

Commit 3a0df75

Browse files
authored
feat: integrated LLM Assistant for PDF reports (#12)
* feat: integrated LLM Assistant for PDF reports
  - Added agent with OpenAI integration to handle natural language requests
  - Enhanced PDF generation tool to handle nested data
  - Added special handling for data from the agent
  - Fixed several display issues with PDF reports
  - Extended test suite for better error handling
  - Improved demo scripts for CLI testing
* fix: fix linting issues with flake8
  - Fixed missing whitespace around arithmetic operators in app.py and pdf_tool.py
  - Fixed line break positioning for binary operators in tests and pdf_tool.py
  - Updated GitHub CI configuration to ignore conflicting rules W503 and W504
1 parent 54f275c commit 3a0df75

10 files changed

Lines changed: 847 additions & 63 deletions

File tree

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ jobs:
1414
with:
1515
python-version: "3.12"
1616
- run: pip install flake8
17-
- run: flake8 app.py tools/ tests/ --max-line-length 100 --exclude=".venv/" --ignore=E302,W293,W291,E128,W292,F401,F841,E305
17+
- run: flake8 app.py tools/ tests/ --max-line-length 100 --exclude=".venv/" --ignore=E302,W293,W291,E128,W292,F401,F841,E305,W503,W504
1818

1919
tests:
2020
runs-on: ubuntu-latest

agent/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
"""
2+
Agent integration module for the MCP Data Assistant.
3+
"""
4+
5+
from agent.assistant import answer
6+
7+
__all__ = ["answer"]

agent/assistant.py

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,67 @@
1+
from __future__ import annotations
2+
import os
3+
import asyncio
4+
from agents import Agent, Runner
5+
from agents.mcp import MCPServerSse
6+
7+
# Gradio 5.29 SSE endpoint
8+
MCP_SSE_URL = os.getenv(
9+
"MCP_SSE_URL",
10+
"http://127.0.0.1:7860/gradio_api/mcp/sse",
11+
)
12+
13+
# Create MCP server instance but don't connect yet
14+
mcp_server = MCPServerSse(
15+
params={"url": MCP_SSE_URL},
16+
cache_tools_list=True,
17+
)
18+
19+
# Initialize agent
20+
agent = Agent(
21+
name="NeurArk Data Assistant",
22+
instructions=(
23+
"You are a data assistant that can analyze tabular data and create PDFs.\n"
24+
"You can work with SQL databases, CSV files, and generate PDF reports.\n"
25+
"Common workflows include:\n"
26+
"- Query data from database then generate PDF report with results\n"
27+
"- Analyze CSV files and create summary reports\n"
28+
"- Generate custom reports based on user specifications\n"
29+
"You should auto-discover available tools via the MCP server connection.\n\n"
30+
"When working with databases:\n"
31+
"- First, discover what tables are available in the database\n"
32+
"- If the user mentions a table that doesn't exist, look for alternatives\n"
33+
"- Explore the structure of the tables to understand columns\n"
34+
"- Execute appropriate queries based on what you discovered\n\n"
35+
"When generating PDF reports:\n"
36+
"- The 'data_json' parameter should be a JSON string with data to include\n"
37+
"- Always include the generated PDF file path in your response\n"
38+
"- Example format: {\"title\": \"Report Title\", \"data\": \"Your Data\"}\n"
39+
),
40+
model="gpt-4.1-mini",
41+
mcp_servers=[mcp_server],
42+
)
43+
44+
45+
async def _run_agent(prompt: str) -> str:
    """Send ``prompt`` to the agent while the MCP server connection is open.

    The module-level ``mcp_server`` is entered with ``async with`` for the
    duration of the run, and the run's ``final_output`` is returned.
    """
    # The MCP connection must be open before the agent starts running
    async with mcp_server:
        run_result = await Runner.run(starting_agent=agent, input=prompt)
        # final_output carries the agent's reply (e.g. a PDF path or text)
        return run_result.final_output
52+
53+
54+
def answer(prompt: str, history: object = None) -> str:
    """Synchronous wrapper for running the agent.

    Args:
        prompt: The user's natural-language request.
        history: Prior chat turns. Accepted and ignored so that this function
            can be used directly as a ``gr.ChatInterface`` callback, which
            invokes its function with the message and the chat history.

    Returns:
        The agent's final output, a warning string when ``OPENAI_API_KEY`` is
        not set, or an error string (message plus traceback) if the run fails.
    """
    if not os.getenv("OPENAI_API_KEY"):
        return "⚠️ OPENAI_API_KEY not set."

    try:
        # Run the async function in a synchronous context
        return asyncio.run(_run_agent(prompt))
    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        print(f"Agent error: {str(e)}")
        print(f"Error trace: {error_trace}")
        return f"Error: {str(e)}\nTrace: {error_trace}"

app.py

Lines changed: 178 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,33 @@
1+
# existing imports
12
import gradio as gr
3+
import json
4+
import pathlib
5+
import threading
6+
import time
7+
import requests
28
from tools.sql_tool import run_sql
39
from tools.csv_tool import summarise_csv
410
from tools.pdf_tool import create_pdf
11+
# assistant
12+
from agent import answer
13+
514

615
def server_status() -> str:
    """
    A dummy function to show the server is alive.

    Returns:
        str: Status message confirming the server is running
    """
    return "✅ MCP Data Assistant server is running."


# Function attribute to hide from MCP. Assigned at definition time so the
# marker already exists before the function is ever called (previously it was
# only set as a side effect of the first call).
# NOTE(review): confirm which component actually reads this attribute.
server_status._hide_from_mcp = True
1425

15-
with gr.Blocks() as demo:
26+
27+
with gr.Blocks() as tools_demo:
1628
gr.Markdown("# MCP Data Assistant")
17-
gr.Markdown("This server will expose three tools (SQL query, CSV summary, PDF report).")
18-
29+
gr.Markdown("This server will expose three tools (SQL, CSV summary, PDF report).")
30+
1931
# Register MCP tools
2032
run_sql_interface = gr.Interface(
2133
fn=run_sql,
@@ -26,7 +38,7 @@ def server_status() -> str:
2638
examples=["SELECT 1 AS one"],
2739
api_name="sql"
2840
)
29-
41+
3042
summarise_csv_interface = gr.Interface(
3143
fn=summarise_csv,
3244
inputs=gr.Textbox(label="CSV File Path"),
@@ -36,47 +48,181 @@ def server_status() -> str:
3648
examples=["sample_data/people.csv"],
3749
api_name="csv"
3850
)
39-
51+
52+
# Wrapper around create_pdf to ensure data parameter is properly processed
53+
def create_pdf_wrapper(data_json, out_path=None, include_chart=True):
    """
    Generate a professional PDF report from provided data.

    Normalises the incoming data to a dictionary and hands it to
    ``create_pdf``. Invalid JSON, or a value that is neither a dict nor a
    list, is turned into a small error dictionary so a report is still
    produced.

    Args:
        data_json: JSON string or object containing the data to include
        out_path: Optional custom path for the generated PDF file. An empty
            or whitespace-only string is treated as "not provided".
        include_chart: Whether to include a bar chart visualization

    Returns:
        The value returned by ``create_pdf`` (the generated PDF path). If
        PDF creation fails, an error report is generated instead; if that
        fails as well, the string "Critical error creating PDF" is returned.
        No exception is propagated to the caller.
    """
    # Debug log (minimal)
    print(f"PDF request received with type: {type(data_json)}")

    # A textbox left blank submits "" rather than None; the UI promises the
    # default location in that case, so normalise it here.
    if isinstance(out_path, str) and not out_path.strip():
        out_path = None

    # Parse JSON string to dict if needed
    if isinstance(data_json, str):
        try:
            data = json.loads(data_json)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; report the bad input
            # (truncated) instead of failing the request.
            preview = data_json[:200] + "..." if len(data_json) > 200 else data_json
            data = {"error": "Invalid JSON", "raw_input": preview}
    else:
        # Use the data directly
        data = data_json

    if isinstance(data, list):
        # Convert list to simple dictionary with indexed keys
        data = {f"item_{i}": item for i, item in enumerate(data, start=1)}
    elif not isinstance(data, dict):
        # Unsupported type - create error dict
        data = {
            "error": "Unsupported data type",
            "received_type": str(type(data)),
        }

    try:
        return create_pdf(data, out_path, include_chart)
    except Exception as e:
        # If PDF creation fails, create an error report
        try:
            error_data = {"error": f"Failed to create PDF: {str(e)}"}
            return create_pdf(error_data, out_path, include_chart=False)
        except Exception:
            # Last resort if even the error PDF can't be created
            return "Critical error creating PDF"
116+
40117
create_pdf_interface = gr.Interface(
41-
fn=create_pdf,
118+
fn=create_pdf_wrapper,
42119
inputs=[
43-
gr.JSON(label="Report Data"),
44-
gr.Textbox(label="Output Path (optional)",
45-
placeholder="Leave empty for default location"),
120+
gr.Textbox(
121+
label="Report Data (JSON)",
122+
value='{"customer": "ACME", "total": 1000}'
123+
),
124+
gr.Textbox(
125+
label="Output Path (optional)",
126+
placeholder="Leave empty for default location"
127+
),
46128
gr.Checkbox(label="Include Chart", value=True)
47129
],
48130
outputs=gr.Textbox(label="Generated PDF Path"),
49131
title="PDF Report Generator",
50132
description="Create professional PDF reports with data and optional charts",
51133
examples=[
52-
[{"customer": "ACME", "order_id": 12345, "total": 999, "items": 5}, None, True]
134+
['{"customer": "ACME", "total": 999}',
135+
None, True]
53136
],
54137
api_name="pdf"
55138
)
56-
139+
57140
# Add simple UI components
58141
status_btn = gr.Button("Ping server")
59142
status_output = gr.Textbox()
60-
status_btn.click(server_status, outputs=status_output)
143+
# Hide from API and MCP
144+
status_btn.click(server_status, outputs=status_output, api_name=False)
145+
146+
147+
# ---------- Assistant tab ----------
148+
assistant_chat = gr.ChatInterface(
149+
fn=answer,
150+
title="NeurArk Data Assistant",
151+
examples=[
152+
"Show me total sales for 2024 and create a PDF report"
153+
],
154+
api_name=False, # Hide from API and MCP
155+
# Specify message type explicitly to avoid warning
156+
chatbot=gr.Chatbot(type="messages"),
157+
type="messages"
158+
)
159+
160+
161+
# ---------- Tabs UI -----------------
162+
demo = gr.TabbedInterface(
163+
[tools_demo, assistant_chat],
164+
["Tools demo", "Assistant"],
165+
title="NeurArk MCP Data Assistant",
166+
)
167+
168+
169+
# Create a function to save the schema with retry
170+
def save_schema_with_retry(retries=3, delay=0.5):
    """Try to save the schema with retries in case the server isn't ready yet.

    Fetches the MCP schema from the local server, keeps only the ``sql``,
    ``csv`` and ``pdf`` entries, and writes them to ``static/schema.json``.

    Args:
        retries: Maximum number of attempts.
        delay: Seconds to wait between failed attempts.

    Returns:
        The filtered schema dictionary on success, or ``None`` if every
        attempt failed (or ``retries`` is 0).
    """
    # Use a fixed URL
    schema_url = "http://127.0.0.1:7860/gradio_api/mcp/schema"
    exposed_tools = ("sql", "csv", "pdf")

    for attempt in range(retries):
        try:
            pathlib.Path("static").mkdir(exist_ok=True)
            response = requests.get(schema_url, timeout=2)  # Short timeout
            if response.status_code != 200:
                # Treat a non-200 reply as a failed attempt so it is reported
                # and followed by the retry delay, instead of being skipped
                # silently.
                raise RuntimeError(
                    f"unexpected HTTP status {response.status_code}"
                )

            schema = response.json()

            # Keep only the tools we want to expose
            filtered_schema = {
                k: v for k, v in schema.items()
                if k in exposed_tools
            }

            with open("static/schema.json", "w", encoding="utf-8") as f:
                json.dump(filtered_schema, f, indent=2)
            print("Schema saved to static/schema.json")

            # For information, display available tools
            tools_list = ', '.join(filtered_schema.keys())
            print(f"MCP Tools available: {tools_list}")
            return filtered_schema
        except Exception as e:
            if attempt < retries - 1:
                print(
                    f"Attempt {attempt + 1}/{retries} failed: {e}. "
                    f"Retrying in {delay}s..."
                )
                time.sleep(delay)
            else:
                print(f"Failed to save schema after {retries} attempts: {e}")
    return None
205+
206+
207+
# Function to save schema in background
208+
def background_save_schema():
    """Wait briefly, then attempt to save the schema.

    Meant to be used as a thread target: it sleeps for a fixed grace period
    and then calls ``save_schema_with_retry`` with 3 attempts, 1 second apart.
    """
    startup_grace_seconds = 2.0
    # Give the server time to start before the first request
    time.sleep(startup_grace_seconds)
    save_schema_with_retry(retries=3, delay=1.0)
213+
61214

62215
if __name__ == "__main__":
63-
# Ensure the static folder exists
64-
import os
65-
import pathlib
66-
import json
67-
import requests
68-
216+
# Create and start a thread to save the schema in the background
217+
schema_thread = threading.Thread(target=background_save_schema)
218+
schema_thread.daemon = True # Thread will stop when main program stops
219+
schema_thread.start()
220+
221+
print("Starting MCP server...")
222+
69223
# Enable MCP server for LLM tools access
70-
app = demo.launch(mcp_server=True, share=False, show_error=True)
71-
72-
# After launch, fetch and save the schema
73-
try:
74-
pathlib.Path("static").mkdir(exist_ok=True)
75-
schema_url = f"{app.local_url}gradio_api/mcp/schema"
76-
response = requests.get(schema_url)
77-
if response.status_code == 200:
78-
with open("static/schema.json", "w") as f:
79-
f.write(json.dumps(response.json(), indent=2))
80-
print("Schema saved to static/schema.json")
81-
except Exception as e:
82-
print(f"Failed to save schema: {e}")
224+
# In Gradio 5.29, launch the server in a blocking way (default)
225+
demo.launch(mcp_server=True, share=False, show_error=True)
226+
227+
# Reached only after launch() returns, e.g. once the server is interrupted and shuts down
228+
print("Server stopped.")

0 commit comments

Comments
 (0)