Skip to content

Commit f9dc2bf

Browse files
authored
feat(pdf): add cover page and builder class (#19)
1 parent a47b492 commit f9dc2bf

6 files changed

Lines changed: 259 additions & 121 deletions

File tree

README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,27 @@ docker run -p 7860:7860 neurark/mcp-data-assistant
4040

4141
**Demo** — open [`sample_docs/report-demo.pdf`](sample_docs/report-demo.pdf)
4242

43+
### Advanced PDF reports
44+
The PDF tool now supports a cover page with an optional logo and summary box, as
45+
well as styled charts. A minimal example:
46+
47+
```json
48+
{
49+
"title": "Sales Overview",
50+
"summary": "Highlights for 2024",
51+
"cover": {"logo_path": "assets/logo.png"},
52+
"sections": [
53+
{"title": "Intro", "type": "paragraph", "text": "Quarterly results"},
54+
{
55+
"title": "Revenue",
56+
"type": "chart",
57+
"chart_spec": {"chart_type": "bar", "labels": ["Q1", "Q2"], "values": [1, 2], "color": "#ff8800"}
58+
}
59+
]
60+
}
61+
```
62+
Run `create_pdf` with this JSON to produce a multi-page report with a cover page and a styled chart.
63+
4364
## How it works
4465
The app launches Gradio with `mcp_server=True`.
4566
The LLM discovers three tools via the MCP schema and chains them as

sample_docs/report-demo-v2.pdf

118 KB
Binary file not shown.

static/pdf_schema.json

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
},
2020
"additionalProperties": false
2121
},
22+
"summary": {
23+
"type": "string",
24+
"description": "Short executive summary displayed on the cover page"
25+
},
2226
"insights": {
2327
"type": "array",
2428
"description": "List of insight paragraphs to highlight on the first page",
@@ -35,14 +39,13 @@
3539
"text": {"type": "string"},
3640
"data": {"type": ["array", "object"]},
3741
"chart_spec": {
38-
"type": "object",
39-
"properties": {
40-
"chart_type": {"type": "string", "enum": ["bar", "pie", "line"]},
41-
"labels": {"type": "array", "items": {"type": "string"}},
42-
"values": {"type": "array", "items": {"type": "number"}}
43-
},
44-
"required": ["chart_type", "labels", "values"],
45-
"additionalProperties": false
42+
"oneOf": [
43+
{"$ref": "#/definitions/chartSpec"},
44+
{
45+
"type": "array",
46+
"items": {"$ref": "#/definitions/chartSpec"}
47+
}
48+
]
4649
}
4750
},
4851
"required": ["title", "type"],
@@ -51,5 +54,20 @@
5154
}
5255
},
5356
"required": ["title", "sections"],
54-
"additionalProperties": false
57+
"additionalProperties": false,
58+
"definitions": {
59+
"chartSpec": {
60+
"type": "object",
61+
"properties": {
62+
"chart_type": {"type": "string", "enum": ["bar", "pie", "line"]},
63+
"labels": {"type": "array", "items": {"type": ["string", "number"]}},
64+
"values": {"type": "array", "items": {"type": "number"}},
65+
"color": {"type": "string", "description": "Hex color"},
66+
"width": {"type": "number", "description": "Figure width in inches"},
67+
"height": {"type": "number", "description": "Figure height in inches"}
68+
},
69+
"required": ["chart_type", "labels", "values"],
70+
"additionalProperties": false
71+
}
72+
}
5573
}

tests/test_ollama_integration.py

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@
44
from agent.ollama_integration import (
55
check_ollama_available,
66
get_ollama_model_name,
7-
create_ollama_model
7+
create_ollama_model,
88
)
99
from agent import answer
1010
import httpx
1111

1212
# Suppress the coroutine warning for tests
13-
warnings.filterwarnings("ignore",
14-
message="coroutine '.*' was never awaited",
15-
category=RuntimeWarning)
13+
warnings.filterwarnings(
14+
"ignore", message="coroutine '.*' was never awaited", category=RuntimeWarning
15+
)
1616

1717

1818
def test_check_ollama_available():
@@ -58,7 +58,7 @@ def test_ollama_provider():
5858
mock_result.final_output = "The answer to 2+2 is 4"
5959

6060
# Patch the asyncio run function to avoid actual API calls
61-
with patch('agent.assistant.asyncio.run', return_value=mock_result):
61+
with patch("agent.assistant.asyncio.run", return_value=mock_result):
6262
# This should now run without errors since the API call is mocked
6363
response, _ = answer("What is 2+2?", provider="ollama")
6464

@@ -97,6 +97,7 @@ def test_ollama_provider():
9797

9898
except Exception as e:
9999
import traceback
100+
100101
print(f"❌ Error testing Ollama provider: {str(e)}")
101102
print(traceback.format_exc())
102103
raise
@@ -113,6 +114,7 @@ def test_ollama_tool_knowledge():
113114
# Create a session to maintain context
114115
from agent.session_manager import session_manager
115116
from agents.mcp import MCPServerSse
117+
116118
session_id = session_manager.create_session()
117119
print(f"\nCreated test session: {session_id}")
118120

@@ -188,32 +190,45 @@ async def connect_mcp_server():
188190
found_terms = [term for term in expected_terms if term in response.lower()]
189191

190192
print(f"Found terms: {found_terms}")
191-
assert found_terms, f"Expected tool terms ({', '.join(expected_terms)}) not found in response"
193+
assert found_terms, (
194+
f"Expected tool terms ({', '.join(expected_terms)}) not found in response"
195+
)
192196

193197
# Check for specific error messages
194-
assert "[] is too short - 'messages'" not in response, "Error: Empty messages array sent to Ollama API"
198+
assert "[] is too short - 'messages'" not in response, (
199+
"Error: Empty messages array sent to Ollama API"
200+
)
195201

196202
# Step 3: Test conversation history and follow-up
197203
print("\n3️⃣ Testing follow-up question...")
198204
followup_query = "Can you list the tools again and explain what each one does?"
199-
followup_response, _ = answer(followup_query, provider="ollama", session_id=session_id)
205+
followup_response, _ = answer(
206+
followup_query, provider="ollama", session_id=session_id
207+
)
200208
print(f"Follow-up response: {followup_response[:500]}...")
201209

202210
# Verify the response has relevant terms
203211
followup_terms = ["csv", "sql", "pdf", "database", "file"]
204-
found_followup_terms = [term for term in followup_terms if term in followup_response.lower()]
205-
assert found_followup_terms, f"Follow-up response doesn't contain expected terms"
212+
found_followup_terms = [
213+
term for term in followup_terms if term in followup_response.lower()
214+
]
215+
assert found_followup_terms, "Follow-up response doesn't contain expected terms"
206216

207217
# Final check: Conversation history maintained
208218
history = session_manager.get_messages(session_id)
209219
print(f"\n✅ Session maintained context through {len(history)} messages")
210-
assert len(history) >= 4, "Expected at least 4 messages in conversation history (2 queries + 2 responses)"
220+
assert len(history) >= 4, (
221+
"Expected at least 4 messages in conversation history (2 queries + 2 responses)"
222+
)
211223

212224
print("\n✅ Direct Ollama integration test PASSED.")
213-
print(f" Found terms in responses: {', '.join(found_terms + found_followup_terms)}")
225+
print(
226+
f" Found terms in responses: {', '.join(found_terms + found_followup_terms)}"
227+
)
214228

215229
except Exception as e:
216230
import traceback
231+
217232
print(f"\n❌ Direct Ollama integration test FAILED: {str(e)}")
218233
print(traceback.format_exc())
219234
raise
@@ -258,13 +273,13 @@ def test_provider_fallback():
258273
# Test fallback if Ollama is unavailable
259274
if not check_ollama_available():
260275
response, result = answer("Test", provider="ollama")
261-
assert (
262-
"⚠️ Ollama not available" in response
263-
), "Expected unavailable message for Ollama provider"
276+
assert "⚠️ Ollama not available" in response, (
277+
"Expected unavailable message for Ollama provider"
278+
)
264279

265280
# Test fallback if OpenAI key is not set
266281
if os.getenv("OPENAI_API_KEY") is None:
267282
response, result = answer("Test", provider="openai")
268-
assert (
269-
"⚠️ OPENAI_API_KEY not set" in response
270-
), "Expected API key message for OpenAI provider"
283+
assert "⚠️ OPENAI_API_KEY not set" in response, (
284+
"Expected API key message for OpenAI provider"
285+
)

tests/test_pdf_tool.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def test_pdf_with_edge_cases(tmp_path):
115115
"""Test PDF creation with edge cases."""
116116
# Long text
117117
long_text = (
118-
"This is a very long text that should be wrapped properly " "in the PDF table "
118+
"This is a very long text that should be wrapped properly in the PDF table "
119119
) * 5
120120
data = {
121121
"title": "Edge Case Test",
@@ -238,6 +238,56 @@ def _count_images(pdf_path: Path) -> int:
238238
return f.read().count(b"/Subtype /Image")
239239

240240

241+
def test_pdf_with_cover_and_summary(tmp_path):
242+
data = {
243+
"title": "Cover Report",
244+
"summary": "Quick overview",
245+
"cover": {"logo_path": "assets/logo.png"},
246+
"sections": [{"title": "Intro", "type": "paragraph", "text": "Hello"}],
247+
}
248+
pdf_path = Path(create_pdf(data, out_path=tmp_path / "cover.pdf"))
249+
assert pdf_path.exists()
250+
from PyPDF2 import PdfReader
251+
252+
reader = PdfReader(str(pdf_path))
253+
assert len(reader.pages) >= 2
254+
assert "Quick overview" in reader.pages[0].extract_text()
255+
256+
257+
def test_multiple_chart_specs(tmp_path):
258+
data = {
259+
"title": "Charts",
260+
"sections": [
261+
{
262+
"title": "Multi",
263+
"type": "chart",
264+
"chart_spec": [
265+
{
266+
"chart_type": "bar",
267+
"labels": ["A", "B"],
268+
"values": [1, 2],
269+
"color": "#ff0000",
270+
},
271+
{"chart_type": "line", "labels": [1, 2], "values": [3, 4]},
272+
],
273+
}
274+
],
275+
}
276+
pdf_path = Path(create_pdf(data, out_path=tmp_path / "multi.pdf"))
277+
assert pdf_path.exists()
278+
assert _count_images(pdf_path) >= 3
279+
280+
281+
def test_builder_class(tmp_path):
282+
from tools.pdf_tool import PdfReportBuilder
283+
284+
with PdfReportBuilder(tmp_path / "builder.pdf") as builder:
285+
builder.add_cover("Title")
286+
builder.add_section({"title": "P", "type": "paragraph", "text": "Hi"})
287+
path = builder.save()
288+
assert Path(path).exists()
289+
290+
241291
def test_pdf_with_sections_and_charts(tmp_path):
242292
"""Generate a PDF using the new schema with multiple chart types."""
243293
data = {

0 commit comments

Comments
 (0)