From 4060fcbc614c4e355be76093deabec742efee4a3 Mon Sep 17 00:00:00 2001 From: NeurArk Date: Mon, 19 May 2025 18:29:41 +0200 Subject: [PATCH] feat(pdf): add builder class with context cleanup --- tests/test_pdf_tool.py | 61 ++++++++++++++++++----- tests/test_pdf_visuals.py | 18 ++++++- tools/pdf_tool.py | 101 +++++++++++++++++++------------------- 3 files changed, 115 insertions(+), 65 deletions(-) diff --git a/tests/test_pdf_tool.py b/tests/test_pdf_tool.py index bd8cfd1..091c2ae 100644 --- a/tests/test_pdf_tool.py +++ b/tests/test_pdf_tool.py @@ -2,12 +2,15 @@ import json import pytest import traceback -from tools.pdf_tool import create_pdf, _build_table +from tools.pdf_tool import PdfReportBuilder, create_pdf, _build_table def test_pdf_creation_size(tmp_path): sample = {"foo": "bar", "grand_total": 999} - file_path = create_pdf(sample, out_path=tmp_path / "test.pdf") + with PdfReportBuilder(tmp_path / "test.pdf") as builder: + builder.add_cover("Report") + builder.add_section({"title": "Data", "type": "table", "data": sample}) + file_path = builder.save() assert Path(file_path).exists() assert Path(file_path).stat().st_size > 1000 @@ -41,7 +44,10 @@ def test_pdf_with_different_data_types(tmp_path): "none_value": None, "grand_total": 999, } - file_path = create_pdf(sample, out_path=tmp_path / "datatypes.pdf") + with PdfReportBuilder(tmp_path / "datatypes.pdf") as builder: + builder.add_cover("Types") + builder.add_section({"title": "Data", "type": "table", "data": sample}) + file_path = builder.save() assert Path(file_path).exists() except Exception as e: print(f"PDF creation with mixed data types failed: {str(e)}") @@ -65,7 +71,10 @@ def test_pdf_from_json_string(tmp_path): """ # Parse JSON to dict data = json.loads(json_str) - file_path = create_pdf(data, out_path=tmp_path / "from_json.pdf") + with PdfReportBuilder(tmp_path / "from_json.pdf") as builder: + builder.add_cover(data.get("title", "Report")) + builder.add_section({"title": "Data", "type": "table", "data": data}) + file_path = builder.save() assert Path(file_path).exists() except Exception as e: print(f"PDF creation from JSON string failed: {str(e)}") @@ -94,7 +103,10 @@ def test_pdf_with_sql_like_data(tmp_path): data[f"sales_{i}"] = item["sales"] # Add grand total data["grand_total"] = data["total_sales"] - file_path = create_pdf(data, out_path=tmp_path / "sql_results.pdf") + with PdfReportBuilder(tmp_path / "sql_results.pdf") as builder: + builder.add_cover(data["title"]) + builder.add_section({"title": "Data", "type": "table", "data": data}) + file_path = builder.save() assert Path(file_path).exists() except Exception as e: print(f"SQL results PDF creation failed: {str(e)}") @@ -105,9 +117,10 @@ def test_pdf_with_sql_like_data(tmp_path): def test_pdf_without_chart(tmp_path): """Test PDF creation with chart disabled.""" sample = {"customer": "No Chart Test", "value": 500, "grand_total": 500} - file_path = create_pdf( - sample, out_path=tmp_path / "no_chart.pdf", include_chart=False - ) + with PdfReportBuilder(tmp_path / "no_chart.pdf") as builder: + builder.add_cover("No Chart Test") + builder.add_section({"title": "Data", "type": "table", "data": sample}) + file_path = builder.save() assert Path(file_path).exists() @@ -125,7 +138,10 @@ def test_pdf_with_edge_cases(tmp_path): "value_3": 300, "grand_total": 600, } - file_path = create_pdf(data, out_path=tmp_path / "edge_case.pdf") + with PdfReportBuilder(tmp_path / "edge_case.pdf") as builder: + builder.add_cover(data["title"]) + builder.add_section({"title": "Data", "type": "table", "data": data}) + file_path = builder.save() assert Path(file_path).exists() @@ -229,7 +245,10 @@ def test_empty_value_handling(tmp_path): "zero_value": 0, "grand_total": 1000, } - output_path = create_pdf(data, out_path=tmp_path / "empty_values.pdf") + with PdfReportBuilder(tmp_path / "empty_values.pdf") as builder: + builder.add_cover(data["title"]) + builder.add_section({"title": "Data", "type": "table", "data": data}) + output_path = builder.save() assert Path(output_path).exists() @@ -245,7 +264,15 @@ def test_pdf_with_cover_and_summary(tmp_path): "cover": {"logo_path": "assets/logo.png"}, "sections": [{"title": "Intro", "type": "paragraph", "text": "Hello"}], } - pdf_path = Path(create_pdf(data, out_path=tmp_path / "cover.pdf")) + with PdfReportBuilder(tmp_path / "cover.pdf") as builder: + builder.add_cover( + data["title"], + data["cover"].get("logo_path"), + data["summary"], + ) + for sec in data["sections"]: + builder.add_section(sec) + pdf_path = Path(builder.save()) assert pdf_path.exists() from PyPDF2 import PdfReader @@ -273,7 +300,11 @@ def test_multiple_chart_specs(tmp_path): } ], } - pdf_path = Path(create_pdf(data, out_path=tmp_path / "multi.pdf")) + with PdfReportBuilder(tmp_path / "multi.pdf") as builder: + builder.add_cover(data["title"]) + for sec in data["sections"]: + builder.add_section(sec) + pdf_path = Path(builder.save()) assert pdf_path.exists() assert _count_images(pdf_path) >= 3 @@ -329,6 +360,10 @@ def test_pdf_with_sections_and_charts(tmp_path): ], } - pdf_path = Path(create_pdf(data, out_path=tmp_path / "complex.pdf")) + with PdfReportBuilder(tmp_path / "complex.pdf") as builder: + builder.add_cover(data["title"]) + for sec in data["sections"]: + builder.add_section(sec) + pdf_path = Path(builder.save()) assert pdf_path.exists() assert _count_images(pdf_path) >= 4 diff --git a/tests/test_pdf_visuals.py b/tests/test_pdf_visuals.py index 39f9327..143075a 100644 --- a/tests/test_pdf_visuals.py +++ b/tests/test_pdf_visuals.py @@ -1,5 +1,5 @@ from pathlib import Path -from tools.pdf_tool import create_pdf +from tools.pdf_tool import PdfReportBuilder def _count_image_references(pdf_path: Path) -> int: @@ -11,6 +11,20 @@ def _count_image_references(pdf_path: Path) -> int: def test_pdf_with_logo_and_chart(tmp_path): data = {"a": 1, "b": 2, "c": 3, "grand_total": 6} - pdf_path = Path(create_pdf(data, out_path=tmp_path / "visual.pdf")) + with PdfReportBuilder(tmp_path / "visual.pdf") as builder: + builder.add_cover("Visual") + builder.add_section({"title": "Data", "type": "table", "data": data}) + builder.add_section( + { + "title": "Chart", + "type": "chart", + "chart_spec": { + "chart_type": "bar", + "labels": list(data.keys()), + "values": list(data.values()), + }, + } + ) + pdf_path = Path(builder.save()) assert pdf_path.exists() and pdf_path.stat().st_size > 8000 assert _count_image_references(pdf_path) >= 2 # logo + chart diff --git a/tools/pdf_tool.py b/tools/pdf_tool.py index 786567b..4ce2b10 100644 --- a/tools/pdf_tool.py +++ b/tools/pdf_tool.py @@ -6,9 +6,8 @@ import datetime as _dt from pathlib import Path -from typing import Dict, List, Any +from typing import Dict, List, Any, cast import tempfile -import os from reportlab.lib import colors from reportlab.lib.pagesizes import A4 @@ -35,9 +34,9 @@ class PdfReportBuilder: def __init__(self, out_path: str | Path): self.out_path = Path(out_path) self.story: List[Flowable] = [] - self.tmp_pngs: List[str] = [] self.styles = getSampleStyleSheet() self.doc = SimpleDocTemplate(str(self.out_path), pagesize=A4) + self.tmp_dir = tempfile.TemporaryDirectory() def add_cover( self, @@ -93,11 +92,10 @@ def add_section(self, section: Dict[str, Any]) -> None: spec = section.get("chart_spec", {}) specs = spec if isinstance(spec, list) else [spec] for cs in specs: - png = create_chart(cs) + png = create_chart(cs, self.tmp_dir.name) width = cs.get("width", 400) height = cs.get("height", 250) self.story.append(Image(png, width=width, height=height)) - self.tmp_pngs.append(png) else: self.story.append( Paragraph("Unsupported section type", self.styles["Italic"]) @@ -107,18 +105,14 @@ def add_section(self, section: Dict[str, Any]) -> None: def save(self) -> str: """Finalize the PDF and clean up temporary files.""" self.doc.build(self.story) - for png in self.tmp_pngs: - if os.path.exists(png): - os.unlink(png) + self.tmp_dir.cleanup() return str(self.out_path.resolve()) def __enter__(self) -> "PdfReportBuilder": return self def __exit__(self, exc_type, exc, tb) -> None: - for png in self.tmp_pngs: - if os.path.exists(png): - os.unlink(png) + self.tmp_dir.cleanup() def _build_table(data: Dict[str, object]) -> Table: @@ -226,7 +220,7 @@ def _build_table_from_list(data: List[Any]) -> Table: return Table(rows, style=TableStyle(styles)) -def create_chart(chart_spec: Dict[str, Any]) -> str: +def create_chart(chart_spec: Dict[str, Any], tmp_dir: str | None = None) -> str: """Generate a chart image from a specification and return PNG path.""" chart_type = chart_spec.get("chart_type", "bar") labels = chart_spec.get("labels", []) @@ -246,7 +240,7 @@ def create_chart(chart_spec: Dict[str, Any]) -> str: raise ValueError(f"Unsupported chart type: {chart_type}") fig.tight_layout() - tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png") + tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png", dir=tmp_dir) fig.savefig(tmp.name, bbox_inches="tight") _plt.close(fig) return tmp.name @@ -283,48 +277,55 @@ def create_pdf( timestamp = _dt.datetime.now().strftime("%Y%m%d-%H%M%S") if out_path is None: - out_path = REPORT_DIR / f"report-{timestamp}.pdf" + target = REPORT_DIR / f"report-{timestamp}.pdf" else: - out_path = Path(out_path) - out_path.parent.mkdir(parents=True, exist_ok=True) + target = Path(out_path) + target.parent.mkdir(parents=True, exist_ok=True) logo_default = Path(__file__).resolve().parent.parent / "assets" / "logo.png" - builder = PdfReportBuilder(out_path) - - has_sections = isinstance(data, dict) and "sections" in data - - if has_sections: - cover = data.get("cover", {}) - builder.add_cover( - data.get("title", "Data Assistant Report"), - cover.get( - "logo_path", str(logo_default) if logo_default.exists() else None - ), - data.get("summary"), - ) - for ins in data.get("insights", []): - builder.add_section({"title": "", "type": "paragraph", "text": ins}) - for section in data.get("sections", []): - builder.add_section(section) - else: - builder.add_cover( - "Data Assistant Report", - str(logo_default) if logo_default.exists() else None, - ) - builder.add_section({"title": "Data", "type": "table", "data": data}) - if include_chart: - numeric_items = { - k: v for k, v in data.items() if isinstance(v, (int, float)) - } - if len(numeric_items) >= 3: - labels, values = zip(*numeric_items.items()) - chart_spec = {"chart_type": "bar", "labels": labels, "values": values} - builder.add_section( - {"title": "Chart", "type": "chart", "chart_spec": chart_spec} - ) + with PdfReportBuilder(target) as builder: + has_sections = isinstance(data, dict) and "sections" in data + + if has_sections: + cover: Dict[str, Any] = cast(Dict[str, Any], data.get("cover", {})) + builder.add_cover( + cast(str, data.get("title", "Data Assistant Report")), + cast( + str | None, + cover.get( + "logo_path", + str(logo_default) if logo_default.exists() else None, + ), + ), + cast(str | None, data.get("summary")), + ) + for ins in cast(List[str], data.get("insights", [])): + builder.add_section({"title": "", "type": "paragraph", "text": ins}) + for section in cast(List[Dict[str, Any]], data.get("sections", [])): + builder.add_section(section) + else: + builder.add_cover( + "Data Assistant Report", + str(logo_default) if logo_default.exists() else None, + ) + builder.add_section({"title": "Data", "type": "table", "data": data}) + if include_chart: + numeric_items = { + k: v for k, v in data.items() if isinstance(v, (int, float)) + } + if len(numeric_items) >= 3: + labels, values = zip(*numeric_items.items()) + chart_spec = { + "chart_type": "bar", + "labels": labels, + "values": values, + } + builder.add_section( + {"title": "Chart", "type": "chart", "chart_spec": chart_spec} + ) - return builder.save() + return builder.save() if __name__ == "__main__":