diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2ea0afc..bd354f5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -93,7 +93,7 @@ jobs:
       - name: Upload coverage to Codecov
         if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11'
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@v5
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
           file: ./coverage.xml
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
index 7027807..00e5b49 100644
--- a/.github/workflows/publish-pypi.yml
+++ b/.github/workflows/publish-pypi.yml
@@ -103,3 +103,46 @@ jobs:
           generate_release_notes: true
           draft: false
           prerelease: ${{ contains(github.ref, 'rc') || contains(github.ref, 'beta') || contains(github.ref, 'alpha') }}
+
+  # ── Job 4: Update Version Metadata ────────────────────────────
+  # After a release is published, update CITATION.cff and README.md
+  # on master so they always reflect the latest released version.
+  # Uses GITHUB_TOKEN, which intentionally does not trigger other
+  # workflows, preventing infinite loops.
+  update-metadata:
+    name: Update Version Metadata
+    needs: github-release
+    runs-on: ubuntu-latest
+    if: ${{ !contains(github.ref, 'rc') && !contains(github.ref, 'beta') && !contains(github.ref, 'alpha') }}
+
+    steps:
+      - name: Checkout master
+        uses: actions/checkout@v4
+        with:
+          ref: master
+
+      - name: Extract version and date
+        id: meta
+        run: |
+          VERSION="${GITHUB_REF#refs/tags/v}"
+          DATE="$(date -u +%Y-%m-%d)"
+          echo "version=${VERSION}" >> "${GITHUB_OUTPUT}"
+          echo "date=${DATE}" >> "${GITHUB_OUTPUT}"
+
+      - name: Update CITATION.cff
+        run: |
+          sed -i 's/^version: ".*"/version: "${{ steps.meta.outputs.version }}"/' CITATION.cff
+          sed -i 's/^date-released: ".*"/date-released: "${{ steps.meta.outputs.date }}"/' CITATION.cff
+
+      - name: Update README.md bibtex version
+        run: |
+          sed -i 's/version={[^}]*}/version={${{ steps.meta.outputs.version }}}/' README.md
+
+      - name: Commit and push if changed
+        run: |
+          git diff --quiet && exit 0
+          git config user.name "Ashutosh Mishra"
+          git config user.email "ashutoshm1771@gmail.com"
+          git add CITATION.cff README.md
+          git commit -m "chore: update version metadata to ${{ steps.meta.outputs.version }}"
+          git push origin master
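The update-metadata job above rewrites version strings with `sed`. A minimal local dry run of the same substitutions, sketched in Python, can sanity-check the patterns before tagging a release. This script is illustrative only and not part of the patch; it assumes it is run from the repository root, and the version value is a hypothetical example:

```python
"""Dry run of the update-metadata sed edits (illustrative sketch, not part of the patch)."""
import re
from datetime import datetime, timezone

version = "0.4.0"  # hypothetical: tag v0.4.0 with the leading "v" stripped
date = datetime.now(timezone.utc).strftime("%Y-%m-%d")

# Mirror the two line-anchored sed substitutions on CITATION.cff.
cff = open("CITATION.cff", encoding="utf-8").read()
cff = re.sub(r'(?m)^version: ".*"$', f'version: "{version}"', cff)
cff = re.sub(r'(?m)^date-released: ".*"$', f'date-released: "{date}"', cff)

# Mirror the bibtex version substitution on README.md.
readme = open("README.md", encoding="utf-8").read()
readme = re.sub(r"version={[^}]*}", f"version={{{version}}}", readme)

# Print instead of writing back, so this stays a read-only check.
print("would set:", f'version: "{version}"', f'date-released: "{date}"')
```

Because both sed patterns are line-anchored, `re.sub` with the `(?m)` flag reproduces their behavior closely enough to catch a pattern that has silently stopped matching the files.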
diff --git a/CITATION.cff b/CITATION.cff
index cad2a1a..578fbe3 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -2,15 +2,16 @@ cff-version: 1.2.0
 title: "Quantum Encoding Atlas"
 message: "If you use this software, please cite it as below."
 type: software
+doi: "10.5281/zenodo.18780936"
 authors:
   - family-names: "Mishra"
     given-names: "Ashutosh"
     email: "ashutoshm1771@gmail.com"
 repository-code: "https://github.com/encoding-atlas/quantum-encoding-atlas"
-url: "https://q-encoding-atlas.web.app/documentation"
+url: "https://encoding-atlas.github.io/quantum-encoding-atlas"
 license: MIT
-version: "0.1.0"
-date-released: "2026-01-24"
+version: "0.4.0"
+date-released: "2026-02-26"
 keywords:
   - quantum computing
   - quantum machine learning
@@ -21,6 +22,6 @@ keywords:
   - cirq
 abstract: >-
   A comprehensive Python library for quantum data encodings
-  in machine learning, featuring 15+ encoding methods,
+  in machine learning, featuring 16 encoding methods,
   multi-framework support (PennyLane, Qiskit, Cirq), analysis
   tools, and practical guidance for encoding selection.
diff --git a/README.md b/README.md
index fbc44af..be3606f 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@
 [![CI](https://img.shields.io/github/actions/workflow/status/encoding-atlas/quantum-encoding-atlas/ci.yml?branch=master&logo=github&label=CI)](https://github.com/encoding-atlas/quantum-encoding-atlas/actions/workflows/ci.yml)
 [![codecov](https://codecov.io/gh/encoding-atlas/quantum-encoding-atlas/branch/master/graph/badge.svg)](https://codecov.io/gh/encoding-atlas/quantum-encoding-atlas)
 [![Documentation](https://img.shields.io/badge/docs-online-blue.svg)](https://encoding-atlas.github.io/quantum-encoding-atlas/)
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.18780936.svg)](https://doi.org/10.5281/zenodo.18780936)
 [![Website](https://img.shields.io/badge/Website-live-brightgreen?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAHFUlEQVR42sWX309UVxDHzwMvhTdeVEB+gyoCqsgqP0VERWRhkd8sgg9S66tNbKKNNjaxVhNr/wVl0RqjiU3UJ0iMtdFoY2MTAVVBZX1eLFn1Yfqd2z2ey9y9uCqrk3zynTMz58wc9sdd1HxY6sKFiaAJnEhZsCAAHYGOQV9FGMN6BBrgmkhtovqStjglJRlD7EpbtOgSmElLSSEoaWWQZ3Vjhveivh8kf87Bk8D3YDo9NZU0WNt9jY5J7HnWEPQgSFLxsoy0tATwNQhmLF5MGqwNZi18U5sO3PaAKTDAvdR8WlZ6ehq4nZmeTlCLTDTWa6H2GnutxuRdzuFeIHV+hs/I8GRnZAQBvSMzU6tB5LJAjoyLmK5zOSMIPOpTLCcry5+blRWGEgOf4SGgOiZyGtRYdWKfrhUxjazh3n71MZabne3Py84mJj8nh9UJ4rnGd+RztRrfici59PmwS+Tn5nowdBhKS2zkxwjXYr+Mc0zmzLmij9gfBrG9nZbm5aWB4JK8PGKWMvn5Bo6bnF5zDmp8YGrkPnGuPEPHxQxT0Lk/2MuWLEkAtwG5sVzG0Eiu13s8NBQI0KNHjywCZ86QZ+1azukzpG9w78v5W8D9K7Zg2bKBgqVLabmgwGCtUafjDi1fv55eBoMk7fnz53wJud8JchaiP1Tv3a2i2Yrly5NAENA7cBCrROYLbevfzp0jNztz+jTX6T3CF7j3nOJZlbSiFSsOFhYUENQCPiN9vXbViadPyc3Gx8bIpYeMy56y5oCyW3FhYTKYLkZyZVERwZ8N4jLmVjsxMeF+gfHxWM61qxsh5M0PwFVFRbtWFRcT4KEY45s1Y3wGvkbXnT9/ntwsMDio66KdZXKin0bU9Sttq1euvARoPqiprqaXL1+StBcvXlB1ZaWu42E+tddFxVayenXimlWrZqAEtWDfYOLMutJS1jlra2tqrFfiyZMn1tfoubNn+WL2Gq12rG8pnZPzSAUzIFGtXbOmCZCGk6xusccYKBQK0ZPHj+mPGzfol5MnqbWlJfqekhLXs9paW+nXU6foz5s3rYtOT0/TgwcPRL/34lWlJSUnANnhv4RWxh7jwaMZD/HD4cPWK6Rr9b61EZ8fcD8eOeL6QR8dHXWdgYky53HlKS0NWE0B9L3woHPZU+S/3bfPse+7/ftpcnKSpMkLiFmMHz02qPBXGSlbt47WR8Da7suYuIC78fu+qqKCqvDBvXDhAsViY3hOlEX6WSpAXKNjwwqP/jFATJmNcgEOZI35Amx/37tH/9y/z27MF9C9jZre5WVlOq5zo6qirOxVRXk5QbnAUmstfSjDb5F4GS7gnEGsBdMKL/GryshLXWVT7Vs5oPUpfirEy/inRqXoy8jZKhlcCDqtqquqxqqR2FBVRazVRiUcj+8Fxsd5DjOD9J2Mqg3V1SP8kKnZsIHgA/gGs0YeGv8L6J7op9XMAZ3NsNpYUxMA9A5sqBVru07E9wKmr5wh+kyDqnbjxhOANtXWEquE44z2+SEUL3v48KHuY1eDM3Zc1W3a1FSHIANfw4WsknhfQM8RK161ua4uEcwAcmPL5s3aj/sF0Mutt+Rf8JViQ9GlrVu2kAZr4RudjOMFHuEC6BErF5W2+q1bdwGqr68nVok99/OxYzQ0NERXr16lv+7epdevX9PHWjgcpjt37lhnBQIB+uno0Vk9t9nngQ84pmcx/9AgmNywbVsIEIM1q4TjjrWvuZkOHzpE165dozdv3tD7jC985coVOoQ92OvswzQ0yJgkBJKV3bY3NBwExDRu3y7V+ELt9Pf1WcO9ffuWpHHs98uXaefOnXKfOcv0kufL+gNKmrexMQlMAdI0sgqwmXVO9u7dS8+ePSNt/PD7Zs8eeQ4jesXEFEhS0azJ6x1obmqipgjsM4hHx9RG1qa+o6ODrl+/TsPDw9Te3m4/S57piHtBRM2ZZv9u5WbNzc0J4Da/N6EENfh8llqDsIoaWafP4Hp5nn2NvDNn8pJbPKOay1p8vtSWlpYgIAZr2hFRhmPW2mDyZo9zDfWxGnifY49P1PpM/ymQqmKxHTt2eEAYENMKovoCkZdrZ4wvL8+IXs+zlKoPsdbWVn9bWxtpsP7fh+q19pl2Wy7KvrmRe8XZwK8+xvDh84NwBz6E7S50COWL8AfYwlnLebsPdT8XZ4Th96hPsc7OTk9nR0cQag3VCeAz2jfw0DomlTFnSGSca6egpWo+rLurK7Wrq+s2IDvdrBJZw9jX3d3u+0ztLZCq5tPQOAEM9PT0BAH1YBCt2rchYnPnus0ZU9Dd0AQVL/P7/Un+np6D0BAgpre3l9XgjL0vHwIHQJL6XLaztzcZv2v6wUX4MzwQlFgRszM7pn3sieztgyarL2n9/f2JfX19Xuhx6CAYBqNgmmGfY5Ec13hB4nz0/g83250Vq34algAAAABJRU5ErkJggg==)](https://q-encoding-atlas.web.app)
 
 [Documentation](https://encoding-atlas.github.io/quantum-encoding-atlas/) |
@@ -120,8 +121,9 @@ If you use this library in your research, please cite:
   title={Quantum Encoding Atlas: A
 Comprehensive Library for Quantum Data Encodings},
   author={Mishra, Ashutosh},
   year={2026},
-  url={https://github.com/encoding-atlas/quantum-encoding-atlas},
-  version={0.1.0}
+  doi={10.5281/zenodo.18780936},
+  url={https://doi.org/10.5281/zenodo.18780936},
+  version={0.4.0}
 }
 ```
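Since the CITATION.cff and README bibtex versions are now maintained by automation, a small consistency check can catch drift between the two files. A sketch, assuming PyYAML is available in the development environment and the files sit at the repository root; nothing here is part of the patch itself:

```python
"""Check that CITATION.cff and the README bibtex agree on the version (sketch)."""
import re
import sys

import yaml  # PyYAML, assumed available

cff = yaml.safe_load(open("CITATION.cff", encoding="utf-8"))
readme = open("README.md", encoding="utf-8").read()
match = re.search(r"version={([^}]*)}", readme)

readme_version = match.group(1) if match else None
if cff.get("version") != readme_version:
    sys.exit(f"version drift: CITATION.cff={cff.get('version')!r} README={readme_version!r}")
print(f"versions agree: {readme_version}")
```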
diff --git a/experiments/config.py b/experiments/config.py
index 9b26ddd..d553aed 100644
--- a/experiments/config.py
+++ b/experiments/config.py
@@ -70,6 +70,7 @@
     "vqc",
     "kernel",
     "tradeoff",
+    "report",
 })
 
 VALID_BACKENDS = frozenset({"pennylane", "qiskit", "cirq"})
@@ -88,6 +89,7 @@
     "vqc": 6,       # Stage 6a → seed 6042
     "kernel": 7,    # Stage 6b → seed 7042
     "tradeoff": 8,  # Stage 7 → seed 8042
+    "report": 9,    # Stage 8 → seed 9042
 }
 
 # Quick-mode overrides reduce sample counts for fast validation.
diff --git a/experiments/configs/stage8_report.json b/experiments/configs/stage8_report.json
new file mode 100644
index 0000000..35b5817
--- /dev/null
+++ b/experiments/configs/stage8_report.json
@@ -0,0 +1,29 @@
+{
+  "stage": "report",
+  "seed": 42,
+  "backend": "pennylane",
+  "encodings": [
+    {
+      "name": "__report__",
+      "params": {}
+    }
+  ],
+  "analysis_params": {
+    "stage_dirs": {
+      "resources": "experiments/results/raw/stage1_resources",
+      "simulability": "experiments/results/raw/stage2_simulability",
+      "expressibility": "experiments/results/raw/stage3_expressibility",
+      "entanglement": "experiments/results/raw/stage4_entanglement",
+      "trainability": "experiments/results/raw/stage5_trainability",
+      "noise": "experiments/results/raw/stage5b_noise",
+      "vqc": "experiments/results/raw/stage6a_vqc",
+      "kernel": "experiments/results/raw/stage6b_kernel"
+    },
+    "tradeoff_dir": "experiments/results/raw/stage7_tradeoff",
+    "sensitivity_dir": "experiments/results/raw/stage6a5_sensitivity",
+    "figure_dir": "experiments/results/figures",
+    "table_dir": "experiments/results/tables",
+    "generate_tables": true
+  },
+  "output_dir": "experiments/results/report"
+}
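The seed comments in config.py above imply a simple derivation: with the base seed 42 from stage8_report.json, offset 9 yields 9042. The real logic lives in `ExperimentConfig.task_seed`, which is not part of this diff, so the formula below is an assumption that merely matches those comments:

```python
# Illustrative reconstruction of the per-stage seeding scheme (an assumption,
# not the actual ExperimentConfig.task_seed implementation).
STAGE_OFFSETS = {"tradeoff": 8, "report": 9}

def stage_seed(base_seed: int, stage: str, task_index: int = 0) -> int:
    """Derive a deterministic seed that is disjoint across stages."""
    return 1000 * STAGE_OFFSETS[stage] + base_seed + task_index

# Matches the comments: Stage 8 with base seed 42 -> 9042.
assert stage_seed(42, "report") == 9042
```

Keeping the offsets a thousand apart means each stage's tasks draw from a non-overlapping seed range, so re-running one stage never perturbs another stage's randomness.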
diff --git a/experiments/report.py b/experiments/report.py
new file mode 100644
index 0000000..7ee7b96
--- /dev/null
+++ b/experiments/report.py
@@ -0,0 +1,1120 @@
+"""Stage 8 — Report generation.
+
+Compiles all experiment results (Stages 1-7) into publication-ready
+outputs: a master summary JSON, Markdown/LaTeX tables, and narrative
+hypothesis verdict and ranking documents.
+
+This module is the final stage of the experiment pipeline. It reads
+only from previously generated results and produces no new simulations.
+
+Outputs
+-------
+- ``master_summary.json`` — Master JSON combining all metrics for all
+  encodings across all stages, with ``schema_version``.
+- ``tables/`` — Markdown and LaTeX tables for every stage, plus
+  VQC/kernel accuracy matrices, sensitivity grid, and consolidated
+  cross-stage comparison.
+- ``ranking.md`` — Final encoding ranking narrative with Pareto front
+  explanation and practical guidance.
+- ``hypotheses.md`` — H1-H7 verdict table with full supporting evidence,
+  test statistics, and interpretation.
+
+Usage
+-----
+As a CLI tool::
+
+    python -m experiments.run_stage --config experiments/configs/stage8_report.json
+    python -m experiments.run_stage --config experiments/configs/stage8_report.json --quick
+
+Programmatically::
+
+    from experiments.report import generate_report
+    result = generate_report(
+        stage_dirs={...},
+        tradeoff_dir="experiments/results/raw/stage7_tradeoff",
+        output_dir="experiments/results/report",
+    )
+
+Dependencies
+------------
+This module reads from per-stage ``summary.json`` files and Stage 7
+output files (``hypothesis_verdicts.json``, ``rankings.json``,
+``pareto_front.json``, ``pairwise_comparisons.json``). It also
+optionally reads the Stage 6a.5 sensitivity report.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import shutil
+import time
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+_SCHEMA_VERSION = "1.0"
+
+# Encoding display names for tables.
+_ENCODING_DISPLAY: dict[str, str] = {
+    "angle": "AngleEncoding",
+    "amplitude": "AmplitudeEncoding",
+    "basis": "BasisEncoding",
+    "iqp": "IQPEncoding",
+    "zz_feature_map": "ZZFeatureMap",
+    "pauli_feature_map": "PauliFeatureMap",
+    "data_reuploading": "DataReuploading",
+    "hardware_efficient": "HardwareEfficientEncoding",
+    "higher_order_angle": "HigherOrderAngleEncoding",
+    "qaoa_encoding": "QAOAEncoding",
+    "hamiltonian_encoding": "HamiltonianEncoding",
+    "symmetry_inspired": "SymmetryInspiredFeatureMap",
+    "trainable_encoding": "TrainableEncoding",
+    "so2_equivariant": "SO2EquivariantFeatureMap",
+    "cyclic_equivariant": "CyclicEquivariantFeatureMap",
+    "swap_equivariant": "SwapEquivariantFeatureMap",
+}
+
+_ENCODING_FAMILIES: dict[str, str] = {
+    "angle": "Non-Entangling",
+    "amplitude": "Amplitude",
+    "basis": "Non-Entangling",
+    "iqp": "IQP-Based",
+    "zz_feature_map": "IQP-Based",
+    "pauli_feature_map": "Pauli-Based",
+    "data_reuploading": "Data Re-uploading",
+    "hardware_efficient": "Hardware-Efficient",
+    "higher_order_angle": "Non-Entangling",
+    "qaoa_encoding": "QAOA/Hamiltonian",
+    "hamiltonian_encoding": "QAOA/Hamiltonian",
+    "symmetry_inspired": "Symmetry-Based",
+    "trainable_encoding": "Trainable",
+    "so2_equivariant": "Equivariant",
+    "cyclic_equivariant": "Equivariant",
+    "swap_equivariant": "Equivariant",
+}
+
+# Hypothesis descriptions for narrative.
+_HYPOTHESIS_DESCRIPTIONS: dict[str, str] = {
+    "H1": (
+        "Expressibility is necessary but not sufficient for high "
+        "classification accuracy."
+    ),
+    "H2": (
+        "Equivariant encodings outperform general encodings on datasets "
+        "with matching symmetry structure."
+    ),
+    "H3": (
+        "Data re-uploading encodings achieve higher expressibility than "
+        "single-pass encodings at equivalent circuit depth."
+    ),
+    "H4": (
+        "Barren plateau onset (trainability collapse) correlates with "
+        "circuit depth more strongly than with encoding family."
+    ),
+    "H5": (
+        "Noise degrades entangling encodings disproportionately compared "
+        "to non-entangling encodings."
+    ),
+    "H6": (
+        "Quantum kernel methods and VQC methods produce different "
+        "encoding rankings on the same datasets."
+    ),
+    "H7": (
+        "No single encoding dominates across all metrics; the Pareto "
+        "front contains >= 3 encodings."
+    ),
+}
+
+
+# ---------------------------------------------------------------------------
+# JSON helpers
+# ---------------------------------------------------------------------------
+
+def _json_default(obj: Any) -> Any:
+    """Fallback JSON serializer for numpy types."""
+    type_name = type(obj).__name__
+    if "int" in type_name and hasattr(obj, "item"):
+        return int(obj.item())
+    if "float" in type_name and hasattr(obj, "item"):
+        return float(obj.item())
+    if "ndarray" in type_name and hasattr(obj, "tolist"):
+        return obj.tolist()
+    if "bool" in type_name and hasattr(obj, "item"):
+        return bool(obj.item())
+    raise TypeError(f"Object of type {type_name} is not JSON serializable")
+
+
+def _load_json(path: str) -> dict[str, Any] | None:
+    """Load a JSON file, returning None if missing or invalid."""
+    if not os.path.isfile(path):
+        logger.warning("File not found: %s", path)
+        return None
+    try:
+        with open(path, "r", encoding="utf-8") as fh:
+            return json.load(fh)
+    except (json.JSONDecodeError, OSError) as exc:
+        logger.error("Failed to load %s: %s", path, exc)
+        return None
+
+
+def _save_json(data: Any, path: str) -> None:
+    """Save data as pretty-printed JSON."""
+    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
+    with open(path, "w", encoding="utf-8") as fh:
+        json.dump(data, fh, indent=2, default=_json_default)
+
+
+def _save_text(content: str, path: str) -> None:
+    """Save text content to a file."""
+    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
+    with open(path, "w", encoding="utf-8") as fh:
+        fh.write(content)
+
+
+def _fmt(val: Any, precision: int = 4) -> str:
+    """Format a value for display in tables."""
+    if val is None:
+        return "—"
+    if isinstance(val, bool):
+        return "Yes" if val else "No"
+    if isinstance(val, float):
+        if val != val:  # NaN
+            return "NaN"
+        if abs(val) < 1e-10:
+            return "0.0000"
+        return f"{val:.{precision}f}"
+    return str(val)
+
+
+def _tex_escape(s: str) -> str:
+    """Escape special LaTeX characters."""
+    return s.replace("_", r"\_").replace("&", r"\&").replace("%", r"\%")
+
+
+# ---------------------------------------------------------------------------
+# 8.1: Master summary JSON
+# ---------------------------------------------------------------------------
+
+def _build_master_summary(
+    stage_dirs: dict[str, str],
+    tradeoff_dir: str,
+    sensitivity_dir: str | None,
+) -> dict[str, Any]:
+    """Build the master summary JSON combining all stages.
+
+    Parameters
+    ----------
+    stage_dirs : dict[str, str]
+        Mapping of stage name to directory path for Stages 1-6b.
+    tradeoff_dir : str
+        Path to the Stage 7 tradeoff results directory.
+    sensitivity_dir : str or None
+        Path to the Stage 6a.5 sensitivity results directory.
+
+    Returns
+    -------
+    dict[str, Any]
+        Master summary with all encoding profiles and stage data.
+    """
+    # Load rankings (the most comprehensive per-encoding data).
+    rankings_path = os.path.join(tradeoff_dir, "rankings.json")
+    rankings_data = _load_json(rankings_path)
+    rankings = rankings_data.get("rankings", []) if rankings_data else []
+
+    # Load all per-stage summaries for encoding-level detail.
+    stage_summaries: dict[str, dict[str, Any] | None] = {}
+    for stage_name, stage_dir in stage_dirs.items():
+        summary_path = os.path.join(stage_dir, "summary.json")
+        stage_summaries[stage_name] = _load_json(summary_path)
+
+    # Load hypothesis verdicts.
+    verdicts_path = os.path.join(tradeoff_dir, "hypothesis_verdicts.json")
+    verdicts_data = _load_json(verdicts_path)
+
+    # Load Pareto front.
+    pareto_path = os.path.join(tradeoff_dir, "pareto_front.json")
+    pareto_data = _load_json(pareto_path)
+
+    # Load sensitivity report.
+    sensitivity_data = None
+    if sensitivity_dir:
+        sensitivity_path = os.path.join(sensitivity_dir, "sensitivity_report.json")
+        sensitivity_data = _load_json(sensitivity_path)
+
+    # Build per-encoding profiles from rankings + stage data.
+    encoding_profiles: list[dict[str, Any]] = []
+    for ranking_entry in rankings:
+        enc_name = ranking_entry["encoding"]
+        profile: dict[str, Any] = {
+            "encoding": enc_name,
+            "display_name": _ENCODING_DISPLAY.get(enc_name, enc_name),
+            "family": _ENCODING_FAMILIES.get(enc_name, "Other"),
+            "rank": ranking_entry.get("rank"),
+            "score": ranking_entry.get("score"),
+            "is_pareto": ranking_entry.get("is_pareto", False),
+            "is_simulable": ranking_entry.get("is_simulable"),
+            "metrics": {
+                "depth": ranking_entry.get("depth"),
+                "expressibility": ranking_entry.get("expressibility"),
+                "entanglement_capability": ranking_entry.get("entanglement_capability"),
+                "trainability_estimate": ranking_entry.get("trainability_estimate"),
+                "noise_resilience": ranking_entry.get("noise_resilience"),
+                "vqc_accuracy": ranking_entry.get("vqc_accuracy"),
+                "vqc_ci": ranking_entry.get("vqc_ci"),
+                "kernel_accuracy": ranking_entry.get("kernel_accuracy"),
+                "kernel_ci": ranking_entry.get("kernel_ci"),
+            },
+        }
+        encoding_profiles.append(profile)
+
+    # Count per-stage results.
+    stage_counts: dict[str, dict[str, int]] = {}
+    for stage_name, summary in stage_summaries.items():
+        if summary is not None:
+            stage_counts[stage_name] = {
+                "total": summary.get("total_results", 0),
+                "success": summary.get("success_count", 0),
+                "failed": summary.get("failed_count", 0),
+            }
+
+    master: dict[str, Any] = {
+        "schema_version": _SCHEMA_VERSION,
+        "n_encodings": len(encoding_profiles),
+        "encoding_profiles": encoding_profiles,
+        "stage_counts": stage_counts,
+        "hypothesis_verdicts": (
+            verdicts_data.get("hypothesis_verdicts", {})
+            if verdicts_data else {}
+        ),
+        "pareto_front": {
+            "n_pareto_optimal": (
+                pareto_data.get("n_pareto_optimal", 0)
+                if pareto_data else 0
+            ),
+            "pareto_optimal": (
+                pareto_data.get("pareto_optimal", [])
+                if pareto_data else []
+            ),
+            "objective_names": (
+                pareto_data.get("objective_names", [])
+                if pareto_data else []
+            ),
+        },
+    }
+
+    if sensitivity_data:
+        master["sensitivity_analysis"] = {
+            "grid": sensitivity_data.get("grid", {}),
+            "analysis": sensitivity_data.get("analysis", {}),
+        }
+
+    return master
+
+
+# ---------------------------------------------------------------------------
+# 8.2: Table generation
+# ---------------------------------------------------------------------------
+
+def _extract_vqc_accuracy_matrix(
+    vqc_summary: dict[str, Any],
+) -> tuple[list[str], list[str], dict[str, dict[str, str]]]:
+    """Extract encoding x dataset accuracy matrix from VQC results.
+
+    Returns
+    -------
+    tuple
+        (encoding_names, dataset_names, matrix) where matrix maps
+        encoding -> dataset -> formatted "mean +/- std" string.
+    """
+    results = vqc_summary.get("results", [])
+
+    # Collect unique encodings and datasets.
+    encodings_set: set[str] = set()
+    datasets_set: set[str] = set()
+    matrix: dict[str, dict[str, str]] = {}
+
+    for entry in results:
+        if entry.get("status") != "success":
+            continue
+        enc_name = entry.get("encoding_name", "unknown")
+        result_data = entry.get("result", {})
+        datasets = result_data.get("datasets", {})
+
+        encodings_set.add(enc_name)
+        if enc_name not in matrix:
+            matrix[enc_name] = {}
+
+        for ds_name, ds_data in datasets.items():
+            datasets_set.add(ds_name)
+            agg = ds_data.get("aggregate", {})
+            mean_acc = agg.get("mean_test_accuracy")
+            std_acc = agg.get("std_test_accuracy")
+            if mean_acc is not None and std_acc is not None:
+                matrix[enc_name][ds_name] = f"{mean_acc:.3f} +/- {std_acc:.3f}"
+            elif mean_acc is not None:
+                matrix[enc_name][ds_name] = f"{mean_acc:.3f}"
+            else:
+                matrix[enc_name][ds_name] = "—"
+
+    enc_names = sorted(encodings_set)
+    ds_names = sorted(datasets_set)
+    return enc_names, ds_names, matrix
+
+
+def _generate_accuracy_matrix_md(
+    enc_names: list[str],
+    ds_names: list[str],
+    matrix: dict[str, dict[str, str]],
+    title: str,
+) -> str:
+    """Generate a Markdown accuracy matrix table."""
+    lines = [f"# {title}", ""]
+    header = ["Encoding"] + ds_names
+    lines.append("| " + " | ".join(header) + " |")
+    lines.append("| " + " | ".join(["---"] * len(header)) + " |")
+
+    for enc in enc_names:
+        cells = [enc]
+        for ds in ds_names:
+            cells.append(matrix.get(enc, {}).get(ds, "—"))
+        lines.append("| " + " | ".join(cells) + " |")
+
+    return "\n".join(lines)
+
+
+def _generate_accuracy_matrix_tex(
+    enc_names: list[str],
+    ds_names: list[str],
+    matrix: dict[str, dict[str, str]],
+    title: str,
+    label: str,
+) -> str:
+    """Generate a LaTeX accuracy matrix table."""
+    col_spec = "l" + "c" * len(ds_names)
+
+    lines = [
+        r"\begin{table}[htbp]",
+        r"\centering",
+        r"\caption{" + _tex_escape(title) + "}",
+        r"\label{" + label + "}",
+        r"\small",
+        r"\begin{tabular}{" + col_spec + "}",
+        r"\toprule",
+    ]
+
+    header_cells = ["Encoding"] + [_tex_escape(ds) for ds in ds_names]
+    lines.append(" & ".join(header_cells) + r" \\")
+    lines.append(r"\midrule")
+
+    for enc in enc_names:
+        cells = [_tex_escape(enc)]
+        for ds in ds_names:
+            val = matrix.get(enc, {}).get(ds, "—")
+            cells.append(val.replace("+/-", r"$\pm$"))
+        lines.append("  " + " & ".join(cells) + r" \\")
+
+    lines.extend([
+        r"\bottomrule",
+        r"\end{tabular}",
+        r"\end{table}",
+    ])
+    return "\n".join(lines)
+
+
+def _generate_sensitivity_table_md(
+    sensitivity_data: dict[str, Any],
+) -> str:
+    """Generate a Markdown table for Stage 6a.5 sensitivity analysis."""
+    results = sensitivity_data.get("results", [])
+    if not results:
+        return "# Sensitivity Analysis\n\nNo results available.\n"
+
+    lines = [
+        "# Stage 6a.5: VQC Hyperparameter Sensitivity",
+        "",
+        "| Encoding | Dataset | LR | Layers | Accuracy | 95% CI |",
+        "| --- | --- | --- | --- | --- | --- |",
+    ]
+
+    # Sort by encoding, dataset, then accuracy descending.
+    sorted_results = sorted(
+        results,
+        key=lambda r: (
+            r.get("encoding", ""),
+            r.get("dataset", ""),
+            -(r.get("mean_accuracy") or 0),
+        ),
+    )
+
+    for r in sorted_results:
+        if r.get("status") != "success":
+            continue
+        enc = r.get("encoding", "—")
+        ds = r.get("dataset", "—")
+        lr = r.get("lr", "—")
+        layers = r.get("n_var_layers", "—")
+        acc = _fmt(r.get("mean_accuracy"), 4)
+        ci_lo = r.get("ci_lower")
+        ci_hi = r.get("ci_upper")
+        ci_str = f"({_fmt(ci_lo, 3)}-{_fmt(ci_hi, 3)})" if ci_lo is not None else "—"
+        lines.append(f"| {enc} | {ds} | {lr} | {layers} | {acc} | {ci_str} |")
+
+    return "\n".join(lines)
+
+
+def _generate_sensitivity_table_tex(
+    sensitivity_data: dict[str, Any],
+) -> str:
+    """Generate a LaTeX table for Stage 6a.5 sensitivity analysis."""
+    results = sensitivity_data.get("results", [])
+    if not results:
+        return ""
+
+    lines = [
+        r"\begin{table}[htbp]",
+        r"\centering",
+        r"\caption{VQC Hyperparameter Sensitivity (Stage 6a.5)}",
+        r"\label{tab:sensitivity}",
+        r"\small",
+        r"\begin{tabular}{llccrc}",
+        r"\toprule",
+        r"Encoding & Dataset & LR & Layers & Accuracy & 95\% CI \\",
+        r"\midrule",
+    ]
+
+    sorted_results = sorted(
+        results,
+        key=lambda r: (
+            r.get("encoding", ""),
+            r.get("dataset", ""),
+            -(r.get("mean_accuracy") or 0),
+        ),
+    )
+
+    for r in sorted_results:
+        if r.get("status") != "success":
+            continue
+        enc = _tex_escape(r.get("encoding", "—"))
+        ds = _tex_escape(r.get("dataset", "—"))
+        lr = str(r.get("lr", "—"))
+        layers = str(r.get("n_var_layers", "—"))
+        acc = _fmt(r.get("mean_accuracy"), 4)
+        ci_lo = r.get("ci_lower")
+        ci_hi = r.get("ci_upper")
+        ci_str = f"({_fmt(ci_lo, 3)}--{_fmt(ci_hi, 3)})" if ci_lo is not None else "—"
+        lines.append(f"  {enc} & {ds} & {lr} & {layers} & {acc} & {ci_str}" + r" \\")
+
+    lines.extend([
+        r"\bottomrule",
+        r"\end{tabular}",
+        r"\end{table}",
+    ])
+    return "\n".join(lines)
+
+
+def _generate_tables(
+    stage_dirs: dict[str, str],
+    tradeoff_dir: str,
+    sensitivity_dir: str | None,
+    table_dir: str,
+) -> list[str]:
+    """Generate all tables for the report.
+
+    Parameters
+    ----------
+    stage_dirs : dict[str, str]
+        Stage name to directory mapping.
+    tradeoff_dir : str
+        Stage 7 output directory.
+    sensitivity_dir : str or None
+        Stage 6a.5 output directory.
+    table_dir : str
+        Output directory for generated tables.
+
+    Returns
+    -------
+    list[str]
+        List of generated file paths.
+    """
+    os.makedirs(table_dir, exist_ok=True)
+    generated: list[str] = []
+
+    # --- Copy Stage 7 tables (ranking + hypothesis) -------------------------
+    for filename in ("ranking_table.md", "ranking_table.tex",
+                     "hypothesis_table.md", "hypothesis_table.tex"):
+        src = os.path.join(tradeoff_dir, filename)
+        dst = os.path.join(table_dir, filename)
+        if os.path.isfile(src):
+            shutil.copy2(src, dst)
+            generated.append(dst)
+            logger.info("Copied %s -> %s", src, dst)
+
+    # --- Copy per-stage raw tables ------------------------------------------
+    for stage_name, stage_dir in stage_dirs.items():
+        if not os.path.isdir(stage_dir):
+            logger.warning("Stage directory not found: %s", stage_dir)
+            continue
+        # Per-stage tables are named like stage1_resources_table.md
+        for fname in sorted(os.listdir(stage_dir)):
+            if fname.endswith(("_table.md", "_table.tex")):
+                src = os.path.join(stage_dir, fname)
+                dst = os.path.join(table_dir, fname)
+                shutil.copy2(src, dst)
+                generated.append(dst)
+
+    # --- VQC accuracy matrix ------------------------------------------------
+    vqc_dir = stage_dirs.get("vqc", "")
+    vqc_summary = _load_json(os.path.join(vqc_dir, "summary.json"))
+    if vqc_summary:
+        enc_names, ds_names, matrix = _extract_vqc_accuracy_matrix(vqc_summary)
+        if enc_names:
+            md = _generate_accuracy_matrix_md(
+                enc_names, ds_names, matrix,
+                "VQC Classification Accuracy (mean +/- std)",
+            )
+            path = os.path.join(table_dir, "vqc_accuracy_matrix.md")
+            _save_text(md, path)
+            generated.append(path)
+
+            tex = _generate_accuracy_matrix_tex(
+                enc_names, ds_names, matrix,
+                "VQC Classification Accuracy",
+                "tab:vqc_accuracy",
+            )
+            path = os.path.join(table_dir, "vqc_accuracy_matrix.tex")
+            _save_text(tex, path)
+            generated.append(path)
+
+    # --- Kernel accuracy matrix ---------------------------------------------
+    kernel_dir = stage_dirs.get("kernel", "")
+    kernel_summary = _load_json(os.path.join(kernel_dir, "summary.json"))
+    if kernel_summary:
+        enc_names, ds_names, matrix = _extract_vqc_accuracy_matrix(kernel_summary)
+        if enc_names:
+            md = _generate_accuracy_matrix_md(
+                enc_names, ds_names, matrix,
+                "Kernel Classification Accuracy (mean +/- std)",
+            )
+            path = os.path.join(table_dir, "kernel_accuracy_matrix.md")
+            _save_text(md, path)
+            generated.append(path)
+
+            tex = _generate_accuracy_matrix_tex(
+                enc_names, ds_names, matrix,
+                "Kernel Classification Accuracy",
+                "tab:kernel_accuracy",
+            )
+            path = os.path.join(table_dir, "kernel_accuracy_matrix.tex")
+            _save_text(tex, path)
+            generated.append(path)
+
+    # --- Sensitivity grid table ---------------------------------------------
+    if sensitivity_dir:
+        sens_path = os.path.join(sensitivity_dir, "sensitivity_report.json")
+        sens_data = _load_json(sens_path)
+        if sens_data:
+            md = _generate_sensitivity_table_md(sens_data)
+            path = os.path.join(table_dir, "sensitivity_grid.md")
+            _save_text(md, path)
+            generated.append(path)
+
+            tex = _generate_sensitivity_table_tex(sens_data)
+            if tex:
+                path = os.path.join(table_dir, "sensitivity_grid.tex")
+                _save_text(tex, path)
+                generated.append(path)
+
+    return generated
+
+
+# ---------------------------------------------------------------------------
+# 8.3: Figure verification
+# ---------------------------------------------------------------------------
+
+def _verify_figures(figure_dir: str) -> dict[str, Any]:
+    """Verify that all expected figures exist.
+
+    Parameters
+    ----------
+    figure_dir : str
+        Directory containing generated figures.
+
+    Returns
+    -------
+    dict[str, Any]
+        Report of found/missing figures.
+    """
+    if not os.path.isdir(figure_dir):
+        return {"status": "missing", "found": 0, "missing_dir": True}
+
+    png_files = sorted(
+        f for f in os.listdir(figure_dir) if f.endswith(".png")
+    )
+    pdf_files = sorted(
+        f for f in os.listdir(figure_dir) if f.endswith(".pdf")
+    )
+
+    return {
+        "status": "ok" if png_files else "empty",
+        "png_count": len(png_files),
+        "pdf_count": len(pdf_files),
+        "figure_pairs": len(png_files),
+        "figures": [f.replace(".png", "") for f in png_files],
+    }
+
+
+# ---------------------------------------------------------------------------
+# 8.4: Hypothesis verdict narrative
+# ---------------------------------------------------------------------------
+
+# Plain-text verdict labels for the narrative documents.
+_VERDICT_LABEL: dict[str, str] = {
+    "supported": "SUPPORTED",
+    "refuted": "REFUTED",
+    "inconclusive": "INCONCLUSIVE",
+}
+
+_CONFIDENCE_LABEL: dict[str, str] = {
+    "high": "High confidence",
+    "moderate": "Moderate confidence",
+    "low": "Low confidence",
+}
+
+
+def _format_test_statistic(stats: dict[str, Any]) -> str:
+    """Format test statistics into a human-readable string."""
+    parts = []
+    for key, val in stats.items():
+        if isinstance(val, float):
+            if abs(val) < 0.001 and val != 0:
+                parts.append(f"{key} = {val:.2e}")
+            else:
+                parts.append(f"{key} = {val:.4f}")
+        else:
+            parts.append(f"{key} = {val}")
+    return "; ".join(parts)
+
+
+def _generate_hypotheses_md(
+    verdicts_data: dict[str, Any],
+    pareto_data: dict[str, Any] | None,
+) -> str:
+    """Generate the hypotheses.md narrative document.
+
+    Parameters
+    ----------
+    verdicts_data : dict[str, Any]
+        Loaded hypothesis_verdicts.json.
+    pareto_data : dict[str, Any] or None
+        Loaded pareto_front.json (for H7 detail).
+
+    Returns
+    -------
+    str
+        Full Markdown document content.
+    """
+    verdicts = verdicts_data.get("hypothesis_verdicts", {})
+
+    lines = [
+        "# Hypothesis Verdicts",
+        "",
+        "This document presents the results of seven pre-registered "
+        "hypotheses tested across the Quantum Encoding Atlas experiment "
+        "pipeline (Stages 1-7). Each hypothesis was evaluated using the "
+        "statistical tests and criteria defined in the experiment design "
+        "document.",
+        "",
+        "## Summary Table",
+        "",
+        "| Hypothesis | Verdict | Confidence | Key Statistic |",
+        "| --- | --- | --- | --- |",
+    ]
+
+    for h_id in ["H1", "H2", "H3", "H4", "H5", "H6", "H7"]:
+        v = verdicts.get(h_id, {})
+        verdict = v.get("verdict", "unknown")
+        confidence = v.get("confidence", "unknown")
+        stats = v.get("test_statistic", {})
+        stat_str = _format_test_statistic(stats) if stats else "—"
+        lines.append(
+            f"| {h_id} | {_VERDICT_LABEL.get(verdict, verdict)} "
+            f"| {confidence} | {stat_str} |"
+        )
+
+    lines.extend(["", "---", "", "## Detailed Analysis", ""])
+
+    for h_id in ["H1", "H2", "H3", "H4", "H5", "H6", "H7"]:
+        v = verdicts.get(h_id, {})
+        description = _HYPOTHESIS_DESCRIPTIONS.get(h_id, "")
+        verdict = v.get("verdict", "unknown")
+        confidence = v.get("confidence", "unknown")
+        evidence = v.get("evidence", "No evidence available.")
+        stats = v.get("test_statistic", {})
+
+        lines.extend([
+            f"### {h_id}: {description}",
+            "",
+            f"**Verdict:** {_VERDICT_LABEL.get(verdict, verdict)}",
+            "",
+            f"**Confidence:** {_CONFIDENCE_LABEL.get(confidence, confidence)}",
+            "",
+            "**Evidence:**",
+            "",
+            evidence,
+            "",
+        ])
+
+        if stats:
+            lines.append("**Test Statistics:**")
+            lines.append("")
+            for key, val in stats.items():
+                if isinstance(val, float):
+                    if abs(val) < 0.001 and val != 0:
+                        lines.append(f"- {key}: {val:.2e}")
+                    else:
+                        lines.append(f"- {key}: {val:.4f}")
+                else:
+                    lines.append(f"- {key}: {val}")
+            lines.append("")
+
+        # H7-specific Pareto detail.
+        if h_id == "H7" and pareto_data:
+            pareto_names = pareto_data.get("pareto_optimal", [])
+            if pareto_names:
+                lines.append("**Pareto-optimal encodings:**")
+                lines.append("")
+                for name in pareto_names:
+                    display = _ENCODING_DISPLAY.get(name, name)
+                    family = _ENCODING_FAMILIES.get(name, "Other")
+                    lines.append(f"- {display} ({family})")
+                lines.append("")
+
+        lines.extend(["---", ""])
+
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Ranking narrative
+# ---------------------------------------------------------------------------
+
+def _generate_ranking_md(
+    rankings_data: dict[str, Any],
+    pareto_data: dict[str, Any] | None,
+    verdicts_data: dict[str, Any] | None,
+) -> str:
+    """Generate the ranking.md narrative document.
+
+    Parameters
+    ----------
+    rankings_data : dict[str, Any]
+        Loaded rankings.json.
+    pareto_data : dict[str, Any] or None
+        Loaded pareto_front.json.
+    verdicts_data : dict[str, Any] or None
+        Loaded hypothesis_verdicts.json.
+
+    Returns
+    -------
+    str
+        Full Markdown document content.
+    """
+    rankings = rankings_data.get("rankings", [])
+
+    lines = [
+        "# Final Encoding Rankings",
+        "",
+        "This document presents the final composite ranking of all 16 quantum "
+        "data encodings evaluated in the Quantum Encoding Atlas. Rankings are "
+        "computed using a weighted multi-objective score combining classification "
+        "accuracy, circuit efficiency, trainability, and noise resilience.",
+        "",
+    ]
+
+    # --- Ranking table ------------------------------------------------------
+    lines.extend([
+        "## Composite Rankings",
+        "",
+        "| Rank | Encoding | Family | Score | VQC Acc | Kernel Acc | Pareto |",
+        "| --- | --- | --- | --- | --- | --- | --- |",
+    ])
+
+    for r in rankings:
+        enc = r.get("encoding", "unknown")
+        family = _ENCODING_FAMILIES.get(enc, "Other")
+        rank = r.get("rank", "—")
+        score = _fmt(r.get("score"), 4)
+        vqc = _fmt(r.get("vqc_accuracy"), 3)
+        kernel = _fmt(r.get("kernel_accuracy"), 3)
+        pareto = "Yes" if r.get("is_pareto") else "No"
+        lines.append(
+            f"| {rank} | {enc} | {family} | {score} | {vqc} | {kernel} | {pareto} |"
+        )
+
+    # --- Pareto front -------------------------------------------------------
+    if pareto_data:
+        pareto_names = pareto_data.get("pareto_optimal", [])
+        objectives = pareto_data.get("objective_names", [])
+        n_analyzed = pareto_data.get("n_encodings_analyzed", 0)
+
+        lines.extend([
+            "",
+            "## Pareto Front",
+            "",
+            f"The Pareto front was computed over {n_analyzed} encodings "
+            f"using {len(objectives)} objectives: "
+            f"{', '.join(objectives)}.",
+            "",
+            f"**{len(pareto_names)} Pareto-optimal encodings** were "
+            "identified (no other encoding dominates them on all objectives "
+            "simultaneously):",
+            "",
+        ])
+
+        encodings_detail = pareto_data.get("encodings", {})
+        for name in pareto_names:
+            display = _ENCODING_DISPLAY.get(name, name)
+            detail = encodings_detail.get(name, {})
+            obj_vals = detail.get("objectives", [])
+            obj_strs = [
+                f"{oname}={_fmt(oval, 3)}"
+                for oname, oval in zip(objectives, obj_vals)
+            ]
+            lines.append(f"- **{display}**: {', '.join(obj_strs)}")
+
+        lines.extend([
+            "",
+            "The existence of multiple Pareto-optimal encodings across "
+            "different families (Non-Entangling, Equivariant) confirms that "
+            "no single encoding dominates all evaluation axes. Encoding "
+            "selection should be guided by the specific requirements of the "
+            "target application.",
+        ])
+
+    # --- Key findings -------------------------------------------------------
+    lines.extend([
+        "",
+        "## Key Findings",
+        "",
+    ])
+
+    if rankings:
+        top = rankings[0]
+        top_name = _ENCODING_DISPLAY.get(top["encoding"], top["encoding"])
+        lines.extend([
+            f"1. **Top-ranked encoding:** {top_name} "
+            f"(score={_fmt(top.get('score'), 4)}, "
+            f"VQC={_fmt(top.get('vqc_accuracy'), 3)}, "
+            f"kernel={_fmt(top.get('kernel_accuracy'), 3)})",
+            "",
+        ])
+
+        # Group by family for family-level insights.
+        family_best: dict[str, dict[str, Any]] = {}
+        for r in rankings:
+            fam = _ENCODING_FAMILIES.get(r.get("encoding", ""), "Other")
+            if fam not in family_best:
+                family_best[fam] = r
+
+        lines.append(
+            "2. **Best encoding per family:**"
+        )
+        lines.append("")
+        for fam, r in sorted(family_best.items()):
+            display = _ENCODING_DISPLAY.get(r["encoding"], r["encoding"])
+            lines.append(
+                f"   - {fam}: {display} "
+                f"(rank #{r.get('rank')}, score={_fmt(r.get('score'), 4)})"
+            )
+        lines.append("")
+
+        # Simulable encodings.
+        simulable = [r for r in rankings if r.get("is_simulable")]
+        if simulable:
+            lines.append(
+                f"3. **Classically simulable encodings:** "
+                f"{', '.join(_ENCODING_DISPLAY.get(r['encoding'], r['encoding']) for r in simulable)} "
+                f"— these can be efficiently simulated classically, making them "
+                f"useful as baselines but not candidates for quantum advantage."
+            )
+            lines.append("")
+
+    # Practical guidance.
+    lines.extend([
+        "## Practical Guidance",
+        "",
+        "- **For highest accuracy:** Choose the top-ranked encoding "
+        "for the specific dataset and paradigm (VQC or kernel).",
+        "- **For resource-constrained hardware:** Prefer shallow-depth "
+        "encodings from the Pareto front (e.g., AngleEncoding, "
+        "HigherOrderAngleEncoding).",
+        "- **For noise-resilient applications:** Prioritise encodings "
+        "with high noise resilience scores; non-entangling encodings "
+        "are generally more robust (see H5).",
+        "- **For trainability:** Avoid deep circuits that exhibit "
+        "barren plateaus (see H4); SwapEquivariantFeatureMap achieves "
+        "the highest trainability among entangling encodings.",
+        "",
+    ])
+
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Main orchestrator
+# ---------------------------------------------------------------------------
+
+def generate_report(
+    *,
+    stage_dirs: dict[str, str],
+    tradeoff_dir: str,
+    output_dir: str,
+    figure_dir: str = "experiments/results/figures",
+    sensitivity_dir: str | None = None,
+    table_dir: str | None = None,
+    generate_tables: bool = True,
+) -> dict[str, Any]:
+    """Generate the complete Stage 8 report.
+
+    Parameters
+    ----------
+    stage_dirs : dict[str, str]
+        Mapping of stage name to result directory (Stages 1-6b).
+    tradeoff_dir : str
+        Stage 7 tradeoff results directory.
+    output_dir : str
+        Top-level output directory for the report.
+    figure_dir : str
+        Directory containing Stage 7 figures.
+    sensitivity_dir : str or None
+        Stage 6a.5 sensitivity results directory.
+    table_dir : str or None
+        Custom table output directory. Defaults to ``output_dir/tables``.
+    generate_tables : bool
+        Whether to generate tables (default True).
+
+    Returns
+    -------
+    dict[str, Any]
+        Report generation summary.
+    """
+    t_start = time.monotonic()
+    os.makedirs(output_dir, exist_ok=True)
+
+    if table_dir is None:
+        table_dir = os.path.join(output_dir, "tables")
+
+    generated_files: list[str] = []
+    errors: list[str] = []
+
+    # ---- 8.1: Master summary JSON -----------------------------------------
+    logger.info("Step 8.1: Building master summary JSON...")
+    print("[8.1] Building master summary JSON...", flush=True)
+    try:
+        master_summary = _build_master_summary(
+            stage_dirs, tradeoff_dir, sensitivity_dir,
+        )
+        # Use "master_summary.json" to avoid collision with the runner's
+        # own checkpoint-format "summary.json".
+        summary_path = os.path.join(output_dir, "master_summary.json")
+        _save_json(master_summary, summary_path)
+        generated_files.append(summary_path)
+        print(
+            f"  -> {summary_path} "
+            f"({master_summary['n_encodings']} encodings)",
+            flush=True,
+        )
+    except Exception as exc:
+        msg = f"8.1 Master summary failed: {exc}"
+        logger.error(msg)
+        errors.append(msg)
+
+    # ---- 8.2: Tables -------------------------------------------------------
+    if generate_tables:
+        logger.info("Step 8.2: Generating tables...")
+        print("[8.2] Generating tables...", flush=True)
+        try:
+            table_files = _generate_tables(
+                stage_dirs, tradeoff_dir, sensitivity_dir, table_dir,
+            )
+            generated_files.extend(table_files)
+            print(f"  -> {len(table_files)} table files in {table_dir}", flush=True)
+        except Exception as exc:
+            msg = f"8.2 Table generation failed: {exc}"
+            logger.error(msg)
+            errors.append(msg)
+
+    # ---- 8.3: Figure verification ------------------------------------------
+    logger.info("Step 8.3: Verifying figures...")
+    print("[8.3] Verifying figures...", flush=True)
+    figure_report = _verify_figures(figure_dir)
+    print(
+        f"  -> {figure_report.get('png_count', 0)} PNG, "
+        f"{figure_report.get('pdf_count', 0)} PDF figures found",
+        flush=True,
+    )
+
+    # ---- 8.4: Hypothesis narrative -----------------------------------------
+    logger.info("Step 8.4: Generating hypothesis verdict narrative...")
+    print("[8.4] Generating hypothesis verdict narrative...", flush=True)
+    try:
+        verdicts_data = _load_json(
+            os.path.join(tradeoff_dir, "hypothesis_verdicts.json")
+        )
+        pareto_data = _load_json(
+            os.path.join(tradeoff_dir, "pareto_front.json")
+        )
+        rankings_data = _load_json(
+            os.path.join(tradeoff_dir, "rankings.json")
+        )
+
+        if verdicts_data:
+            hyp_md = _generate_hypotheses_md(verdicts_data, pareto_data)
+            hyp_path = os.path.join(output_dir, "hypotheses.md")
+            _save_text(hyp_md, hyp_path)
+            generated_files.append(hyp_path)
+            print(f"  -> {hyp_path}", flush=True)
+        else:
+            errors.append("8.4 hypothesis_verdicts.json not found")
+
+        if rankings_data:
+            rank_md = _generate_ranking_md(
+                rankings_data, pareto_data, verdicts_data,
+            )
+            rank_path = os.path.join(output_dir, "ranking.md")
+            _save_text(rank_md, rank_path)
+            generated_files.append(rank_path)
+            print(f"  -> {rank_path}", flush=True)
+        else:
+            errors.append("8.4 rankings.json not found")
+
+    except Exception as exc:
+        msg = f"8.4 Narrative generation failed: {exc}"
+        logger.error(msg)
+        errors.append(msg)
+
+    wall_time = round(time.monotonic() - t_start, 3)
+
+    result: dict[str, Any] = {
+        "schema_version": _SCHEMA_VERSION,
+        "status": "success" if not errors else "partial",
+        "n_files_generated": len(generated_files),
+        "generated_files": [os.path.basename(f) for f in generated_files],
+        "figure_verification": figure_report,
+        "errors": errors,
+        "wall_time_seconds": wall_time,
+    }
+
+    # Save the report generation result alongside the outputs.
+    result_path = os.path.join(output_dir, "report_generation_result.json")
+    _save_json(result, result_path)
+
+    print(
+        f"\n[DONE] Report generation complete: "
+        f"{len(generated_files)} files, {wall_time:.1f}s",
+        flush=True,
+    )
+    if errors:
+        print(f"  Warnings: {len(errors)} error(s) — see report_generation_result.json")
+
+    return result
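The ranking.md narrative above describes the Pareto front as the set of encodings that no other encoding dominates on all objectives simultaneously. The front itself is computed in Stage 7, which is outside this diff, so the sketch below is only an illustration of the standard dominance check with made-up names and data, not the Stage 7 API:

```python
# Illustrative Pareto-front computation (assumes all objectives are maximized).
def dominates(a: list[float], b: list[float]) -> bool:
    """True if a is at least as good as b everywhere and strictly better somewhere."""
    return all(x >= y for x, y in zip(a, b)) and any(x > y for x, y in zip(a, b))

def pareto_front(points: dict[str, list[float]]) -> list[str]:
    """Names of points not dominated by any other point."""
    return [
        name for name, objs in points.items()
        if not any(dominates(other, objs)
                   for other_name, other in points.items() if other_name != name)
    ]

# Hypothetical objectives (accuracy, inverse depth): "deep" is dominated by "angle".
print(pareto_front({"angle": [0.85, 0.50], "deep": [0.84, 0.10], "iqp": [0.80, 0.60]}))
# -> ['angle', 'iqp']
```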
+""" + +from __future__ import annotations + +import json +import os +import tempfile +from pathlib import Path +from typing import Any +from unittest.mock import patch + +import pytest + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +_PROJECT_ROOT = Path(__file__).resolve().parents[2] +_RESULTS_DIR = _PROJECT_ROOT / "experiments" / "results" / "raw" +_TRADEOFF_DIR = _RESULTS_DIR / "stage7_tradeoff" +_SENSITIVITY_DIR = _RESULTS_DIR / "stage6a5_sensitivity" +_FIGURE_DIR = _PROJECT_ROOT / "experiments" / "results" / "figures" + +_STAGE_DIRS = { + "resources": str(_RESULTS_DIR / "stage1_resources"), + "simulability": str(_RESULTS_DIR / "stage2_simulability"), + "expressibility": str(_RESULTS_DIR / "stage3_expressibility"), + "entanglement": str(_RESULTS_DIR / "stage4_entanglement"), + "trainability": str(_RESULTS_DIR / "stage5_trainability"), + "noise": str(_RESULTS_DIR / "stage5b_noise"), + "vqc": str(_RESULTS_DIR / "stage6a_vqc"), + "kernel": str(_RESULTS_DIR / "stage6b_kernel"), +} + + +def _results_available() -> bool: + """Check if experiment results are available for testing.""" + required = [ + _TRADEOFF_DIR / "rankings.json", + _TRADEOFF_DIR / "hypothesis_verdicts.json", + ] + return all(p.exists() for p in required) + + +skip_if_no_results = pytest.mark.skipif( + not _results_available(), + reason="Experiment results not available (run stages 1-7 first)", +) + + +# --------------------------------------------------------------------------- +# Unit tests: helpers +# --------------------------------------------------------------------------- + +class TestHelpers: + """Test module-level helper functions.""" + + def test_json_default_numpy_int(self) -> None: + """Numpy integers are serialized as Python int.""" + from experiments.report import _json_default + + try: + import numpy as np + val = np.int64(42) + assert _json_default(val) == 42 + except ImportError: + pytest.skip("numpy not available") + + def test_json_default_numpy_float(self) -> None: + """Numpy floats are serialized as Python float.""" + from experiments.report import _json_default + + try: + import numpy as np + val = np.float64(3.14) + assert abs(_json_default(val) - 3.14) < 1e-10 + except ImportError: + pytest.skip("numpy not available") + + def test_json_default_raises_on_unknown(self) -> None: + """Unknown types raise TypeError.""" + from experiments.report import _json_default + + with pytest.raises(TypeError): + _json_default(object()) + + def test_fmt_none(self) -> None: + """None values format as dash.""" + from experiments.report import _fmt + assert _fmt(None) == "—" + + def test_fmt_float(self) -> None: + """Float values format with precision.""" + from experiments.report import _fmt + assert _fmt(0.12345, 3) == "0.123" + + def test_fmt_bool(self) -> None: + """Boolean values format as Yes/No.""" + from experiments.report import _fmt + assert _fmt(True) == "Yes" + assert _fmt(False) == "No" + + def test_tex_escape(self) -> None: + """LaTeX special characters are escaped.""" + from experiments.report import _tex_escape + assert _tex_escape("zz_feature_map") == r"zz\_feature\_map" + + def test_load_json_missing(self) -> None: + """Missing file returns None.""" + from experiments.report import _load_json + assert _load_json("/nonexistent/file.json") is None + + +# --------------------------------------------------------------------------- +# Unit tests: table generation with mock data +# 
--------------------------------------------------------------------------- + +class TestTableGeneration: + """Test table generation with minimal mock data.""" + + def test_accuracy_matrix_md(self) -> None: + """Accuracy matrix markdown generation.""" + from experiments.report import _generate_accuracy_matrix_md + + enc_names = ["angle", "basis"] + ds_names = ["moons", "circles"] + matrix = { + "angle": {"moons": "0.850 +/- 0.030", "circles": "0.780 +/- 0.040"}, + "basis": {"moons": "0.550 +/- 0.050", "circles": "0.520 +/- 0.060"}, + } + md = _generate_accuracy_matrix_md(enc_names, ds_names, matrix, "Test Title") + assert "# Test Title" in md + assert "angle" in md + assert "0.850 +/- 0.030" in md + + def test_accuracy_matrix_tex(self) -> None: + """Accuracy matrix LaTeX generation.""" + from experiments.report import _generate_accuracy_matrix_tex + + enc_names = ["angle"] + ds_names = ["moons"] + matrix = {"angle": {"moons": "0.850 +/- 0.030"}} + tex = _generate_accuracy_matrix_tex( + enc_names, ds_names, matrix, "Test", "tab:test", + ) + assert r"\begin{table}" in tex + assert r"\label{tab:test}" in tex + + def test_sensitivity_table_md_empty(self) -> None: + """Sensitivity table handles empty results.""" + from experiments.report import _generate_sensitivity_table_md + md = _generate_sensitivity_table_md({"results": []}) + assert "No results available" in md + + +# --------------------------------------------------------------------------- +# Unit tests: narrative generation +# --------------------------------------------------------------------------- + +class TestNarrativeGeneration: + """Test narrative document generation with mock data.""" + + def _mock_verdicts(self) -> dict[str, Any]: + return { + "schema_version": "1.0", + "hypothesis_verdicts": { + "H1": { + "verdict": "refuted", + "confidence": "moderate", + "evidence": "Test evidence for H1.", + "test_statistic": {"rho": -0.5, "p_value": 0.01}, + }, + "H2": { + "verdict": "inconclusive", + "confidence": "low", + "evidence": "Test evidence for H2.", + "test_statistic": {"wins": 2}, + }, + "H3": { + "verdict": "supported", + "confidence": "high", + "evidence": "Test evidence for H3.", + "test_statistic": {}, + }, + "H4": { + "verdict": "supported", + "confidence": "high", + "evidence": "Test evidence.", + "test_statistic": {}, + }, + "H5": { + "verdict": "supported", + "confidence": "moderate", + "evidence": "Test evidence.", + "test_statistic": {}, + }, + "H6": { + "verdict": "inconclusive", + "confidence": "low", + "evidence": "Test evidence.", + "test_statistic": {}, + }, + "H7": { + "verdict": "supported", + "confidence": "moderate", + "evidence": "Test evidence.", + "test_statistic": {"n_pareto": 4}, + }, + }, + } + + def test_hypotheses_md_contains_all_hypotheses(self) -> None: + """Hypotheses narrative includes all H1-H7.""" + from experiments.report import _generate_hypotheses_md + + verdicts = self._mock_verdicts() + md = _generate_hypotheses_md(verdicts, None) + for h_id in ["H1", "H2", "H3", "H4", "H5", "H6", "H7"]: + assert h_id in md + + def test_hypotheses_md_verdict_labels(self) -> None: + """Verdict labels appear in the narrative.""" + from experiments.report import _generate_hypotheses_md + + verdicts = self._mock_verdicts() + md = _generate_hypotheses_md(verdicts, None) + assert "REFUTED" in md + assert "SUPPORTED" in md + assert "INCONCLUSIVE" in md + + def test_ranking_md_structure(self) -> None: + """Ranking narrative has expected sections.""" + from experiments.report import _generate_ranking_md + + 
rankings_data = { + "rankings": [ + { + "encoding": "angle", + "rank": 1, + "score": 0.772, + "vqc_accuracy": 0.848, + "kernel_accuracy": 0.958, + "is_pareto": True, + "is_simulable": True, + }, + ], + } + pareto_data = { + "pareto_optimal": ["angle"], + "objective_names": ["accuracy", "inv_depth"], + "n_encodings_analyzed": 16, + "encodings": { + "angle": {"objectives": [0.848, 0.5]}, + }, + } + md = _generate_ranking_md(rankings_data, pareto_data, None) + assert "# Final Encoding Rankings" in md + assert "Pareto Front" in md + assert "Key Findings" in md + assert "Practical Guidance" in md + + +# --------------------------------------------------------------------------- +# Integration tests with real data +# --------------------------------------------------------------------------- + +@skip_if_no_results +class TestReportIntegration: + """Integration tests using actual experiment results.""" + + def test_generate_report_produces_all_outputs(self) -> None: + """Full report generation produces expected files.""" + from experiments.report import generate_report + + with tempfile.TemporaryDirectory() as tmpdir: + result = generate_report( + stage_dirs=_STAGE_DIRS, + tradeoff_dir=str(_TRADEOFF_DIR), + output_dir=tmpdir, + figure_dir=str(_FIGURE_DIR), + sensitivity_dir=str(_SENSITIVITY_DIR), + ) + + assert result["status"] in ("success", "partial") + assert result["n_files_generated"] > 0 + + # Check core files exist. + assert os.path.isfile(os.path.join(tmpdir, "master_summary.json")) + assert os.path.isfile(os.path.join(tmpdir, "hypotheses.md")) + assert os.path.isfile(os.path.join(tmpdir, "ranking.md")) + + def test_master_summary_schema(self) -> None: + """Master summary has expected schema.""" + from experiments.report import _build_master_summary + + summary = _build_master_summary( + _STAGE_DIRS, str(_TRADEOFF_DIR), + str(_SENSITIVITY_DIR), + ) + assert summary["schema_version"] == "1.0" + assert summary["n_encodings"] == 16 + assert len(summary["encoding_profiles"]) == 16 + assert "hypothesis_verdicts" in summary + assert "pareto_front" in summary + + # Verify each profile has required keys. + for profile in summary["encoding_profiles"]: + assert "encoding" in profile + assert "rank" in profile + assert "metrics" in profile + assert "vqc_accuracy" in profile["metrics"] + + def test_tables_directory_populated(self) -> None: + """Table generation creates files in the tables directory.""" + from experiments.report import _generate_tables + + with tempfile.TemporaryDirectory() as tmpdir: + table_dir = os.path.join(tmpdir, "tables") + files = _generate_tables( + _STAGE_DIRS, str(_TRADEOFF_DIR), + str(_SENSITIVITY_DIR), table_dir, + ) + assert len(files) > 0 + assert os.path.isdir(table_dir) + + # Should have VQC and kernel accuracy matrices. 
            table_names = [os.path.basename(f) for f in files]
            assert "vqc_accuracy_matrix.md" in table_names
            assert "kernel_accuracy_matrix.md" in table_names

    def test_figure_verification(self) -> None:
        """Figure verification reports existing figures."""
        from experiments.report import _verify_figures

        report = _verify_figures(str(_FIGURE_DIR))
        assert report["status"] in ("ok", "empty")
        if report["status"] == "ok":
            assert report["png_count"] > 0

    def test_master_summary_json_is_valid(self) -> None:
        """Generated master_summary.json is valid JSON with correct encoding count."""
        from experiments.report import generate_report

        with tempfile.TemporaryDirectory() as tmpdir:
            generate_report(
                stage_dirs=_STAGE_DIRS,
                tradeoff_dir=str(_TRADEOFF_DIR),
                output_dir=tmpdir,
                figure_dir=str(_FIGURE_DIR),
                sensitivity_dir=str(_SENSITIVITY_DIR),
                generate_tables=False,
            )

            with open(os.path.join(tmpdir, "master_summary.json"), "r") as fh:
                data = json.load(fh)

            assert data["schema_version"] == "1.0"
            assert data["n_encodings"] == 16


# ---------------------------------------------------------------------------
# Config integration test
# ---------------------------------------------------------------------------

class TestConfigIntegration:
    """Test that the report stage is properly registered."""

    def test_report_in_valid_stages(self) -> None:
        """Report stage is registered in VALID_STAGES."""
        from experiments.config import VALID_STAGES
        assert "report" in VALID_STAGES

    def test_report_in_stage_offsets(self) -> None:
        """Report stage has a seed offset."""
        from experiments.config import STAGE_OFFSETS
        assert "report" in STAGE_OFFSETS
        assert STAGE_OFFSETS["report"] == 9

    def test_report_config_loads(self) -> None:
        """Stage 8 config file loads without error."""
        from experiments.config import load_config

        config_path = str(
            _PROJECT_ROOT / "experiments" / "configs" / "stage8_report.json"
        )
        if os.path.isfile(config_path):
            config = load_config(config_path)
            assert config.stage == "report"
            assert "__report__" in config.encoding_specs[0].name

    def test_report_handler_registered(self) -> None:
        """Report handler is in the _STAGE_HANDLERS registry."""
        from experiments.runner import _STAGE_HANDLERS
        assert "report" in _STAGE_HANDLERS
diff --git a/pyproject.toml b/pyproject.toml
index 55db621..29f586e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,6 +91,7 @@ Documentation = "https://encoding-atlas.github.io/quantum-encoding-atlas/"
 Repository = "https://github.com/encoding-atlas/quantum-encoding-atlas.git"
 Issues = "https://github.com/encoding-atlas/quantum-encoding-atlas/issues"
 Changelog = "https://github.com/encoding-atlas/quantum-encoding-atlas/blob/master/CHANGELOG.md"
+DOI = "https://doi.org/10.5281/zenodo.18780936"
 
 [tool.setuptools]
 package-dir = {"" = "src"}
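For downstream consumers of the report stage, master_summary.json is the machine-readable entry point. A minimal reader, matching the schema produced by `_build_master_summary` above; the path assumes the default `output_dir` from stage8_report.json, and everything else here is illustrative rather than part of the patch:

```python
"""Read the Stage 8 master summary and print the top-ranked encodings (sketch)."""
import json

with open("experiments/results/report/master_summary.json", encoding="utf-8") as fh:
    summary = json.load(fh)

# The schema version is the contract between the report stage and consumers.
assert summary["schema_version"] == "1.0"

# Profiles carry rank, family, and per-stage metrics for each encoding.
top3 = sorted(summary["encoding_profiles"], key=lambda p: p["rank"] or 999)[:3]
for p in top3:
    m = p["metrics"]
    print(f'#{p["rank"]}: {p["display_name"]} ({p["family"]}) '
          f'VQC={m["vqc_accuracy"]} kernel={m["kernel_accuracy"]}')
```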