PolicyEngine
diff --git a/policyengine_api/services/report_output_alias_service.py b/policyengine_api/services/report_output_alias_service.py
Lines changed: 26 additions & 8 deletions
diff --git a/policyengine_api/services/report_run_service.py b/policyengine_api/services/report_run_service.py
Lines changed: 69 additions & 27 deletions
diff --git a/policyengine_api/services/report_spec_service.py b/policyengine_api/services/report_spec_service.py
Lines changed: 121 additions & 36 deletions
@@ -4,6 +4,13 @@
 
 
 class ReportOutputAliasService:
+    def _report_output_exists(self, report_output_id: int) -> bool:
+        row: Row | None = database.query(
+            "SELECT id FROM report_outputs WHERE id = ?",
+            (report_output_id,),
+        ).fetchone()
+        return row is not None
+
     def get_alias(self, legacy_report_output_id: int) -> dict | None:
         row: Row | None = database.query(
             """
@@ -19,7 +26,13 @@ def resolve_canonical_report_output_id(
     ) -> int | None:
         alias = self.get_alias(requested_report_output_id)
         if alias is not None:
-            return alias["canonical_report_output_id"]
+            canonical_report_output_id = alias["canonical_report_output_id"]
+            if not self._report_output_exists(canonical_report_output_id):
+                raise ValueError(
+                    "Alias points to missing canonical report output "
+                    f"#{canonical_report_output_id}"
+                )
+            return canonical_report_output_id
 
         row: Row | None = database.query(
             "SELECT id FROM report_outputs WHERE id = ?",
@@ -32,6 +45,11 @@ def set_alias(
         legacy_report_output_id: int,
         canonical_report_output_id: int,
     ) -> bool:
+        if not self._report_output_exists(canonical_report_output_id):
+            raise ValueError(
+                f"Canonical report output #{canonical_report_output_id} not found"
+            )
+
         existing_alias = self.get_alias(legacy_report_output_id)
         if existing_alias is None:
             database.query(
@@ -44,12 +62,12 @@ def set_alias(
             )
             return True
 
-        database.query(
-            """
-            UPDATE legacy_report_output_aliases
-            SET canonical_report_output_id = ?
-            WHERE legacy_report_output_id = ?
-            """,
-            (canonical_report_output_id, legacy_report_output_id),
+        if existing_alias["canonical_report_output_id"] == canonical_report_output_id:
+            return True
+
+        raise ValueError(
+            "Legacy report output alias already points to canonical report output "
+            f"#{existing_alias['canonical_report_output_id']}"
         )
+
         return True
@@ -1,7 +1,9 @@
 import json
+import sqlite3
 import uuid
 from typing import Any
 
+import sqlalchemy.exc
 from sqlalchemy.engine.row import Row
 
 from policyengine_api.data import database
@@ -18,9 +20,17 @@
     "resolved_dataset",
     "resolved_options_hash",
 )
+MAX_CREATE_RUN_ATTEMPTS = 3
 
 
 class ReportRunService:
+    def _report_output_exists(self, report_output_id: int) -> bool:
+        row: Row | None = database.query(
+            "SELECT id FROM report_outputs WHERE id = ?",
+            (report_output_id,),
+        ).fetchone()
+        return row is not None
+
     def _next_run_sequence(self, report_output_id: int) -> int:
         row: Row | None = database.query(
             """
@@ -50,6 +60,14 @@ def _parse_run_row(self, row: Row | dict | None) -> dict | None:
             )
         return run
 
+    def _is_sequence_conflict(self, error: Exception) -> bool:
+        message = str(error)
+        return (
+            "report_output_run_sequence_idx" in message
+            or "report_output_runs.report_output_id, report_output_runs.run_sequence"
+            in message
+        )
+
     def create_report_output_run(
         self,
         report_output_id: int,
@@ -62,35 +80,53 @@ def create_report_output_run(
         version_manifest: dict[str, str | None] | None = None,
         run_id: str | None = None,
     ) -> dict:
+        if not self._report_output_exists(report_output_id):
+            raise ValueError(f"Report output #{report_output_id} not found")
+
         run_id = run_id or str(uuid.uuid4())
-        run_sequence = self._next_run_sequence(report_output_id)
         version_manifest = version_manifest or {}
 
-        database.query(
-            f"""
-            INSERT INTO report_output_runs (
-                id, report_output_id, run_sequence, status, output, error_message,
-                trigger_type, requested_at, started_at, finished_at, source_run_id,
-                report_spec_snapshot_json, {", ".join(REPORT_RUN_VERSION_FIELDS)}
-            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-            """,
-            (
-                run_id,
-                report_output_id,
-                run_sequence,
-                status,
-                self._serialize_json(output),
-                error_message,
-                trigger_type,
-                None,
-                None,
-                None,
-                source_run_id,
-                self._serialize_json(report_spec_snapshot),
-                *[version_manifest.get(field) for field in REPORT_RUN_VERSION_FIELDS],
-            ),
+        for attempt in range(MAX_CREATE_RUN_ATTEMPTS):
+            run_sequence = self._next_run_sequence(report_output_id)
+            try:
+                database.query(
+                    f"""
+                    INSERT INTO report_output_runs (
+                        id, report_output_id, run_sequence, status, output, error_message,
+                        trigger_type, requested_at, started_at, finished_at, source_run_id,
+                        report_spec_snapshot_json, {", ".join(REPORT_RUN_VERSION_FIELDS)}
+                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        run_id,
+                        report_output_id,
+                        run_sequence,
+                        status,
+                        self._serialize_json(output),
+                        error_message,
+                        trigger_type,
+                        None,
+                        None,
+                        None,
+                        source_run_id,
+                        self._serialize_json(report_spec_snapshot),
+                        *[
+                            version_manifest.get(field)
+                            for field in REPORT_RUN_VERSION_FIELDS
+                        ],
+                    ),
+                )
+                return self.get_report_output_run(run_id)
+            except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as error:
+                if (
+                    attempt == MAX_CREATE_RUN_ATTEMPTS - 1
+                    or not self._is_sequence_conflict(error)
+                ):
+                    raise
+
+        raise RuntimeError(
+            f"Unable to allocate report output run sequence for #{report_output_id}"
         )
-        return self.get_report_output_run(run_id)
 
     def get_report_output_run(self, run_id: str) -> dict | None:
         row: Row | None = database.query(
@@ -124,7 +160,13 @@ def get_newest_report_output_run(self, report_output_id: int) -> dict | None:
 
     def select_display_run(self, report_output: dict) -> dict | None:
         if report_output.get("active_run_id"):
-            return self.get_report_output_run(report_output["active_run_id"])
+            active_run = self.get_report_output_run(report_output["active_run_id"])
+            if active_run is not None:
+                return active_run
         if report_output.get("latest_successful_run_id"):
-            return self.get_report_output_run(report_output["latest_successful_run_id"])
+            latest_successful_run = self.get_report_output_run(
+                report_output["latest_successful_run_id"]
+            )
+            if latest_successful_run is not None:
+                return latest_successful_run
         return self.get_newest_report_output_run(report_output["id"])
@@ -42,13 +42,116 @@ class EconomyReportSpec(BaseModel):
 
 
 class ReportSpecService:
+    def _validate_schema_version(self, schema_version: int | None) -> None:
+        if schema_version != REPORT_SPEC_SCHEMA_VERSION:
+            raise ValueError(
+                f"Unsupported report spec schema version: {schema_version}"
+            )
+
     def _get_report_output_row(self, report_output_id: int) -> dict | None:
         row: Row | None = database.query(
             "SELECT * FROM report_outputs WHERE id = ?",
             (report_output_id,),
         ).fetchone()
         return dict(row) if row is not None else None
 
+    def _validate_report_country(
+        self,
+        report_output: dict,
+        simulation_1: dict,
+        simulation_2: dict | None = None,
+    ) -> None:
+        report_country_id = report_output["country_id"]
+        if simulation_1["country_id"] != report_country_id:
+            raise ValueError(
+                "Simulation 1 country must match report output country to build a "
+                "report spec"
+            )
+        if simulation_2 is not None and simulation_2["country_id"] != report_country_id:
+            raise ValueError(
+                "Simulation 2 country must match report output country to build a "
+                "report spec"
+            )
+
+    def _build_household_report_spec(
+        self,
+        report_output: dict,
+        report_kind: str,
+        simulation_1: dict,
+        simulation_2: dict | None,
+        time_period: str,
+    ) -> HouseholdReportSpec:
+        if simulation_1["population_type"] != "household":
+            raise ValueError("Household report specs require household simulations")
+        if (
+            simulation_2 is not None
+            and simulation_2["population_id"] != simulation_1["population_id"]
+        ):
+            raise ValueError(
+                "Household comparison report specs require matching household IDs"
+            )
+
+        return HouseholdReportSpec.model_validate(
+            {
+                "country_id": report_output["country_id"],
+                "report_kind": report_kind,
+                "time_period": time_period,
+                "simulation_1": {
+                    "population_type": simulation_1["population_type"],
+                    "population_id": simulation_1["population_id"],
+                    "policy_id": simulation_1["policy_id"],
+                },
+                "simulation_2": (
+                    {
+                        "population_type": simulation_2["population_type"],
+                        "population_id": simulation_2["population_id"],
+                        "policy_id": simulation_2["policy_id"],
+                    }
+                    if simulation_2 is not None
+                    else None
+                ),
+            }
+        )
+
+    def _build_economy_report_spec(
+        self,
+        report_output: dict,
+        report_kind: str,
+        simulation_1: dict,
+        simulation_2: dict | None,
+        time_period: str,
+        dataset: str,
+        target: Literal["general", "cliff"],
+        options: dict[str, Any] | None,
+    ) -> EconomyReportSpec:
+        if simulation_1["population_type"] != "geography":
+            raise ValueError("Economy report specs require geography simulations")
+        if (
+            simulation_2 is not None
+            and simulation_2["population_id"] != simulation_1["population_id"]
+        ):
+            raise ValueError(
+                "Economy comparison report specs require matching geography IDs"
+            )
+
+        return EconomyReportSpec.model_validate(
+            {
+                "country_id": report_output["country_id"],
+                "report_kind": report_kind,
+                "time_period": time_period,
+                "region": simulation_1["population_id"],
+                "baseline_policy_id": simulation_1["policy_id"],
+                "reform_policy_id": (
+                    simulation_2["policy_id"]
+                    if simulation_2 is not None
+                    else simulation_1["policy_id"]
+                ),
+                "dataset": dataset,
+                "target": target,
+                "options": options or {},
+            }
+        )
+
     def infer_report_kind(
         self,
         simulation_1: dict,
@@ -88,46 +191,26 @@ def build_report_spec(
     ) -> ReportSpec:
         report_kind = self.infer_report_kind(simulation_1, simulation_2)
         time_period = report_output["year"]
+        self._validate_report_country(report_output, simulation_1, simulation_2)
 
         if report_kind in HOUSEHOLD_REPORT_KINDS:
-            return HouseholdReportSpec.model_validate(
-                {
-                    "country_id": report_output["country_id"],
-                    "report_kind": report_kind,
-                    "time_period": time_period,
-                    "simulation_1": {
-                        "population_type": simulation_1["population_type"],
-                        "population_id": simulation_1["population_id"],
-                        "policy_id": simulation_1["policy_id"],
-                    },
-                    "simulation_2": (
-                        {
-                            "population_type": simulation_2["population_type"],
-                            "population_id": simulation_2["population_id"],
-                            "policy_id": simulation_2["policy_id"],
-                        }
-                        if simulation_2 is not None
-                        else None
-                    ),
-                }
+            return self._build_household_report_spec(
+                report_output=report_output,
+                report_kind=report_kind,
+                simulation_1=simulation_1,
+                simulation_2=simulation_2,
+                time_period=time_period,
             )
 
-        return EconomyReportSpec.model_validate(
-            {
-                "country_id": report_output["country_id"],
-                "report_kind": report_kind,
-                "time_period": time_period,
-                "region": simulation_1["population_id"],
-                "baseline_policy_id": simulation_1["policy_id"],
-                "reform_policy_id": (
-                    simulation_2["policy_id"]
-                    if simulation_2 is not None
-                    else simulation_1["policy_id"]
-                ),
-                "dataset": dataset,
-                "target": target,
-                "options": options or {},
-            }
+        return self._build_economy_report_spec(
+            report_output=report_output,
+            report_kind=report_kind,
+            simulation_1=simulation_1,
+            simulation_2=simulation_2,
+            time_period=time_period,
+            dataset=dataset,
+            target=target,
+            options=options,
         )
 
     def _parse_json_field(self, value: str | dict | None) -> dict | None:
@@ -149,6 +232,7 @@ def get_report_spec(self, report_output_id: int) -> ReportSpec | None:
         if report_output is None or report_output["report_spec_json"] is None:
             return None
 
+        self._validate_schema_version(report_output["report_spec_schema_version"])
         raw_spec = self._parse_json_field(report_output["report_spec_json"])
         return self._parse_report_spec(report_output["report_kind"], raw_spec)
 
@@ -161,6 +245,7 @@ def set_report_spec(
     ) -> bool:
         if report_spec_status not in REPORT_SPEC_STATUSES:
             raise ValueError(f"Unsupported report spec status: {report_spec_status}")
+        self._validate_schema_version(schema_version)
 
         report_output = self._get_report_output_row(report_output_id)
         if report_output is None: