Add tests closing budget-window coverage gaps

anth-volk · anth-volk · commit a1428a73676d · 2026-05-12T20:27:32.000+02:00
diff --git a/.github/scripts/modal-run-integ-tests.sh b/.github/scripts/modal-run-integ-tests.sh
@@ -1,13 +1,14 @@
 #!/bin/bash
 # Run simulation integration tests
-# Usage: ./modal-run-integ-tests.sh <environment> <base-url> [us-version]
+# Usage: ./modal-run-integ-tests.sh <environment> <base-url> [us-version] [uk-version]
 # Environment: beta runs all tests, prod excludes beta_only tests
 
 set -euo pipefail
 
 ENVIRONMENT="${1:?Environment required (beta or prod)}"
 BASE_URL="${2:?Base URL required}"
 US_VERSION="${3:-}"
+UK_VERSION="${4:-}"
 
 truthy() {
   case "${1:-}" in
@@ -115,6 +116,10 @@ if [ -n "$US_VERSION" ]; then
   export simulation_integ_test_us_model_version="$US_VERSION"
 fi
 
+if [ -n "$UK_VERSION" ]; then  # optional 4th argument; skipped when empty or omitted
+  export simulation_integ_test_uk_model_version="$UK_VERSION"
+fi
+
 if [ "$ENVIRONMENT" = "beta" ]; then
   echo "Running all simulation integration tests (including beta_only)"
   uv run pytest tests/simulation/ -v
diff --git a/.github/workflows/modal-deploy.reusable.yml b/.github/workflows/modal-deploy.reusable.yml
@@ -15,6 +15,12 @@ on:
       simulation_api_url:
         description: 'The deployed simulation API URL'
         value: ${{ jobs.deploy.outputs.simulation_api_url }}
+      us_version:
+        description: 'The deployed policyengine-us package version'
+        value: ${{ jobs.deploy.outputs.us_version }}
+      uk_version:
+        description: 'The deployed policyengine-uk package version'
+        value: ${{ jobs.deploy.outputs.uk_version }}
 
 jobs:
   deploy:
@@ -24,6 +30,7 @@ jobs:
     outputs:
       simulation_api_url: ${{ steps.get-url.outputs.simulation_api_url }}
       us_version: ${{ steps.versions.outputs.us_version }}
+      uk_version: ${{ steps.versions.outputs.uk_version }}
 
     steps:
     - name: Checkout repo
@@ -114,4 +121,4 @@ jobs:
         GATEWAY_AUTH_CLIENT_ID: ${{ secrets.GATEWAY_AUTH_CLIENT_ID }}
         GATEWAY_AUTH_CLIENT_SECRET: ${{ secrets.GATEWAY_AUTH_CLIENT_SECRET }}
         GATEWAY_AUTH_REQUIRED: ${{ vars.GATEWAY_AUTH_REQUIRED }}
-      run: .github/scripts/modal-run-integ-tests.sh "${{ inputs.environment }}" "${{ needs.deploy.outputs.simulation_api_url }}" "${{ needs.deploy.outputs.us_version }}"
+      run: .github/scripts/modal-run-integ-tests.sh "${{ inputs.environment }}" "${{ needs.deploy.outputs.simulation_api_url }}" "${{ needs.deploy.outputs.us_version }}" "${{ needs.deploy.outputs.uk_version }}"
diff --git a/projects/policyengine-api-simulation/tests/gateway/test_models.py b/projects/policyengine-api-simulation/tests/gateway/test_models.py
@@ -15,6 +15,10 @@
     SimulationRequest,
     JobSubmitResponse,
     JobStatusResponse,
+    _default_missing_state_tax_revenue_impact,
+    _enforce_max_payload_size,
+    _move_internal_telemetry_alias,
+    _strip_internal_passthrough_fields,
 )
 from tests.fixtures.budget_window_outputs import make_single_year_macro_output
 
@@ -113,6 +117,15 @@ def test_ping_response_serializes_correctly(self):
 class TestSimulationRequest:
     """Tests for SimulationRequest model."""
 
+    def test_request_pre_validators_ignore_non_dict_payloads(self):
+        """Each request pre-validator must hand a non-dict payload back unchanged."""
+
+        value = "not-a-request-object"  # a plain str stands in for any non-dict input
+
+        assert _move_internal_telemetry_alias(value) == value
+        assert _strip_internal_passthrough_fields(value) == value
+        assert _enforce_max_payload_size(value) == value
+
     def test_simulation_request_requires_country(self):
         """
         Given no country
@@ -252,6 +265,13 @@ def test_simulation_request_rejects_payload_just_above_256kb(self):
         with pytest.raises(ValidationError, match="too large"):
             SimulationRequest(**payload)
 
+    def test_simulation_request_size_cap_defers_non_json_serializable_payloads(self):
+        """The size cap is best-effort: a non-JSON payload comes back as-is."""
+
+        payload = {("tuple", "key"): "not-json-serializable"}  # tuple key: not valid JSON
+
+        assert _enforce_max_payload_size(payload) is payload
+
     def test_simulation_request_accepts_typed_telemetry_envelope(self):
         """
         Given a telemetry envelope
@@ -555,6 +575,34 @@ def test_budget_window_batch_submit_response_serializes_correctly(self):
 class TestBudgetWindowBatchStatusResponse:
     """Tests for budget-window batch status responses."""
 
+    def test_single_year_macro_output_budget_normalizer_defensive_branches(self):
+        """Default state tax to 0.0 only for dict outputs whose budget dict lacks it."""
+
+        raw_value = "not-a-macro-output"  # non-dict input
+        assert _default_missing_state_tax_revenue_impact(raw_value) == raw_value
+
+        no_budget = {"poverty": {}}  # no "budget" key at all
+        assert _default_missing_state_tax_revenue_impact(no_budget) is no_budget
+
+        non_object_budget = {"budget": "not-an-object"}  # budget is not a dict
+        assert (
+            _default_missing_state_tax_revenue_impact(non_object_budget)
+            is non_object_budget
+        )
+
+        existing_state_tax = {"budget": {"state_tax_revenue_impact": 12}}  # key exists
+        assert (
+            _default_missing_state_tax_revenue_impact(existing_state_tax)
+            is existing_state_tax
+        )
+
+        missing_state_tax = {"budget": {"tax_revenue_impact": 12}}  # key absent
+        normalized = _default_missing_state_tax_revenue_impact(missing_state_tax)
+
+        assert normalized is not missing_state_tax  # a new dict is returned
+        assert missing_state_tax["budget"].get("state_tax_revenue_impact") is None
+        assert normalized["budget"]["state_tax_revenue_impact"] == 0.0
+
     def test_budget_window_result_requires_years_and_outputs_by_year(self):
         with pytest.raises(ValidationError):
             BudgetWindowResult(
diff --git a/projects/policyengine-api-simulation/tests/test_budget_window_results.py b/projects/policyengine-api-simulation/tests/test_budget_window_results.py
@@ -88,6 +88,55 @@ def test_validate_single_year_output_rejects_malformed_child_result():
         )
 
 
+def test_validate_single_year_output_rejects_non_object_child_result():
+    with pytest.raises(
+        ValueError,
+        match="Malformed budget-window child result: expected object for 2026",
+    ):
+        validate_single_year_output(
+            simulation_year="2026",
+            child_result="not-an-object",  # a str where an object is expected
+        )
+
+
+def test_validate_single_year_output_rejects_non_object_budget():
+    child_result = make_single_year_macro_output(
+        tax_revenue_impact=100,
+        state_tax_revenue_impact=40,
+        benefit_spending_impact=20,
+        budgetary_impact=80,
+    )
+    child_result["budget"] = "not-an-object"  # overwrite the fixture's budget with a str
+
+    with pytest.raises(
+        ValueError,
+        match="Malformed budget-window child result: missing budget object",
+    ):
+        validate_single_year_output(
+            simulation_year="2026",
+            child_result=child_result,
+        )
+
+
+def test_validate_single_year_output_wraps_model_shape_errors():
+    child_result = make_single_year_macro_output(
+        tax_revenue_impact=100,
+        state_tax_revenue_impact=40,
+        benefit_spending_impact=20,
+        budgetary_impact=80,
+    )
+    child_result["decile"] = "not-an-object"  # corrupt a section other than "budget"
+
+    with pytest.raises(
+        ValueError,
+        match="Malformed budget-window child result for 2026",
+    ):
+        validate_single_year_output(
+            simulation_year="2026",
+            child_result=child_result,
+        )
+
+
 def test_validate_single_year_output_rejects_malformed_state_tax_value():
     child_result = make_single_year_macro_output(
         tax_revenue_impact=100,
diff --git a/projects/policyengine-api-simulation/tests/test_budget_window_scheduler.py b/projects/policyengine-api-simulation/tests/test_budget_window_scheduler.py
@@ -15,10 +15,16 @@
 from fastapi.testclient import TestClient
 
 import src.modal.budget_window_batch as batch_module
+from src.modal.budget_window_context import BudgetWindowBatchContext
 import src.modal.budget_window_scheduler as scheduler_module
 import src.modal.budget_window_state as state_module
 from fixtures.gateway.shared import create_gateway_app
 from src.modal.gateway import endpoints
+from src.modal.gateway.models import (
+    BatchChildJobStatus,
+    BudgetWindowBatchRequest,
+    PolicyEngineBundle,
+)
 from tests.fixtures.budget_window_outputs import make_single_year_macro_output
 
 
@@ -298,3 +304,43 @@ def child_result_without_state_tax(simulation_year: str) -> dict:
     )
     assert body["result"]["totals"]["stateTaxRevenueImpact"] == 0.0
     assert body["result"]["totals"]["federalTaxRevenueImpact"] == 300.0
+
+
+def test_budget_window_runner_resolves_persisted_child_handle_fallback(
+    budget_window_semi_integration_client,
+):
+    _, runtime = budget_window_semi_integration_client
+    child_call = object()  # sentinel; checked by identity on the resolved handle
+    runtime.calls["child-2026"] = child_call  # keyed by the child job id used below
+
+    request = BudgetWindowBatchRequest.model_validate(
+        {
+            "country": "us",
+            "region": "us",
+            "scope": "macro",
+            "reform": {},
+            "start_year": "2026",
+            "window_size": 1,
+            "max_parallel": 1,
+        }
+    )
+    context = BudgetWindowBatchContext(
+        batch_job_id="parent-resume-123",
+        request=request,
+        resolved_version="1.500.0",
+        resolved_app_name="policyengine-simulation-us1-500-0-uk2-66-0",
+        bundle=PolicyEngineBundle(model_version="1.500.0"),
+        raw_params=request.model_dump(mode="json"),
+    )
+    runner = scheduler_module.BudgetWindowBatchRunner(context)
+    runner.state.child_jobs["2026"] = BatchChildJobStatus(  # only state knows the child
+        job_id="child-2026",
+        status="running",
+    )
+
+    handle = runner.resolve_child_handle("2026")
+
+    assert handle.simulation_year == "2026"
+    assert handle.job_id == "child-2026"
+    assert handle.call is child_call
+    assert runner.child_handles["2026"] is handle  # handle is stored on the runner
diff --git a/projects/policyengine-api-simulation/tests/test_modal_scripts.py b/projects/policyengine-api-simulation/tests/test_modal_scripts.py
@@ -439,6 +439,55 @@ def test_fails_when_auth_required_but_gateway_auth_vars_missing(self):
         assert result.returncode != 0
         assert "GATEWAY_AUTH_REQUIRED is enabled" in result.stderr
 
+    def test_exports_us_and_uk_model_versions_to_integration_tests(self, tmp_path):
+        """US/UK version args must reach the uv subprocess as env vars."""
+        uv_calls_log = tmp_path / "uv_calls.log"
+        fake_bin = tmp_path / "bin"
+        fake_bin.mkdir()
+        fake_uv = fake_bin / "uv"  # stub that logs its args and selected env vars
+        fake_uv.write_text(
+            '#!/bin/bash\n'
+            'printf "%s|base=%s|us=%s|uk=%s\\n" "$*" '
+            '"${simulation_integ_test_base_url:-}" '
+            '"${simulation_integ_test_us_model_version:-}" '
+            '"${simulation_integ_test_uk_model_version:-}" >> "$UV_CALLS_LOG"\n'
+        )
+        fake_uv.chmod(0o755)
+
+        env = os.environ.copy()
+        env["PATH"] = f"{fake_bin}:{env['PATH']}"  # stub dir is searched first
+        env["UV_CALLS_LOG"] = str(uv_calls_log)
+        for key in (  # drop any gateway-auth settings inherited from the caller
+            "GATEWAY_AUTH_REQUIRED",
+            "GATEWAY_AUTH_ISSUER",
+            "GATEWAY_AUTH_AUDIENCE",
+            "GATEWAY_AUTH_CLIENT_ID",
+            "GATEWAY_AUTH_CLIENT_SECRET",
+        ):
+            env.pop(key, None)
+
+        result = subprocess.run(
+            [
+                "bash",
+                str(self.script),
+                "prod",  # environment
+                "https://example.com",  # base URL
+                "1.690.7",  # US model version
+                "2.88.14",  # UK model version
+            ],
+            capture_output=True,
+            text=True,
+            env=env,
+            cwd=REPO_ROOT,
+        )
+
+        assert result.returncode == 0, f"Script failed: {result.stderr}"
+        log = uv_calls_log.read_text()
+        assert "run pytest tests/simulation/ -v -m not beta_only" in log
+        assert "base=https://example.com" in log
+        assert "us=1.690.7" in log
+        assert "uk=2.88.14" in log
+
 
 class TestAllScriptsHaveShebang:
     """Verify all scripts have proper shebang and error handling."""
diff --git a/projects/policyengine-apis-integ/tests/simulation/conftest.py b/projects/policyengine-apis-integ/tests/simulation/conftest.py
@@ -34,6 +34,7 @@ class Settings(BaseSettings):
     timeout_in_millis: int = 600_000  # 10 minutes for full simulations
     poll_interval_seconds: float = 5.0
     us_model_version: str = "1.562.3"
+    uk_model_version: str = "2.88.14"
 
     model_config = SettingsConfigDict(
         env_prefix="simulation_integ_test_",
@@ -61,6 +62,12 @@ def us_model_version() -> str:
     return settings.us_model_version
 
 
+@pytest.fixture()
+def uk_model_version() -> str:
+    """Return the UK model version read from the module-level settings object."""
+    return settings.uk_model_version
+
+
 @pytest.fixture()
 def poll_interval() -> float:
     """Return poll interval in seconds."""
diff --git a/projects/policyengine-apis-integ/tests/simulation/test_calculate.py b/projects/policyengine-apis-integ/tests/simulation/test_calculate.py
@@ -199,6 +199,7 @@ def test_calculate_specific_model(
 @pytest.mark.beta_only
 def test_calculate_uk_model(
     client: Client | AuthenticatedClient,
+    uk_model_version: str,
     max_wait_seconds: float,
     poll_interval: float,
 ):
@@ -211,6 +212,7 @@ def test_calculate_uk_model(
     request = SimulationRequest.from_dict(
         {
             "country": "uk",
+            "version": uk_model_version,
             "scope": "macro",
             "reform": {
                 "gov.hmrc.income_tax.rates.uk[0].rate": {"2023-01-01.2100-12-31": 0.21}
@@ -226,6 +228,9 @@ def test_calculate_uk_model(
     assert isinstance(submit_response, JobSubmitResponse), (
         f"Unexpected response type: {type(submit_response)}"
     )
+    assert submit_response.version == uk_model_version, (
+        f"Version mismatch: expected {uk_model_version}, got {submit_response.version}"
+    )
     job_id = submit_response.job_id
 
     # When - poll for completion