Skip to content

Commit 614f233

Browse files
authored
Merge pull request #475 from PolicyEngine/fix/job-not-found-404
Fix missing simulation job polling status
2 parents 47baf2a + 9bf5b42 commit 614f233

3 files changed

Lines changed: 185 additions & 4 deletions

File tree

projects/policyengine-api-simulation/fixtures/gateway/test_endpoints.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class MockFunctionCall:
3030
"""Mock for Modal FunctionCall returned by spawn."""
3131

3232
registry = {}
33+
from_id_errors = {}
3334

3435
def __init__(self, object_id: str = "mock-job-id-123"):
3536
self.object_id = object_id
@@ -47,6 +48,8 @@ def get(self, timeout: int = 0):
4748

4849
@classmethod
4950
def from_id(cls, object_id: str):
51+
if object_id in cls.from_id_errors:
52+
raise cls.from_id_errors[object_id]
5053
if object_id not in cls.registry:
5154
raise KeyError(object_id)
5255
return cls.registry[object_id]
@@ -64,6 +67,11 @@ def __init__(self):
6467
def bind(self, app_name: str, func_name: str) -> "BoundMockFunction":
    """Return a ``BoundMockFunction`` built from this object and the given names."""
    bound = BoundMockFunction(self, app_name, func_name)
    return bound
6669

70+
def call_for(self, object_id: str) -> MockFunctionCall:
    """Create a ``MockFunctionCall`` with ``object_id`` and record it as ``last_call``.

    Lets a test set up a call handle for an arbitrary id without going
    through ``spawn``.
    """
    self.last_call = MockFunctionCall(object_id=object_id)
    return self.last_call
74+
6775

6876
class BoundMockFunction:
6977
"""Function handle returned by Modal.Function.from_name."""
@@ -84,6 +92,14 @@ def spawn(self, payload: dict) -> MockFunctionCall:
8492
return self.recorder.last_call
8593

8694

95+
class MockModalException:
    """Namespace of exception types exposed as ``exception`` on the mocked Modal module."""

    class NotFoundError(Exception):
        """Mock exception looked up by the gateway under the name ``NotFoundError``."""

    class OutputExpiredError(Exception):
        """Mock exception looked up by the gateway under the name ``OutputExpiredError``."""
101+
102+
87103
@pytest.fixture
88104
def mock_modal(monkeypatch):
89105
"""Patch Modal calls in the gateway endpoints module."""
@@ -93,6 +109,7 @@ def mock_modal(monkeypatch):
93109
mock_func = MockFunction()
94110
mock_dicts = {}
95111
MockFunctionCall.registry = {}
112+
MockFunctionCall.from_id_errors = {}
96113

97114
class MockModalDict:
98115
@staticmethod
@@ -113,11 +130,14 @@ class MockModal:
113130
Dict = MockModalDict
114131
Function = MockModalFunction
115132
FunctionCall = MockFunctionCall
133+
exception = MockModalException
116134

117135
monkeypatch.setattr(endpoints, "modal", MockModal)
118136
monkeypatch.setattr(budget_window_state, "modal", MockModal)
119137

120138
return {
121139
"func": mock_func,
122140
"dicts": mock_dicts,
141+
"function_call": MockFunctionCall,
142+
"exception": MockModalException,
123143
}

projects/policyengine-api-simulation/src/modal/gateway/endpoints.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,24 @@ def _job_metadata_store():
6161
return modal.Dict.from_name(JOB_METADATA_DICT_NAME, create_if_missing=True)
6262

6363

64+
def _modal_exception_class(name: str):
    """Look up the attribute ``name`` on ``modal.exception``.

    Returns ``None`` when ``modal`` has no ``exception`` attribute (or it is
    ``None``), or when that namespace has no attribute called ``name``.
    The lookup is done with ``getattr`` defaults so a missing name never
    raises ``AttributeError``.
    """
    namespace = getattr(modal, "exception", None)
    return None if namespace is None else getattr(namespace, name, None)
69+
70+
71+
def _is_modal_exception(exc: BaseException, name: str) -> bool:
    """Tell whether ``exc`` is an instance of the Modal exception called ``name``.

    Returns ``False`` when no exception class with that name can be resolved.
    """
    candidate = _modal_exception_class(name)
    if candidate is None:
        return False
    return isinstance(exc, candidate)
74+
75+
76+
def _is_modal_job_not_found(exc: BaseException) -> bool:
    """Tell whether ``exc`` is one of the Modal errors the gateway reports as 404.

    The names checked, in order, are ``NotFoundError`` and
    ``OutputExpiredError``.
    """
    not_found_names = ("NotFoundError", "OutputExpiredError")
    return any(_is_modal_exception(exc, name) for name in not_found_names)
80+
81+
6482
def _build_policyengine_bundle(
6583
country: str, resolved_version: str, payload: dict
6684
) -> PolicyEngineBundle:
@@ -267,12 +285,16 @@ async def get_job_status(job_id: str):
267285
- 500 with status="failed" and error on failure
268286
- 404 if job_id not found
269287
"""
270-
try:
271-
call = modal.FunctionCall.from_id(job_id)
272-
except Exception:
288+
job_metadata = _job_metadata_store().get(job_id)
289+
if job_metadata is None:
273290
raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")
274291

275-
job_metadata = _job_metadata_store().get(job_id)
292+
try:
293+
call = modal.FunctionCall.from_id(job_id)
294+
except Exception as exc:
295+
if _is_modal_job_not_found(exc):
296+
raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")
297+
raise
276298

277299
try:
278300
result = call.get(timeout=0)
@@ -282,6 +304,8 @@ async def get_job_status(job_id: str):
282304
except TimeoutError:
283305
return running_job_response(job_metadata)
284306
except Exception as exc:
307+
if _is_modal_job_not_found(exc):
308+
raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")
285309
redacted = log_and_redact_exception(
286310
exc,
287311
scope="simulation_job_status",

projects/policyengine-api-simulation/tests/gateway/test_endpoints.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,143 @@ def test__given_submitted_job_with_telemetry__then_polling_echoes_run_id(
433433
assert response.status_code == 200
434434
assert response.json()["run_id"] == "run-123"
435435

436+
def test__given_unknown_job_id__then_polling_returns_404(
    self, mock_modal, client: TestClient
):
    """
    Given a job id for which no job was ever submitted
    When the job status endpoint is polled with that id
    Then the response is a 404 whose detail names the id.
    """
    polled = client.get("/jobs/unknown-job-id")

    assert polled.status_code == 404
    detail = polled.json()["detail"]
    assert detail == "Job not found: unknown-job-id"
448+
449+
def test__given_lazy_modal_call_without_metadata__then_polling_returns_404(
    self, mock_modal, client: TestClient
):
    """
    Given the mocked Modal layer has a call handle for an arbitrary id
    When no job was submitted through the gateway for that id
    Then polling it still yields a 404 "Job not found" response.
    """
    recorder = mock_modal["func"]
    recorder.call_for("auth-smoke-probe-does-not-exist")

    polled = client.get("/jobs/auth-smoke-probe-does-not-exist")

    assert polled.status_code == 404
    detail = polled.json()["detail"]
    assert detail == "Job not found: auth-smoke-probe-does-not-exist"
466+
467+
def test__given_running_job__then_polling_returns_202(
    self, mock_modal, client: TestClient
):
    """
    Given a submitted job whose mock call is flagged as running
    When polling job status
    Then the gateway answers 202 with status "running".
    """
    version_registry = {
        "latest": "1.500.0",
        "1.500.0": "policyengine-simulation-us1-500-0-uk2-66-0",
    }
    mock_modal["dicts"]["simulation-api-us-versions"] = version_registry
    request_body = {
        "country": "us",
        "scope": "macro",
        "reform": {},
    }

    submitted = client.post("/simulate/economy/comparison", json=request_body)
    job_id = submitted.json()["job_id"]
    mock_modal["func"].last_call.running = True

    polled = client.get(f"/jobs/{job_id}")

    assert polled.status_code == 202
    assert polled.json()["status"] == "running"
490+
491+
def test__given_expired_modal_output__then_polling_returns_404(
    self, mock_modal, client: TestClient
):
    """
    Given a submitted job whose mock call carries an OutputExpiredError
    When polling job status
    Then the gateway answers 404 "Job not found" for that job id.
    """
    version_registry = {
        "latest": "1.500.0",
        "1.500.0": "policyengine-simulation-us1-500-0-uk2-66-0",
    }
    mock_modal["dicts"]["simulation-api-us-versions"] = version_registry
    request_body = {
        "country": "us",
        "scope": "macro",
        "reform": {},
    }

    submitted = client.post("/simulate/economy/comparison", json=request_body)
    job_id = submitted.json()["job_id"]
    expired = mock_modal["exception"].OutputExpiredError()
    mock_modal["func"].last_call.error = expired

    # Modal's FastAPI job queue example maps OutputExpiredError to 404:
    # https://modal.com/docs/guide/job-queue#integration-with-web-frameworks
    polled = client.get(f"/jobs/{job_id}")

    assert polled.status_code == 404
    assert polled.json()["detail"] == f"Job not found: {job_id}"
518+
519+
def test__given_modal_call_not_found__then_polling_returns_404(
    self, mock_modal, client: TestClient
):
    """
    Given a submitted job whose handle lookup is scripted to raise NotFoundError
    When polling job status
    Then the gateway answers 404 "Job not found" for that job id.
    """
    version_registry = {
        "latest": "1.500.0",
        "1.500.0": "policyengine-simulation-us1-500-0-uk2-66-0",
    }
    mock_modal["dicts"]["simulation-api-us-versions"] = version_registry
    request_body = {
        "country": "us",
        "scope": "macro",
        "reform": {},
    }

    submitted = client.post("/simulate/economy/comparison", json=request_body)
    job_id = submitted.json()["job_id"]
    lookup_failure = mock_modal["exception"].NotFoundError()
    mock_modal["function_call"].from_id_errors[job_id] = lookup_failure

    polled = client.get(f"/jobs/{job_id}")

    assert polled.status_code == 404
    assert polled.json()["detail"] == f"Job not found: {job_id}"
544+
545+
def test__given_worker_error__then_polling_returns_redacted_500(
    self, mock_modal, client: TestClient
):
    """
    Given a submitted job whose mock call carries a RuntimeError
    When polling job status
    Then the gateway answers 500 with status "failed" and an error string
    that includes a correlation id but not the original exception message.
    """
    version_registry = {
        "latest": "1.500.0",
        "1.500.0": "policyengine-simulation-us1-500-0-uk2-66-0",
    }
    mock_modal["dicts"]["simulation-api-us-versions"] = version_registry
    request_body = {
        "country": "us",
        "scope": "macro",
        "reform": {},
    }

    submitted = client.post("/simulate/economy/comparison", json=request_body)
    job_id = submitted.json()["job_id"]
    mock_modal["func"].last_call.error = RuntimeError("worker crashed")

    polled = client.get(f"/jobs/{job_id}")

    assert polled.status_code == 500
    payload = polled.json()
    assert payload["status"] == "failed"
    error_text = payload["error"]
    assert error_text.startswith("Simulation failed")
    assert "correlation_id=" in error_text
    assert "worker crashed" not in error_text
572+
436573

437574
class TestBudgetWindowBatchEndpoints:
438575
"""Tests for budget-window batch gateway endpoints."""

0 commit comments

Comments
 (0)