1111 run_manifest_path ,
1212 step_manifest_dir ,
1313)
14+ from policyengine_us_data .build_datasets import (
15+ empty_stage_1_status_snapshot ,
16+ read_stage_1_status_snapshot ,
17+ )
1418from policyengine_us_data .utils .error_redaction import (
1519 DEFAULT_ERROR_MESSAGE_MAX_CHARS ,
1620 bound_error_text ,
2428 read_latest_pipeline_error ,
2529 stage_ids_for_manifest ,
2630)
27- from modal_app .step_manifests .specs import RUN_MANIFEST_STEP_IDS , step_title
31+ from modal_app .step_manifests .specs import (
32+ BUILD_DATASETS ,
33+ RUN_MANIFEST_STEP_IDS ,
34+ step_title ,
35+ )
2836
2937PIPELINE_STATUS_SCHEMA_VERSION = "1"
3038DEFAULT_RUNS_LIMIT = 25
@@ -112,6 +120,7 @@ def _message(
112120 status : str ,
113121 stage_manifests : list [dict [str , Any ]],
114122 error : dict [str , Any ] | None ,
123+ stage_1_status : dict [str , Any ] | None = None ,
115124) -> str :
116125 if error :
117126 location = (
@@ -125,6 +134,19 @@ def _message(
125134 return "Pipeline run not found."
126135 if stage_manifests :
127136 latest = stage_manifests [- 1 ]
137+ current_stage_1 = (stage_1_status or {}).get ("current" ) or {}
138+ if (
139+ latest ["step_id" ] == BUILD_DATASETS .id
140+ and latest ["status" ] == "running"
141+ and current_stage_1
142+ ):
143+ substep_id = current_stage_1 .get ("substep_id" )
144+ title = current_stage_1 .get ("title" ) or substep_id
145+ substep_status = current_stage_1 .get ("status" , "unknown" )
146+ return (
147+ f"Pipeline { status } ; current Stage 1 substep "
148+ f"{ substep_id } ({ title } ) is { substep_status } ."
149+ )
128150 return (
129151 f"Pipeline { status } ; latest manifest "
130152 f"{ latest ['substage_id' ] or latest ['stage_id' ]} is { latest ['status' ]} ."
@@ -215,6 +237,11 @@ def _latest_manifest_payload(
215237 }
216238
217239
def _stage_1_status_payload(run_dir: Path) -> dict[str, Any]:
    """Return the Stage 1 status snapshot for ``run_dir`` as a plain dict.

    The snapshot is loaded with ``read_stage_1_status_snapshot``, converted
    via its ``to_dict()`` method, and then passed through
    ``_sanitize_error_value`` before being returned to the caller.
    """
    # NOTE(review): presumably ``_sanitize_error_value`` redacts/bounds error
    # text nested in the snapshot — confirm against its definition.
    raw_status = read_stage_1_status_snapshot(run_dir).to_dict()
    return _sanitize_error_value(raw_status)
243+
244+
218245def _run_index_item (
219246 run_id : str ,
220247 * ,
@@ -241,6 +268,7 @@ def _run_index_item(
241268 "hf_staging_prefix" : run_manifest .get ("hf_staging_prefix" ),
242269 "github_run_url" : (run_manifest .get ("run_context" ) or {}).get ("github_run_url" ),
243270 "latest_manifest" : _latest_manifest_payload (stage_manifests ),
271+ "stage_1_current" : (payload .get ("stage_1_status" ) or {}).get ("current" ),
244272 "progress" : {
245273 "expected_manifests" : len (expected ),
246274 "present_manifests" : len (stage_manifests ),
@@ -271,6 +299,7 @@ def _unreadable_run_index_item(run_id: str, exc: BaseException) -> dict[str, Any
271299 "hf_staging_prefix" : None ,
272300 "github_run_url" : None ,
273301 "latest_manifest" : None ,
302+ "stage_1_current" : None ,
274303 "progress" : {
275304 "expected_manifests" : 0 ,
276305 "present_manifests" : 0 ,
@@ -357,6 +386,7 @@ def build_pipeline_status_payload(
357386 "stage_manifests" : [],
358387 "missing_expected_manifest_ids" : [],
359388 "error" : None ,
389+ "stage_1_status" : empty_stage_1_status_snapshot ().to_dict (),
360390 }
361391
362392 run_dir = _run_dir (run_id , runs_dir )
@@ -371,6 +401,7 @@ def build_pipeline_status_payload(
371401 "stage_manifests" : [],
372402 "missing_expected_manifest_ids" : list (RUN_MANIFEST_STEP_IDS ),
373403 "error" : None ,
404+ "stage_1_status" : empty_stage_1_status_snapshot ().to_dict (),
374405 }
375406
376407 run_manifest = read_run_manifest (manifest_path )
@@ -391,6 +422,7 @@ def build_pipeline_status_payload(
391422 run_manifest .error
392423 )
393424 status = run_manifest .status
425+ stage_1_status = _stage_1_status_payload (run_dir )
394426 return {
395427 "schema_version" : PIPELINE_STATUS_SCHEMA_VERSION ,
396428 "run_id" : run_id ,
@@ -399,11 +431,13 @@ def build_pipeline_status_payload(
399431 status = status ,
400432 stage_manifests = stage_manifests ,
401433 error = error ,
434+ stage_1_status = stage_1_status ,
402435 ),
403436 "run_manifest" : _run_manifest_payload (run_manifest ),
404437 "stage_manifests" : stage_manifests ,
405438 "missing_expected_manifest_ids" : missing_expected ,
406439 "error" : error ,
440+ "stage_1_status" : stage_1_status ,
407441 "updated_at" : run_manifest .updated_at ,
408442 "modal_app_name" : run_manifest .modal_app_name ,
409443 "modal_environment" : run_manifest .modal_environment ,
0 commit comments