Skip to content

Commit c245899

Browse files
committed
Add Stage 5 published artifact index
1 parent ea2976d commit c245899

9 files changed

Lines changed: 931 additions & 21 deletions

File tree

changelog.d/1044.added

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added the Stage 5 published artifact index (`published_artifact_index.jsonl`).

docs/engineering/stages/release_promotion.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,3 +122,18 @@ Runtime step manifests for `5_validate_and_promote_release` should include the
122122
contract as a JSON `contract` output. They may still record legacy validated
123123
input artifacts for compatibility, but the contract is the preferred semantic
124124
entry point for Stage 5 status and lineage.
125+
126+
## Published Artifact Index
127+
128+
Stage 5 also writes `published_artifact_index.jsonl` under the run-local
129+
`diagnostics/` directory. Each JSONL row describes one promoted artifact or
130+
release metadata artifact with its canonical `run_id`, candidate version,
131+
release version, source-stage metadata, final Hugging Face URI, and GCS URI
132+
when the artifact is mirrored to GCS.
133+
134+
Build index rows from typed release candidate and promotion-result objects, not
135+
from console logs. Release manifest entries may supply final checksum, size,
136+
revision, and kind fields for promoted data artifacts; the index should leave
137+
the release manifest schema unchanged. The release promotion contract must
138+
reference the index as a `published_artifact_index` output so dashboards and AI
139+
systems can discover the per-artifact rows from the Stage 5 contract.

modal_app/pipeline.py

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -586,19 +586,26 @@ def _write_release_promotion_contract_for_run(
586586
run_context: RunContext,
587587
rel_paths: list[str],
588588
promotion_result,
589-
) -> ArtifactReference:
590-
"""Write Stage 5's run-local contract and return its manifest reference."""
589+
) -> tuple[ArtifactReference, ...]:
590+
"""Write Stage 5's run-local index/contract and return manifest references."""
591591

592592
from policyengine_us_data.release_promotion import (
593593
build_legacy_release_candidate_bundle,
594+
build_published_artifact_index,
595+
published_artifact_index_artifact_ref,
596+
published_artifact_index_path,
597+
release_promotion_contract_repo_path,
594598
release_promotion_contract_path,
599+
write_published_artifact_index,
595600
write_release_promotion_contract,
596601
)
602+
from policyengine_us_data.stage_contracts import ArtifactRef
597603

598604
run_dir = _run_dir(run_context.run_id)
605+
release_context = _release_promotion_context_from_run_context(run_context)
599606
contract_path = release_promotion_contract_path(run_dir)
600607
candidate_bundle = build_legacy_release_candidate_bundle(
601-
context=_release_promotion_context_from_run_context(run_context),
608+
context=release_context,
602609
rel_paths=rel_paths,
603610
artifact_metadata_by_path=_release_artifact_metadata_by_path(
604611
run_context.run_id,
@@ -608,23 +615,61 @@ def _write_release_promotion_contract_for_run(
608615
run_context.run_id
609616
),
610617
)
618+
contract_artifact = ArtifactRef(
619+
logical_name="release_promotion_contract",
620+
uri=(
621+
f"hf://{release_context.hf_repo_name}/"
622+
f"{release_promotion_contract_repo_path(release_context.run_id)}"
623+
),
624+
media_type="application/json",
625+
metadata={
626+
"artifact_family": "stage_contract",
627+
"source_stage_id": "5_validate_and_promote_release",
628+
"relative_path": release_promotion_contract_repo_path(
629+
release_context.run_id
630+
),
631+
},
632+
)
633+
published_index_path = published_artifact_index_path(run_dir)
634+
published_index_rows = build_published_artifact_index(
635+
candidate_bundle=candidate_bundle,
636+
promotion_result=promotion_result,
637+
diagnostic_artifacts=(contract_artifact,),
638+
)
639+
write_published_artifact_index(published_index_rows, published_index_path)
640+
published_index_manifest_ref = ArtifactReference.from_path(
641+
published_index_path,
642+
role="index",
643+
base_dir=run_dir,
644+
media_type="application/jsonl",
645+
)
646+
published_index_artifact = published_artifact_index_artifact_ref(
647+
release_context,
648+
row_count=len(published_index_rows),
649+
sha256=f"sha256:{published_index_manifest_ref.sha256}",
650+
size_bytes=published_index_manifest_ref.size_bytes,
651+
)
611652
write_release_promotion_contract(
612653
contract_path=contract_path,
613654
candidate_bundle=candidate_bundle,
614655
promotion_result=promotion_result,
615656
created_at=datetime.now(timezone.utc).isoformat(),
616657
code_sha=meta.sha,
617658
package_version=meta.version,
659+
published_artifact_index=published_index_artifact,
618660
metadata={
619661
"writer": "modal_app.pipeline.promote_run",
620662
"branch": meta.branch,
621663
},
622664
)
623-
return ArtifactReference.from_path(
624-
contract_path,
625-
role="contract",
626-
base_dir=run_dir,
627-
media_type="application/json",
665+
return (
666+
ArtifactReference.from_path(
667+
contract_path,
668+
role="contract",
669+
base_dir=run_dir,
670+
media_type="application/json",
671+
),
672+
published_index_manifest_ref,
628673
)
629674

630675

@@ -2154,7 +2199,7 @@ def promote_run(
21542199
)
21552200
print(f" {promotion_stdout}")
21562201
promotion_result = _promotion_result_from_stdout(promotion_stdout)
2157-
release_promotion_contract_ref = _write_release_promotion_contract_for_run(
2202+
release_promotion_refs = _write_release_promotion_contract_for_run(
21582203
meta=meta,
21592204
run_context=promotion_context,
21602205
rel_paths=rel_paths,
@@ -2172,7 +2217,7 @@ def promote_run(
21722217
ArtifactReference.from_dict(artifact)
21732218
for artifact in promote_inputs["validated_step_outputs"]
21742219
],
2175-
release_promotion_contract_ref,
2220+
*release_promotion_refs,
21762221
],
21772222
reuse_decision="computed",
21782223
vol=pipeline_volume,

policyengine_us_data/release_promotion/__init__.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,19 @@
3131
release_promotion_contract_repo_path,
3232
write_release_promotion_contract,
3333
)
34+
from .published_index import (
35+
PUBLISHED_ARTIFACT_INDEX_FILENAME,
36+
PUBLISHED_ARTIFACT_INDEX_MEDIA_TYPE,
37+
PublishedArtifactIndexRow,
38+
build_published_artifact_index,
39+
published_artifact_index_artifact_ref,
40+
published_artifact_index_from_jsonl,
41+
published_artifact_index_path,
42+
published_artifact_index_repo_path,
43+
published_artifact_index_to_jsonl,
44+
read_published_artifact_index,
45+
write_published_artifact_index,
46+
)
3447
from .results import (
3548
CleanupPromotionResult,
3649
CompletionMarkerPromotionResult,
@@ -55,11 +68,14 @@
5568
"RELEASE_VALIDATION_SUBSTAGE_ID",
5669
"RELEASE_PROMOTION_CONTRACT_FILENAME",
5770
"RELEASE_PROMOTION_CONTRACT_TYPE",
71+
"PUBLISHED_ARTIFACT_INDEX_FILENAME",
72+
"PUBLISHED_ARTIFACT_INDEX_MEDIA_TYPE",
5873
"CleanupPromotionResult",
5974
"CompletionMarkerPromotionResult",
6075
"FullPromotionResult",
6176
"GcsPromotionResult",
6277
"HuggingFacePromotionResult",
78+
"PublishedArtifactIndexRow",
6379
"ReleaseArtifactSpec",
6480
"ReleaseCandidateInputBundle",
6581
"ReleasePromotionContractBuilder",
@@ -69,6 +85,7 @@
6985
"ReleaseManifestPromotionResult",
7086
"VersionManifestPromotionResult",
7187
"build_legacy_release_candidate_bundle",
88+
"build_published_artifact_index",
7289
"build_release_promotion_contract",
7390
"build_release_candidate_bundle_from_stage4_contract",
7491
"build_release_candidate_shape_report",
@@ -78,9 +95,16 @@
7895
"infer_release_artifact_spec",
7996
"logical_name_for_release_path",
8097
"normalize_release_path",
98+
"published_artifact_index_artifact_ref",
99+
"published_artifact_index_from_jsonl",
100+
"published_artifact_index_path",
101+
"published_artifact_index_repo_path",
102+
"published_artifact_index_to_jsonl",
81103
"release_promotion_contract_path",
82104
"release_promotion_contract_repo_path",
105+
"read_published_artifact_index",
83106
"read_stage4_release_candidate_bundle",
84107
"strip_staging_prefix",
108+
"write_published_artifact_index",
85109
"write_release_promotion_contract",
86110
]

policyengine_us_data/release_promotion/contract.py

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,18 @@ class ReleasePromotionContractBuilder:
7878
package_version: str | None = None
7979
validation: ValidationReport | None = None
8080
diagnostics: Sequence[DiagnosticRef] = ()
81+
published_artifact_index: ArtifactRef | None = None
8182
metadata: Mapping[str, Any] = field(default_factory=dict)
8283

8384
def __post_init__(self) -> None:
8485
if not isinstance(self.candidate_bundle, ReleaseCandidateInputBundle):
8586
raise ValueError("candidate_bundle must be ReleaseCandidateInputBundle")
8687
if not isinstance(self.promotion_result, FullPromotionResult):
8788
raise ValueError("promotion_result must be FullPromotionResult")
89+
if self.published_artifact_index is not None and not isinstance(
90+
self.published_artifact_index, ArtifactRef
91+
):
92+
raise ValueError("published_artifact_index must be ArtifactRef")
8893
object.__setattr__(
8994
self,
9095
"diagnostics",
@@ -100,10 +105,15 @@ def build(self) -> StageContract:
100105

101106
context = self.candidate_bundle.context
102107
inputs = _contract_inputs(self.candidate_bundle)
103-
outputs = _contract_outputs(context, self.promotion_result)
108+
outputs = _contract_outputs(
109+
context,
110+
self.promotion_result,
111+
published_artifact_index=self.published_artifact_index,
112+
)
104113
parameters = _contract_parameters(
105114
self.candidate_bundle,
106115
self.promotion_result,
116+
published_artifact_index=self.published_artifact_index,
107117
)
108118
return StageContract(
109119
contract_type=RELEASE_PROMOTION_CONTRACT_TYPE,
@@ -122,6 +132,11 @@ def build(self) -> StageContract:
122132
"context": context.to_dict(),
123133
"candidate_bundle": self.candidate_bundle.to_dict(),
124134
"promotion_result": self.promotion_result.to_dict(),
135+
"published_artifact_index": (
136+
self.published_artifact_index.to_dict()
137+
if self.published_artifact_index is not None
138+
else None
139+
),
125140
"outputs": [output.to_dict() for output in outputs],
126141
}
127142
),
@@ -152,6 +167,7 @@ def build_release_promotion_contract(
152167
package_version: str | None = None,
153168
validation: ValidationReport | None = None,
154169
diagnostics: Sequence[DiagnosticRef] = (),
170+
published_artifact_index: ArtifactRef | None = None,
155171
metadata: Mapping[str, Any] | None = None,
156172
) -> StageContract:
157173
"""Build the Stage 5 release promotion contract."""
@@ -164,6 +180,7 @@ def build_release_promotion_contract(
164180
package_version=package_version,
165181
validation=validation,
166182
diagnostics=diagnostics,
183+
published_artifact_index=published_artifact_index,
167184
metadata=metadata or {},
168185
).build()
169186

@@ -178,6 +195,7 @@ def write_release_promotion_contract(
178195
package_version: str | None = None,
179196
validation: ValidationReport | None = None,
180197
diagnostics: Sequence[DiagnosticRef] = (),
198+
published_artifact_index: ArtifactRef | None = None,
181199
metadata: Mapping[str, Any] | None = None,
182200
) -> StageContract:
183201
"""Build, write, and return the Stage 5 release promotion contract."""
@@ -190,6 +208,7 @@ def write_release_promotion_contract(
190208
package_version=package_version,
191209
validation=validation,
192210
diagnostics=diagnostics,
211+
published_artifact_index=published_artifact_index,
193212
metadata=metadata,
194213
)
195214
write_contract(contract, contract_path)
@@ -266,13 +285,15 @@ def _contract_inputs(
266285
def _contract_outputs(
267286
context: ReleasePromotionContext,
268287
result: FullPromotionResult,
288+
*,
289+
published_artifact_index: ArtifactRef | None = None,
269290
) -> tuple[ArtifactRef, ...]:
270291
hf_base = f"hf://{context.hf_repo_name}"
271292
completion_marker_path = (
272293
result.completion_marker.marker_path
273294
or f"releases/{context.release_version}/release-complete.json"
274295
)
275-
return (
296+
outputs = (
276297
ArtifactRef(
277298
logical_name="huggingface_release_artifacts",
278299
uri=f"{hf_base}/",
@@ -339,11 +360,16 @@ def _contract_outputs(
339360
metadata={"artifact_family": "release_completion_marker"},
340361
),
341362
)
363+
if published_artifact_index is not None:
364+
outputs = (*outputs, published_artifact_index)
365+
return outputs
342366

343367

344368
def _contract_parameters(
345369
candidate_bundle: ReleaseCandidateInputBundle,
346370
result: FullPromotionResult,
371+
*,
372+
published_artifact_index: ArtifactRef | None = None,
347373
) -> dict[str, Any]:
348374
context = candidate_bundle.context
349375
return {
@@ -363,6 +389,9 @@ def _contract_parameters(
363389
"source_output_contract_path": candidate_bundle.source_output_contract_path,
364390
"validation_report_paths": list(candidate_bundle.validation_report_paths),
365391
"diagnostics_manifest_path": candidate_bundle.diagnostics_manifest_path,
392+
"published_artifact_index_path": _artifact_relative_path(
393+
published_artifact_index
394+
),
366395
}
367396

368397

@@ -374,6 +403,7 @@ def _contract_metadata(
374403
outputs: Sequence[ArtifactRef],
375404
extra: Mapping[str, Any],
376405
) -> dict[str, Any]:
406+
outputs_by_name = {output.logical_name: output for output in outputs}
377407
return {
378408
**dict(extra),
379409
"contract_file": RELEASE_PROMOTION_CONTRACT_FILENAME,
@@ -383,10 +413,22 @@ def _contract_metadata(
383413
"cleanup": promotion_result.cleanup.to_dict(),
384414
"already_finalized": promotion_result.already_finalized,
385415
"promotion_result": promotion_result.to_dict(),
416+
"published_artifact_index": (
417+
outputs_by_name["published_artifact_index"].to_dict()
418+
if "published_artifact_index" in outputs_by_name
419+
else None
420+
),
386421
"public_refs": {output.logical_name: output.uri for output in outputs},
387422
}
388423

389424

425+
def _artifact_relative_path(artifact: ArtifactRef | None) -> str | None:
    """Return the ``relative_path`` metadata entry of *artifact*, if usable.

    The result is ``None`` when no artifact is supplied, or when the
    ``relative_path`` metadata value is absent, is not a string, or is an
    empty string. Otherwise the stored string is returned unchanged.
    """
    if artifact is None:
        return None
    candidate = artifact.metadata.get("relative_path")
    if not isinstance(candidate, str):
        return None
    # An empty string carries no path information, so collapse it to None.
    return candidate or None
430+
431+
390432
def _execution_record(result: FullPromotionResult) -> ExecutionRecord:
391433
return ExecutionRecord(
392434
status="completed",
@@ -411,6 +453,16 @@ def _substage_records(
411453
promotion_result: FullPromotionResult,
412454
) -> tuple[SubstageRecord, ...]:
413455
outputs_by_name = {artifact.logical_name: artifact for artifact in public_outputs}
456+
finalization_outputs = [
457+
outputs_by_name["release_manifest"],
458+
outputs_by_name["versioned_release_manifest"],
459+
outputs_by_name["trace_tro"],
460+
outputs_by_name["versioned_trace_tro"],
461+
outputs_by_name["version_manifest"],
462+
outputs_by_name["release_completion_marker"],
463+
]
464+
if "published_artifact_index" in outputs_by_name:
465+
finalization_outputs.append(outputs_by_name["published_artifact_index"])
414466
return (
415467
SubstageRecord(
416468
substage_id="5a_validate_outputs",
@@ -442,14 +494,7 @@ def _substage_records(
442494
SubstageRecord(
443495
substage_id="5d_write_version_manifest",
444496
status="completed",
445-
outputs=(
446-
outputs_by_name["release_manifest"],
447-
outputs_by_name["versioned_release_manifest"],
448-
outputs_by_name["trace_tro"],
449-
outputs_by_name["versioned_trace_tro"],
450-
outputs_by_name["version_manifest"],
451-
outputs_by_name["release_completion_marker"],
452-
),
497+
outputs=tuple(finalization_outputs),
453498
reuse_mode="handoff",
454499
metadata={
455500
"version_manifest_updated": promotion_result.version_manifest.updated,

0 commit comments

Comments
 (0)