Skip to content
This repository was archived by the owner on Jun 14, 2026. It is now read-only.

Commit af546b8

Browse files
committed
Add mp-300k gate input packaging
1 parent b627e16 commit af546b8

4 files changed

Lines changed: 384 additions & 4 deletions

File tree

.github/workflows/mp300k-artifact-gates.yml

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,15 @@ on:
77
- main
88
workflow_dispatch:
99
inputs:
10+
gate_inputs_artifact:
11+
description: Optional Actions artifact name containing artifact.tar.gz and evidence JSONs.
12+
required: false
13+
default: ""
14+
type: string
1015
artifact_archive_url:
1116
description: URL to a .zip, .tar, or .tar.gz artifact bundle containing manifest.json.
12-
required: true
17+
required: false
18+
default: ""
1319
type: string
1420
ecps_comparison_url:
1521
description: Optional URL to precomputed PE-native eCPS comparison JSON.
@@ -45,8 +51,13 @@ on:
4551
type: boolean
4652
workflow_call:
4753
inputs:
54+
gate_inputs_artifact:
55+
required: false
56+
default: ""
57+
type: string
4858
artifact_archive_url:
49-
required: true
59+
required: false
60+
default: ""
5061
type: string
5162
ecps_comparison_url:
5263
required: false
@@ -75,6 +86,7 @@ on:
7586
type: boolean
7687

7788
permissions:
89+
actions: read
7890
contents: read
7991

8092
jobs:
@@ -108,7 +120,8 @@ jobs:
108120
- name: Test artifact gate implementation
109121
run: |
110122
uv run --python 3.13 --extra dev --with pydantic --with-editable ../microplex pytest -q \
111-
tests/pipelines/test_mp300k_artifact_gates.py
123+
tests/pipelines/test_mp300k_artifact_gates.py \
124+
tests/pipelines/test_mp300k_gate_inputs.py
112125
113126
artifact-gates:
114127
if: github.event_name == 'workflow_dispatch' || github.event_name == 'workflow_call'
@@ -137,9 +150,21 @@ jobs:
137150
- name: Set up uv
138151
uses: astral-sh/setup-uv@v6
139152

140-
- name: Download artifact and evidence
153+
- name: Download packaged gate inputs
154+
if: inputs.gate_inputs_artifact != ''
155+
uses: actions/download-artifact@v4
156+
with:
157+
name: ${{ inputs.gate_inputs_artifact }}
158+
path: gate-inputs
159+
160+
- name: Download artifact and evidence from URLs
161+
if: inputs.gate_inputs_artifact == ''
141162
run: |
142163
mkdir -p ../gate-inputs/evidence
164+
if [ -z "${{ inputs.artifact_archive_url }}" ]; then
165+
echo "Either gate_inputs_artifact or artifact_archive_url is required."
166+
exit 1
167+
fi
143168
curl --fail --location "${{ inputs.artifact_archive_url }}" --output ../gate-inputs/artifact-archive
144169
145170
if [ -n "${{ inputs.ecps_comparison_url }}" ]; then
@@ -152,6 +177,38 @@ jobs:
152177
curl --fail --location "${{ inputs.benchmark_manifest_url }}" --output ../gate-inputs/evidence/benchmark_manifest.json
153178
fi
154179
180+
- name: Normalize gate inputs
181+
run: |
182+
uv run --python 3.13 python - <<'PY'
183+
import shutil
from pathlib import Path

# Normalize packaged gate inputs into the same layout the URL download step
# writes: ../gate-inputs/artifact-archive plus ../gate-inputs/evidence/<name>.json.
root = Path("../gate-inputs")
evidence_dir = root / "evidence"
evidence_dir.mkdir(parents=True, exist_ok=True)

archive_target = root / "artifact-archive"
if not archive_target.exists():
    # Canonical names are listed first so they win over arbitrary archives.
    # Each glob is sorted because Path.glob yields matches in arbitrary order,
    # which would otherwise make the chosen archive nondeterministic.
    archive_patterns = (
        "artifact.tar.gz",
        "artifact.tgz",
        "artifact.tar",
        "artifact.zip",
        "*.tar.gz",
        "*.tgz",
        "*.tar",
        "*.zip",
    )
    archive_candidates = [
        path
        for pattern in archive_patterns
        for path in sorted(root.glob(pattern))
        if path.is_file()
    ]
    if not archive_candidates:
        raise SystemExit(
            "Packaged gate inputs did not contain artifact.tar.gz, "
            "artifact.tgz, artifact.tar, artifact.zip, or any other "
            "*.tar.gz, *.tgz, *.tar, or *.zip archive"
        )
    shutil.copyfile(archive_candidates[0], archive_target)

# Move top-level evidence JSONs into evidence/ without clobbering files that
# are already in place.
for name in ("ecps_comparison", "runtime_smoke", "benchmark_manifest"):
    source = root / f"{name}.json"
    destination = evidence_dir / f"{name}.json"
    if source.exists() and not destination.exists():
        shutil.copyfile(source, destination)
210+
PY
211+
155212
- name: Resolve artifact directory
156213
run: |
157214
uv run --python 3.13 python - <<'PY'

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ microplex-us-backfill-pe-native-audit = "microplex_us.pipelines.backfill_pe_nati
3737
microplex-us-backfill-pe-native-scores = "microplex_us.pipelines.backfill_pe_native_scores:main"
3838
microplex-us-check-site-snapshot = "microplex_us.pipelines.check_site_snapshot:main"
3939
microplex-us-mp300k-artifact-gates = "microplex_us.pipelines.mp300k_artifact_gates:main"
40+
microplex-us-package-mp300k-gate-inputs = "microplex_us.pipelines.mp300k_gate_inputs:main"
4041
microplex-us-pe-native-target-diagnostics = "microplex_us.pipelines.pe_native_scores:main_target_diagnostics"
4142
microplex-us-score-pe-native-loss = "microplex_us.pipelines.pe_native_scores:main"
4243
microplex-us-version-bump-benchmark = "microplex_us.pipelines.version_benchmark:main"
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
"""Package mp-300k artifact-gate inputs for CI handoff."""
2+
3+
from __future__ import annotations
4+
5+
import argparse
6+
import hashlib
7+
import json
8+
import shutil
9+
import tarfile
10+
from datetime import UTC, datetime
11+
from pathlib import Path
12+
from typing import Any
13+
14+
15+
def package_mp300k_gate_inputs(
    artifact_dir: str | Path,
    output_dir: str | Path,
    *,
    candidate_dataset_path: str | Path | None = None,
    ecps_comparison_path: str | Path | None = None,
    runtime_smoke_path: str | Path | None = None,
    benchmark_manifest_path: str | Path | None = None,
    archive_name: str = "artifact.tar.gz",
) -> dict[str, Any]:
    """Package an artifact archive plus gate evidence for GitHub Actions.

    The output directory is intended to be uploaded as a single Actions artifact
    and consumed by ``mp300k-artifact-gates.yml`` through ``gate_inputs_artifact``.

    Args:
        artifact_dir: Directory containing ``manifest.json``.
        output_dir: Directory that receives the archive, any evidence JSONs,
            and ``gate_inputs.json``. Created if missing.
        candidate_dataset_path: Dataset file to package. When omitted, the
            path named by ``manifest.artifacts.policyengine_dataset`` is used.
        ecps_comparison_path: Optional evidence file copied to
            ``ecps_comparison.json``.
        runtime_smoke_path: Optional evidence file copied to
            ``runtime_smoke.json``.
        benchmark_manifest_path: Optional evidence file copied to
            ``benchmark_manifest.json``.
        archive_name: File name of the gzip-compressed tar written into
            ``output_dir``.

    Returns:
        The metadata dictionary that is also written to ``gate_inputs.json``.

    Raises:
        FileNotFoundError: If the manifest, the candidate dataset, or a
            supplied evidence file does not exist.
        ValueError: If no candidate dataset path was supplied and the manifest
            does not name one.
    """

    artifact_root = Path(artifact_dir).expanduser()
    output_root = Path(output_dir).expanduser()
    manifest_path = artifact_root / "manifest.json"
    manifest = _load_manifest(manifest_path)
    candidate_dataset = _resolve_candidate_dataset_path(
        artifact_root,
        manifest,
        candidate_dataset_path,
    )
    if not candidate_dataset.exists():
        raise FileNotFoundError(f"candidate dataset not found: {candidate_dataset}")

    output_root.mkdir(parents=True, exist_ok=True)
    archive_path = output_root / archive_name
    stage_parent = output_root / ".staging"
    if stage_parent.exists():
        # Start from a clean staging area if an earlier run left one behind.
        shutil.rmtree(stage_parent)

    try:
        # ``Path(".").name`` is empty; fall back to the resolved directory name
        # so the archive always has a real top-level directory.
        stage_name = artifact_root.name or artifact_root.resolve().name
        stage_root = stage_parent / stage_name
        stage_root.mkdir(parents=True)

        candidate_relpath = _candidate_archive_relpath(
            manifest,
            candidate_dataset=candidate_dataset,
            explicit_candidate_path=candidate_dataset_path,
        )
        staged_candidate = stage_root / candidate_relpath
        staged_candidate.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(candidate_dataset, staged_candidate)

        staged_manifest = _manifest_for_archive(
            manifest,
            source_artifact_dir=artifact_root,
            source_candidate_dataset=candidate_dataset,
            candidate_relpath=candidate_relpath,
        )
        _write_json(stage_root / "manifest.json", staged_manifest)
        _write_archive(archive_path, stage_root)
    finally:
        # Staging is only needed to build the archive. Remove it even when a
        # step above fails so a partial tree never lingers in the directory
        # that gets uploaded.
        shutil.rmtree(stage_parent, ignore_errors=True)

    evidence = {
        "ecps_comparison": _copy_optional_evidence(
            ecps_comparison_path,
            output_root / "ecps_comparison.json",
        ),
        "runtime_smoke": _copy_optional_evidence(
            runtime_smoke_path,
            output_root / "runtime_smoke.json",
        ),
        "benchmark_manifest": _copy_optional_evidence(
            benchmark_manifest_path,
            output_root / "benchmark_manifest.json",
        ),
    }
    metadata = {
        "schema_version": 1,
        "generated_at": datetime.now(UTC).isoformat(),
        "source_artifact_dir": str(artifact_root.resolve()),
        "source_manifest": _file_descriptor(manifest_path),
        "source_candidate_dataset": _file_descriptor(candidate_dataset),
        "artifact_archive": _file_descriptor(archive_path),
        "evidence": evidence,
        "workflow_call": {
            "uses": "./.github/workflows/mp300k-artifact-gates.yml",
            # An empty name (output_dir given as ".") would make the workflow
            # treat the artifact input as unset, so fall back to the resolved
            # directory name.
            "with": {
                "gate_inputs_artifact": output_root.name
                or output_root.resolve().name
            },
        },
    }
    _write_json(output_root / "gate_inputs.json", metadata)
    return metadata
99+
100+
101+
def _load_manifest(path: Path) -> dict[str, Any]:
    """Read and parse the artifact manifest.

    Args:
        path: Location of ``manifest.json``.

    Returns:
        The parsed manifest as a dictionary.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the file is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError``) or its top-level value is not a JSON object.
    """
    if not path.exists():
        raise FileNotFoundError(f"manifest not found: {path}")
    # JSON is exchanged as UTF-8; do not depend on the platform's locale
    # default encoding when decoding it.
    manifest = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(manifest, dict):
        # Callers use ``manifest.get(...)``; fail here with a clear message
        # instead of an AttributeError further down the line.
        raise ValueError(
            f"manifest must be a JSON object, got {type(manifest).__name__}: {path}"
        )
    return manifest
105+
106+
107+
def _resolve_candidate_dataset_path(
    artifact_root: Path,
    manifest: dict[str, Any],
    explicit_path: str | Path | None,
) -> Path:
    """Work out where the candidate dataset lives on disk.

    An explicitly supplied path always wins. Otherwise the path is taken from
    ``manifest["artifacts"]["policyengine_dataset"]``; a relative value is
    interpreted relative to ``artifact_root``.

    Raises:
        ValueError: If no explicit path is given and the manifest entry is
            missing, empty, or not a string.
    """
    if explicit_path is not None:
        return Path(explicit_path).expanduser()

    manifest_artifacts = dict(manifest.get("artifacts", {}))
    declared = manifest_artifacts.get("policyengine_dataset")
    if not (isinstance(declared, str) and declared):
        raise ValueError(
            "manifest.artifacts.policyengine_dataset is required when "
            "candidate_dataset_path is not supplied"
        )

    resolved = Path(declared).expanduser()
    return resolved if resolved.is_absolute() else artifact_root / resolved
125+
126+
127+
def _candidate_archive_relpath(
    manifest: dict[str, Any],
    *,
    candidate_dataset: Path,
    explicit_candidate_path: str | Path | None,
) -> Path:
    """Choose the dataset's location inside the packaged archive.

    The manifest's ``artifacts.policyengine_dataset`` value is reused only when
    no explicit candidate path was supplied and the value is a plain relative
    path. In every other case the dataset is placed at the archive root under
    its own file name.

    Args:
        manifest: Parsed source manifest.
        candidate_dataset: Resolved dataset file being packaged.
        explicit_candidate_path: Caller-supplied dataset path, or ``None``.

    Returns:
        A relative path with no ``..`` components.
    """
    if explicit_candidate_path is None:
        dataset_name = dict(manifest.get("artifacts", {})).get("policyengine_dataset")
        if isinstance(dataset_name, str) and dataset_name:
            # Expand ``~`` the same way the dataset resolver does, so a
            # home-relative manifest entry is recognised as absolute here too
            # rather than being written verbatim into the packaged manifest.
            relpath = Path(dataset_name).expanduser()
            # ``..`` components would place the staged copy outside the
            # staging directory, and therefore outside the archive.
            if not relpath.is_absolute() and ".." not in relpath.parts:
                return relpath
    return Path(candidate_dataset.name)
141+
142+
143+
def _manifest_for_archive(
144+
manifest: dict[str, Any],
145+
*,
146+
source_artifact_dir: Path,
147+
source_candidate_dataset: Path,
148+
candidate_relpath: Path,
149+
) -> dict[str, Any]:
150+
updated = dict(manifest)
151+
artifacts = dict(updated.get("artifacts", {}))
152+
artifacts["policyengine_dataset"] = str(candidate_relpath)
153+
updated["artifacts"] = artifacts
154+
updated["mp300k_gate_inputs"] = {
155+
"packaged_at": datetime.now(UTC).isoformat(),
156+
"source_artifact_dir": str(source_artifact_dir.resolve()),
157+
"source_candidate_dataset": str(source_candidate_dataset.resolve()),
158+
}
159+
return updated
160+
161+
162+
def _copy_optional_evidence(
    source_path: str | Path | None,
    destination_path: Path,
) -> dict[str, Any] | None:
    """Copy one optional evidence file and describe the copy.

    Returns ``None`` when no source was given; otherwise the file descriptor
    of the copied file at ``destination_path``.

    Raises:
        FileNotFoundError: If a source was given but does not exist.
    """
    if source_path is None:
        return None

    evidence_file = Path(source_path).expanduser()
    if evidence_file.exists():
        shutil.copy2(evidence_file, destination_path)
        return _file_descriptor(destination_path)
    raise FileNotFoundError(f"evidence file not found: {evidence_file}")
173+
174+
175+
def _write_archive(archive_path: Path, stage_root: Path) -> None:
    """Write ``stage_root`` and its contents to a gzip-compressed tar file.

    Entries are stored under the staged directory's own name, so the archive
    has that directory as its single top-level entry.
    """
    top_level_name = stage_root.name
    with tarfile.open(name=archive_path, mode="w:gz") as tarball:
        tarball.add(stage_root, arcname=top_level_name, recursive=True)
178+
179+
180+
def _write_json(path: Path, payload: dict[str, Any]) -> None:
    """Serialize ``payload`` as indented, key-sorted JSON at ``path``.

    Missing parent directories are created first.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, sort_keys=True, indent=2)
    path.write_text(serialized)
183+
184+
185+
def _file_descriptor(path: Path) -> dict[str, Any]:
    """Describe a file by resolved path, size in bytes, and SHA-256 digest."""
    absolute = str(path.resolve())
    byte_count = path.stat().st_size
    checksum = _sha256_file(path)
    return {"path": absolute, "size_bytes": byte_count, "sha256": checksum}
191+
192+
193+
def _sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB pieces rather than loaded whole.
    """
    hasher = hashlib.sha256()
    piece_size = 1024 * 1024
    with path.open("rb") as stream:
        while piece := stream.read(piece_size):
            hasher.update(piece)
    return hasher.hexdigest()
199+
200+
201+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for packaging mp-300k gate inputs.

    Parses ``argv`` (or the process arguments when ``None``), runs the
    packager, prints the path of the written ``gate_inputs.json``, and
    returns ``0``.
    """
    cli = argparse.ArgumentParser(
        description="Package mp-300k artifact-gate inputs for CI."
    )
    for required_flag in ("--artifact-dir", "--output-dir"):
        cli.add_argument(required_flag, required=True)
    for optional_flag in (
        "--candidate-dataset",
        "--ecps-comparison-json",
        "--runtime-smoke-json",
        "--benchmark-manifest",
    ):
        cli.add_argument(optional_flag)
    cli.add_argument("--archive-name", default="artifact.tar.gz")
    options = cli.parse_args(argv)

    package_mp300k_gate_inputs(
        options.artifact_dir,
        options.output_dir,
        candidate_dataset_path=options.candidate_dataset,
        ecps_comparison_path=options.ecps_comparison_json,
        runtime_smoke_path=options.runtime_smoke_json,
        benchmark_manifest_path=options.benchmark_manifest,
        archive_name=options.archive_name,
    )

    metadata_file = Path(options.output_dir).expanduser() / "gate_inputs.json"
    print(metadata_file)
    return 0
225+
226+
227+
if __name__ == "__main__":
    # Run the CLI and hand its return value to the interpreter as exit status.
    exit_status = main()
    raise SystemExit(exit_status)

0 commit comments

Comments
 (0)