@@ -17,6 +17,7 @@ def package_mp300k_gate_inputs(
1717 output_dir : str | Path ,
1818 * ,
1919 candidate_dataset_path : str | Path | None = None ,
20+ baseline_dataset_path : str | Path | None = None ,
2021 ecps_comparison_path : str | Path | None = None ,
2122 runtime_smoke_path : str | Path | None = None ,
2223 benchmark_manifest_path : str | Path | None = None ,
@@ -39,6 +40,11 @@ def package_mp300k_gate_inputs(
3940 )
4041 if not candidate_dataset .exists ():
4142 raise FileNotFoundError (f"candidate dataset not found: { candidate_dataset } " )
43+ baseline_dataset = _resolve_baseline_dataset_path (
44+ artifact_root ,
45+ manifest ,
46+ baseline_dataset_path ,
47+ )
4248
4349 output_root .mkdir (parents = True , exist_ok = True )
4450 archive_path = output_root / archive_name
@@ -56,12 +62,26 @@ def package_mp300k_gate_inputs(
5662 staged_candidate = stage_root / candidate_relpath
5763 staged_candidate .parent .mkdir (parents = True , exist_ok = True )
5864 shutil .copy2 (candidate_dataset , staged_candidate )
65+ baseline_relpath = None
66+ if baseline_dataset is not None :
67+ if not baseline_dataset .exists ():
68+ raise FileNotFoundError (f"baseline dataset not found: { baseline_dataset } " )
69+ baseline_relpath = _baseline_archive_relpath (
70+ manifest ,
71+ baseline_dataset = baseline_dataset ,
72+ explicit_baseline_path = baseline_dataset_path ,
73+ )
74+ staged_baseline = stage_root / baseline_relpath
75+ staged_baseline .parent .mkdir (parents = True , exist_ok = True )
76+ shutil .copy2 (baseline_dataset , staged_baseline )
5977
6078 staged_manifest = _manifest_for_archive (
6179 manifest ,
6280 source_artifact_dir = artifact_root ,
6381 source_candidate_dataset = candidate_dataset ,
6482 candidate_relpath = candidate_relpath ,
83+ source_baseline_dataset = baseline_dataset ,
84+ baseline_relpath = baseline_relpath ,
6585 )
6686 _write_json (stage_root / "manifest.json" , staged_manifest )
6787 _write_archive (archive_path , stage_root )
@@ -86,6 +106,9 @@ def package_mp300k_gate_inputs(
86106 "source_artifact_dir" : str (artifact_root .resolve ()),
87107 "source_manifest" : _file_descriptor (manifest_path ),
88108 "source_candidate_dataset" : _file_descriptor (candidate_dataset ),
109+ "source_baseline_dataset" : (
110+ _file_descriptor (baseline_dataset ) if baseline_dataset is not None else None
111+ ),
89112 "artifact_archive" : _file_descriptor (archive_path ),
90113 "evidence" : evidence ,
91114 "workflow_call" : {
@@ -124,6 +147,24 @@ def _resolve_candidate_dataset_path(
124147 return dataset_path
125148
126149
150+ def _resolve_baseline_dataset_path (
151+ artifact_root : Path ,
152+ manifest : dict [str , Any ],
153+ explicit_path : str | Path | None ,
154+ ) -> Path | None :
155+ if explicit_path is not None :
156+ return Path (explicit_path ).expanduser ()
157+ value = dict (manifest .get ("config" , {})).get ("policyengine_baseline_dataset" )
158+ if value is None :
159+ return None
160+ if not isinstance (value , str ) or not value :
161+ raise ValueError ("config.policyengine_baseline_dataset must be a path string" )
162+ baseline_path = Path (value ).expanduser ()
163+ if not baseline_path .is_absolute ():
164+ baseline_path = artifact_root / baseline_path
165+ return baseline_path
166+
167+
127168def _candidate_archive_relpath (
128169 manifest : dict [str , Any ],
129170 * ,
@@ -140,21 +181,48 @@ def _candidate_archive_relpath(
140181 return Path (candidate_dataset .name )
141182
142183
184+ def _baseline_archive_relpath (
185+ manifest : dict [str , Any ],
186+ * ,
187+ baseline_dataset : Path ,
188+ explicit_baseline_path : str | Path | None ,
189+ ) -> Path :
190+ if explicit_baseline_path is not None :
191+ return Path ("baseline" ) / baseline_dataset .name
192+ value = dict (manifest .get ("config" , {})).get ("policyengine_baseline_dataset" )
193+ if isinstance (value , str ) and value :
194+ relpath = Path (value )
195+ if not relpath .is_absolute ():
196+ return relpath
197+ return Path ("baseline" ) / baseline_dataset .name
198+
199+
143200def _manifest_for_archive (
144201 manifest : dict [str , Any ],
145202 * ,
146203 source_artifact_dir : Path ,
147204 source_candidate_dataset : Path ,
148205 candidate_relpath : Path ,
206+ source_baseline_dataset : Path | None ,
207+ baseline_relpath : Path | None ,
149208) -> dict [str , Any ]:
150209 updated = dict (manifest )
151210 artifacts = dict (updated .get ("artifacts" , {}))
152211 artifacts ["policyengine_dataset" ] = str (candidate_relpath )
153212 updated ["artifacts" ] = artifacts
213+ config = dict (updated .get ("config" , {}))
214+ if baseline_relpath is not None :
215+ config ["policyengine_baseline_dataset" ] = str (baseline_relpath )
216+ updated ["config" ] = config
154217 updated ["mp300k_gate_inputs" ] = {
155218 "packaged_at" : datetime .now (UTC ).isoformat (),
156219 "source_artifact_dir" : str (source_artifact_dir .resolve ()),
157220 "source_candidate_dataset" : str (source_candidate_dataset .resolve ()),
221+ "source_baseline_dataset" : (
222+ str (source_baseline_dataset .resolve ())
223+ if source_baseline_dataset is not None
224+ else None
225+ ),
158226 }
159227 return updated
160228
@@ -205,6 +273,7 @@ def main(argv: list[str] | None = None) -> int:
205273 parser .add_argument ("--artifact-dir" , required = True )
206274 parser .add_argument ("--output-dir" , required = True )
207275 parser .add_argument ("--candidate-dataset" )
276+ parser .add_argument ("--baseline-dataset" )
208277 parser .add_argument ("--ecps-comparison-json" )
209278 parser .add_argument ("--runtime-smoke-json" )
210279 parser .add_argument ("--benchmark-manifest" )
@@ -215,6 +284,7 @@ def main(argv: list[str] | None = None) -> int:
215284 args .artifact_dir ,
216285 args .output_dir ,
217286 candidate_dataset_path = args .candidate_dataset ,
287+ baseline_dataset_path = args .baseline_dataset ,
218288 ecps_comparison_path = args .ecps_comparison_json ,
219289 runtime_smoke_path = args .runtime_smoke_json ,
220290 benchmark_manifest_path = args .benchmark_manifest ,
0 commit comments