@@ -107,6 +107,7 @@ def items(self) -> ItemsView[str, DatasetCreationResults | SkippedStageResult]:
107107
@property
def final_result(self) -> DatasetCreationResults:
    """Return the result of the final stage, raising if that stage was skipped."""
    # All validation lives in the private helper; this property only exposes it.
    required = self._require_final_result()
    return required
111112
112113 def _require_final_result (self ) -> DatasetCreationResults :
@@ -118,33 +119,41 @@ def _require_final_result(self) -> DatasetCreationResults:
118119 return result
119120
def load_dataset(self) -> pd.DataFrame:
    """Load the final stage's selected output as a DataFrame.

    ``_require_final_result`` is called first, purely for its check, so the
    final stage is validated before anything is read.
    """
    self._require_final_result()
    final_name = self.final_stage_name
    return self.load_stage_output(final_name)
123125
def load_analysis(self) -> DatasetProfilerResults:
    """Return the analysis loaded by the final stage's result object."""
    # Going through the ``final_result`` property applies its skipped-stage check.
    final = self.final_result
    return final.load_analysis()
126129
def count_records(self) -> int:
    """Return the number of records in the final stage's selected output.

    ``_require_final_result`` is called first, purely for its check, so the
    final stage is validated before anything is counted.
    """
    self._require_final_result()
    final_name = self.final_stage_name
    return self.count_stage_output_records(final_name)
130134
def get_stage_output_path(self, stage_name: str) -> Path:
    """Resolve the path of the output that a stage hands downstream.

    The stage is looked up in ``stage_results``. A skipped stage raises
    ``DataDesignerWorkflowError``. Otherwise the entry recorded for the stage
    in ``_stage_output_paths`` is returned, falling back to the result's
    ``artifact_storage.final_dataset_path`` when no entry exists.
    """
    result = self.stage_results[stage_name]
    if not isinstance(result, SkippedStageResult):
        selected_paths = self._stage_output_paths
        return selected_paths.get(stage_name, result.artifact_storage.final_dataset_path)
    # Only skipped stages reach this point; they have no output path to return.
    raise DataDesignerWorkflowError(f"Stage { stage_name !r} was skipped: { result .status .value } .")
136141
def load_stage_output(self, stage_name: str) -> pd.DataFrame:
    """Load the output that the given stage hands downstream."""
    # Path resolution (including the skipped-stage error) is delegated to
    # ``get_stage_output_path``; reading is delegated to the module helper.
    output_path = self.get_stage_output_path(stage_name)
    return _load_parquet_dataset(output_path)
139145
def count_stage_output_records(self, stage_name: str) -> int:
    """Count the records in the output that the given stage hands downstream."""
    # Path resolution (including the skipped-stage error) is delegated to
    # ``get_stage_output_path``; counting is delegated to the module helper.
    output_path = self.get_stage_output_path(stage_name)
    return _count_parquet_records(output_path)
142149
def export(self, path: Path | str, *, format: ExportFormat | None = None) -> Path:
    """Export the final stage's selected output to ``path``.

    ``path`` is converted to a ``Path`` and passed, with the keyword-only
    ``format``, to ``_export_parquet_dataset``; that helper's return value is
    returned unchanged.
    """
    # Called only for its check, before any path is resolved.
    self._require_final_result()
    source_path = self.get_stage_output_path(self.final_stage_name)
    destination = Path(path)
    return _export_parquet_dataset(source_path, destination, format=format)
146154
147155 def push_to_hub (self , * args : Any , ** kwargs : Any ) -> str :
156+ """Push the final stage result to Hugging Face Hub when no output override is selected."""
148157 final_result = self .final_result
149158 if self .get_stage_output_path (self .final_stage_name ) != final_result .artifact_storage .final_dataset_path :
150159 raise DataDesignerWorkflowError (
@@ -155,7 +164,10 @@ def push_to_hub(self, *args: Any, **kwargs: Any) -> str:
155164
156165
157166class CompositeWorkflow :
167+ """Experimental linear workflow for chaining Data Designer stages."""
168+
158169 def __init__ (self , * , name : str , data_designer : DataDesigner ) -> None :
170+ """Create a workflow bound to a parent Data Designer instance."""
159171 _validate_dir_name (name , "workflow name" )
160172 self .name = name
161173 self ._data_designer = data_designer
@@ -210,11 +222,16 @@ def add_stage(
210222 return self
211223
212224 def run (self ) -> CompositeWorkflowResults :
213- """Run all stages from scratch, replacing deterministic stage directories."""
225+ """Run all stages from scratch.
226+
227+ Each stage writes a deterministic artifact directory under the parent
228+ Data Designer artifact path. Downstream stages are seeded from the
229+ selected output of the previous stage.
230+ """
214231 if not self ._stages :
215232 raise DataDesignerWorkflowError (f"Workflow { self .name !r} has no stages." )
216233
217- workflow_path = self ._data_designer ._artifact_path / self .name
234+ workflow_path = self ._data_designer .artifact_path / self .name
218235 workflow_path .mkdir (parents = True , exist_ok = True )
219236 metadata : dict [str , Any ] = {
220237 "name" : self .name ,
0 commit comments