44
55namespace Flow \ETL ;
66
7- use function Flow \ETL \DSL \{refs , to_output };
7+ use function Flow \ETL \DSL \{analyze , refs , to_output };
88use Flow \ETL \DataFrame \GroupedDataFrame ;
99use Flow \ETL \Dataset \{Report , Statistics };
1010use Flow \ETL \Exception \{InvalidArgumentException , RuntimeException };
@@ -692,20 +692,29 @@ public function rows(Transformer|Transformation $transformer) : self
692692 /**
693693 * @trigger
694694 *
695+ * When analyzing pipeline execution we can choose to collect various metrics through the analyze()->with*() methods
696+ *
697+ * - column statistics - analyze()->withColumnStatistics()
698+ * - schema - analyze()->withSchema()
699+ *
695700 * @param null|callable(Rows $rows, FlowContext $context): void $callback
696- * @param bool $analyze - when set to true, run will return Report
701+ * @param Analyze|bool $analyze - when set to true or an Analyze instance, run will return Report
702+ *
703+ * @return ($analyze is Analyze|true ? Report : null)
697704 */
698- public function run (?callable $ callback = null , bool $ analyze = false ) : ?Report
705+ public function run (?callable $ callback = null , bool | Analyze $ analyze = false ) : ?Report
699706 {
700707 $ clone = clone $ this ;
701708
702709 $ totalRows = 0 ;
703- $ schema = new Schema ();
710+
711+ $ analyze = $ analyze === true ? analyze () : $ analyze ;
704712
705713 if ($ analyze ) {
706714 $ startedAt = $ this ->context ->config ->clock ()->now ();
707715 $ startTime = Statistics \HighResolutionTime::now ();
708- $ columnStatistics = new Statistics \Columns ();
716+ $ columnStatistics = $ analyze ->collectColumnStatistics () ? new Statistics \Columns () : null ;
717+ $ schema = $ analyze ->collectSchema () ? new Schema () : null ;
709718 }
710719
711720 foreach ($ clone ->pipeline ->process ($ clone ->context ) as $ rows ) {
@@ -714,12 +723,17 @@ public function run(?callable $callback = null, bool $analyze = false) : ?Report
714723 }
715724
716725 if ($ analyze ) {
717- $ schema = $ schema ->merge ($ rows ->schema ());
718726 $ totalRows += $ rows ->count ();
719727
720- foreach ($ rows ->all () as $ row ) {
721- foreach ($ row ->entries ()->all () as $ entry ) {
722- $ columnStatistics ->add ($ entry );
728+ if ($ schema !== null ) {
729+ $ schema = $ schema ->merge ($ rows ->schema ());
730+ }
731+
732+ if ($ columnStatistics !== null ) {
733+ foreach ($ rows ->all () as $ row ) {
734+ foreach ($ row ->entries ()->all () as $ entry ) {
735+ $ columnStatistics ->add ($ entry );
736+ }
723737 }
724738 }
725739 }
0 commit comments