@@ -257,8 +257,10 @@ def __validate_jobs_need__(self) -> Self:
257257 f"{ self .name !r} ."
258258 )
259259
260- # NOTE: Set job ID to the job model.
261- self .jobs [job ].id = job
260+ # NOTE: Copy the job model and set job ID to the job model.
261+ job_model = self .jobs [job ].model_copy ()
262+ job_model .id = job
263+ self .jobs [job ] = job_model
262264
263265 # VALIDATE: Validate workflow name should not dynamic with params
264266 # template.
@@ -771,3 +773,217 @@ def execute(
771773 ).to_dict (),
772774 },
773775 )
776+
def rerun(
    self,
    context: DictData,
    *,
    parent_run_id: Optional[str] = None,
    event: Optional[Event] = None,
    timeout: float = 3600,
    max_job_parallel: int = 2,
) -> Result:
    """Re-execute the workflow from the context of a previous run.

    Jobs whose previous status was ``SUCCESS`` are carried over into the new
    context and are not queued again; every other job of this workflow is
    queued and executed with the same scheduling loop that a normal
    execution uses.

    :param context: A context result of the previous run. It must contain
        the ``status`` and ``params`` keys; ``jobs`` and ``errors`` are
        optional.
    :param parent_run_id: (Optional[str]) A parent workflow running ID.
    :param event: (Event) An Event manager instance that is used to cancel
        this execution if it is forced to stop by the parent execution.
        A new Event is created when it is not passed.
    :param timeout: (float) A workflow execution timeout in seconds that
        limits the time spent on dispatching jobs and waiting for job
        dependencies. The value is passed through ``dynamic`` with the
        ``max_job_exec_timeout`` key. (Default: 60 * 60 seconds)
    :param max_job_parallel: (int) The maximum number of workers that the
        ``ThreadPoolExecutor`` uses for job execution. A value of 1 runs
        the jobs one at a time. (Default: 2 workers)

    :rtype: Result
    :returns: A result with ``SUCCESS`` and the untouched input context
        when there is nothing to rerun, ``CANCEL`` when the event was set
        before the execution started, ``FAILED`` when a trigger rule check
        failed or the timeout was reached, and otherwise the status
        computed by ``validate_statuses`` together with the new context.
    """
    ts: float = time.monotonic()

    result: Result = Result.construct_with_rs_or_id(
        parent_run_id=parent_run_id,
        id_logic=self.name,
        extras=self.extras,
    )
    if context["status"] == SUCCESS:
        result.trace.info(
            "[WORKFLOW]: Does not rerun because it already executed with "
            "success status."
        )
        return result.catch(status=SUCCESS, context=context)

    # NOTE: The previous error is only logged, so a context without the
    #   `errors` key must not abort the rerun.
    err = context.get("errors")
    result.trace.info(f"[WORKFLOW]: Previous error: {err} ")

    event = event or Event()
    max_job_parallel = dynamic(
        "max_job_parallel", f=max_job_parallel, extras=self.extras
    )
    result.trace.info(
        f"[WORKFLOW]: Execute: {self.name!r} ("
        f"{'parallel' if max_job_parallel > 1 else 'sequential'} jobs)"
    )
    if not self.jobs:
        result.trace.warning(f"[WORKFLOW]: {self.name!r} does not set jobs")
        return result.catch(status=SUCCESS, context=context)

    # NOTE: Prepare the new context for rerun process. Only the jobs that
    #   already succeeded are kept; a missing `jobs` key means that no job
    #   output can be reused.
    jobs: DictData = context.get("jobs") or {}
    new_context: DictData = {
        "params": context["params"].copy(),
        "jobs": {j: jobs[j] for j in jobs if jobs[j]["status"] == SUCCESS},
    }

    # NOTE: Queue every job that does not have a reusable success output.
    total_job: int = 0
    job_queue: Queue = Queue()
    for job_id in self.jobs:

        if job_id in new_context["jobs"]:
            continue

        job_queue.put(job_id)
        total_job += 1

    if total_job == 0:
        result.trace.warning("[WORKFLOW]: It does not have job to rerun.")
        return result.catch(status=SUCCESS, context=context)

    not_timeout_flag: bool = True
    statuses: list[Status] = [WAIT] * total_job
    skip_count: int = 0
    sequence_statuses: list[Status] = []
    timeout = dynamic(
        "max_job_exec_timeout", f=timeout, extras=self.extras
    )

    result.catch(status=WAIT, context=new_context)
    if event.is_set():
        return result.catch(
            status=CANCEL,
            context={
                "errors": WorkflowCancelError(
                    "Execution was canceled from the event was set before "
                    "workflow execution."
                ).to_dict(),
            },
        )

    with ThreadPoolExecutor(max_job_parallel, "wf") as executor:
        futures: list[Future] = []

        while not job_queue.empty() and (
            not_timeout_flag := ((time.monotonic() - ts) < timeout)
        ):
            job_id: str = job_queue.get()
            job: Job = self.job(name=job_id)
            if (check := job.check_needs(new_context["jobs"])) == WAIT:
                # NOTE: The needed jobs are not ready yet, so push this job
                #   back to the queue and poll again after a short sleep.
                job_queue.task_done()
                job_queue.put(job_id)
                time.sleep(0.15)
                continue
            elif check == FAILED:  # pragma: no cov
                return result.catch(
                    status=FAILED,
                    context={
                        "status": FAILED,
                        "errors": WorkflowError(
                            f"Validate job trigger rule was failed with "
                            f"{job.trigger_rule.value!r} ."
                        ).to_dict(),
                    },
                )
            elif check == SKIP:  # pragma: no cov
                result.trace.info(
                    f"[JOB]: Skip job: {job_id!r} from trigger rule."
                )
                job.set_outputs(output={"status": SKIP}, to=new_context)
                job_queue.task_done()
                skip_count += 1
                continue

            if max_job_parallel > 1:
                futures.append(
                    executor.submit(
                        self.execute_job,
                        job=job,
                        params=new_context,
                        result=result,
                        event=event,
                    ),
                )
                job_queue.task_done()
                continue

            # NOTE: Non-parallel mode keeps at most one future in flight.
            #   The current job is pushed back to the queue until the
            #   in-flight future has finished and its status is recorded.
            if len(futures) < 1:
                futures.append(
                    executor.submit(
                        self.execute_job,
                        job=job,
                        params=new_context,
                        result=result,
                        event=event,
                    )
                )
            elif (future := futures.pop(0)).done():
                if e := future.exception():
                    sequence_statuses.append(get_status_from_error(e))
                else:
                    st, _ = future.result()
                    sequence_statuses.append(st)
                job_queue.put(job_id)
            # NOTE(review): `Future.done()` also reports True for cancelled
            #   futures, so this branch looks unreachable -- confirm.
            elif future.cancelled():
                sequence_statuses.append(CANCEL)
                job_queue.put(job_id)
            elif future.running() or "state=pending" in str(future):
                futures.insert(0, future)
                job_queue.put(job_id)
            else:  # pragma: no cov
                job_queue.put(job_id)
                futures.insert(0, future)
                result.trace.warning(
                    f"[WORKFLOW]: ... Execution non-threading not "
                    f"handle: {future} ."
                )

            job_queue.task_done()

        if not_timeout_flag:
            job_queue.join()

            # NOTE: Count the finished futures explicitly. The loop can run
            #   zero times (for example when every queued job was skipped by
            #   its trigger rule), so a loop variable can not be used as the
            #   offset for the status slots below.
            finished: int = 0
            for future in as_completed(futures):
                try:
                    statuses[finished], _ = future.result()
                except WorkflowError as e:
                    statuses[finished] = get_status_from_error(e)
                finished += 1

            # NOTE: Update skipped status from the job trigger.
            for i in range(skip_count):
                statuses[finished + i] = SKIP

            # NOTE: Update status from none-parallel job execution.
            for i, s in enumerate(sequence_statuses):
                statuses[finished + skip_count + i] = s

            return result.catch(
                status=validate_statuses(statuses), context=new_context
            )

        # NOTE: The timeout was reached: signal the running jobs through the
        #   event and cancel the futures that did not start yet.
        event.set()
        for future in futures:
            future.cancel()

        result.trace.error(
            f"[WORKFLOW]: {self.name!r} was timeout because it use exec "
            f"time more than {timeout} seconds."
        )

        time.sleep(0.0025)

    return result.catch(
        status=FAILED,
        context={
            "errors": WorkflowTimeoutError(
                f"{self.name!r} was timeout because it use exec time more "
                f"than {timeout} seconds."
            ).to_dict(),
        },
    )
0 commit comments