Skip to content

Commit ac14daa

Browse files
committed
Fix cleanup parent status convergence
1 parent 7f68ddd commit ac14daa

2 files changed

Lines changed: 205 additions & 75 deletions

File tree

inc/Cleanup/DataMachineJobCleanupRunEvidenceStore.php

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,17 @@ public function read( string $run_id, bool $include_evidence = false, bool $incl
3737
$child_jobs = $this->get_cleanup_run_descendant_jobs($job_id);
3838
$aggregate = $this->aggregate_cleanup_child_jobs($child_jobs);
3939
$children = $aggregate['children'];
40-
$state = $this->cleanup_run_state( (string) ( $job['status'] ?? '' ), $children);
40+
$parent_status = (string) ( $job['status'] ?? '' );
41+
$state = $this->cleanup_run_state($parent_status, $children, $parent_result, $aggregate);
4142

4243
$children_for_output = ( $include_evidence || $include_details ) ? $children : $this->summarize_cleanup_children($children);
4344
$output = array(
4445
'success' => true,
4546
'state' => $state,
4647
'run_id' => $this->cleanup_run_id($job_id),
4748
'job_id' => $job_id,
48-
'status' => in_array($state, array( 'children_processing', 'partial_failed' ), true) ? $state : ( $job['status'] ?? '' ),
49+
'status' => $state,
50+
'parent_status' => $parent_status,
4951
'created_at' => $job['created_at'] ?? '',
5052
'parent_completed_at' => $job['completed_at'] ?? '',
5153
'artifact_cleanup' => $aggregate['artifact_cleanup'],
@@ -132,6 +134,7 @@ private function aggregate_cleanup_child_jobs( array $child_jobs ): array {
132134
'processing' => 0,
133135
'completed' => 0,
134136
'failed' => 0,
137+
'skipped' => 0,
135138
'running' => 0,
136139
'total' => 0,
137140
'statuses' => array(),
@@ -218,6 +221,7 @@ private function summarize_cleanup_children( array $children ): array {
218221
'processing' => (int) ( $children['processing'] ?? 0 ),
219222
'completed' => (int) ( $children['completed'] ?? 0 ),
220223
'failed' => (int) ( $children['failed'] ?? 0 ),
224+
'skipped' => (int) ( $children['skipped'] ?? 0 ),
221225
'running' => (int) ( $children['running'] ?? 0 ),
222226
'total' => (int) ( $children['total'] ?? 0 ),
223227
'statuses' => (array) ( $children['statuses'] ?? array() ),
@@ -266,7 +270,7 @@ private function cleanup_run_drain_summary( int $job_id, string $state, array $c
266270
}
267271

268272
return array(
269-
'needed' => in_array($state, array( 'running', 'children_processing' ), true),
273+
'needed' => in_array($state, array( 'running', 'waiting_on_children' ), true),
270274
'commands' => $commands,
271275
'active_child_job_ids' => $active_child_ids,
272276
'bytes_reclaimed' => (int) ( $cleanup_items['bytes_reclaimed'] ?? 0 ),
@@ -478,6 +482,10 @@ private function count_cleanup_child_status( array &$children, string $status ):
478482
++$children['failed'];
479483
return;
480484
}
485+
if ( str_starts_with($status, 'skipped') ) {
486+
++$children['skipped'];
487+
return;
488+
}
481489
if ( str_starts_with($status, 'completed') ) {
482490
++$children['completed'];
483491
}
@@ -574,22 +582,69 @@ private function sum_cleanup_rows_bytes( array $rows, array $fields ): int {
574582
* @param array $children Child summary.
575583
* @return string
576584
*/
577-
private function cleanup_run_state( string $status, array $children ): string {
585+
private function cleanup_run_state( string $status, array $children, array $parent_result, array $aggregate ): string {
578586
$parent_state = $this->cleanup_job_state($status);
579-
if ( in_array($parent_state, array( 'cancelled', 'partial_failure' ), true) ) {
587+
if ( in_array($parent_state, array( 'cancelled', 'failed' ), true) ) {
580588
return $parent_state;
581589
}
582590

583591
if ( (int) ( $children['running'] ?? 0 ) > 0 ) {
584-
return 'children_processing';
592+
return 'waiting_on_children';
585593
}
586594
if ( (int) ( $children['failed'] ?? 0 ) > 0 ) {
587-
return 'partial_failed';
595+
return 'failed';
596+
}
597+
598+
$child_total = (int) ( $children['total'] ?? 0 );
599+
$terminal_child = (int) ( $children['completed'] ?? 0 ) + (int) ( $children['skipped'] ?? 0 );
600+
if ( $child_total > 0 && $terminal_child >= $child_total ) {
601+
return 'complete';
602+
}
603+
604+
if ( 0 === $child_total && $this->cleanup_parent_has_no_planned_work($parent_result, $aggregate) ) {
605+
return 'no_work';
588606
}
589607

590608
return $parent_state;
591609
}
592610

611+
/**
612+
* Determine whether parent cleanup evidence proves there was no work to schedule.
613+
*
614+
* @param array $parent_result Parent system task result.
615+
* @param array $aggregate Child aggregate.
616+
* @return bool
617+
*/
618+
private function cleanup_parent_has_no_planned_work( array $parent_result, array $aggregate ): bool {
619+
if ( array() === $parent_result ) {
620+
return false;
621+
}
622+
623+
$cleanup_items = (array) ( $aggregate['cleanup_items'] ?? array() );
624+
if ( (int) ( $cleanup_items['planned_rows'] ?? 0 ) > 0 ) {
625+
return false;
626+
}
627+
628+
if ( array_key_exists('chunks', $parent_result) && array() !== (array) $parent_result['chunks'] ) {
629+
return false;
630+
}
631+
632+
if ( isset($parent_result['evidence']) && is_array($parent_result['evidence']) && (int) ( $parent_result['evidence']['planned_chunks'] ?? 0 ) > 0 ) {
633+
return false;
634+
}
635+
636+
if ( isset($parent_result['chunk_row_counts']) && is_array($parent_result['chunk_row_counts']) ) {
637+
foreach ( $parent_result['chunk_row_counts'] as $count ) {
638+
if ( (int) $count > 0 ) {
639+
return false;
640+
}
641+
}
642+
return true;
643+
}
644+
645+
return array_key_exists('chunks', $parent_result) || array_key_exists('chunk_row_counts', $parent_result);
646+
}
647+
593648
/**
594649
* Convert job status into cleanup run state.
595650
*
@@ -604,7 +659,7 @@ private function cleanup_job_state( string $status ): string {
604659
return 'cancelled';
605660
}
606661
if ( str_starts_with($status, 'failed') ) {
607-
return 'partial_failure';
662+
return 'failed';
608663
}
609664
if ( str_starts_with($status, 'completed') ) {
610665
return 'complete';

tests/smoke-worktree-cleanup-cli.php

Lines changed: 142 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -900,14 +900,23 @@ public function execute( array $input ): array
900900
}
901901
}
902902

903-
class FakeGetJobsAbility
904-
{
905-
public function execute( array $input ): array
906-
{
907-
if (isset($input['parent_job_id']) ) {
908-
$children = array(
909-
123 => array(
910-
array(
903+
class FakeGetJobsAbility
904+
{
905+
public function execute( array $input ): array
906+
{
907+
$scenario = (string) ( $GLOBALS['datamachine_code_cleanup_status_scenario'] ?? 'default' );
908+
if (isset($input['parent_job_id']) ) {
909+
if ( 'no_work' === $scenario ) {
910+
return array(
911+
'success' => true,
912+
'jobs' => array(),
913+
'total' => 0,
914+
);
915+
}
916+
917+
$children = array(
918+
123 => array(
919+
array(
911920
'job_id' => 124,
912921
'parent_job_id' => 123,
913922
'source' => 'system',
@@ -917,12 +926,12 @@ public function execute( array $input ): array
917926
),
918927
124 => array(
919928
array(
920-
'job_id' => 125,
921-
'parent_job_id' => 124,
922-
'source' => 'batch',
923-
'status' => ! empty($GLOBALS['datamachine_code_cleanup_child_drained']) ? 'completed' : 'processing',
924-
'engine_data' => array(
925-
'batch_id' => 'dm_batch_123',
929+
'job_id' => 125,
930+
'parent_job_id' => 124,
931+
'source' => 'batch',
932+
'status' => 'completed_children' === $scenario || ! empty($GLOBALS['datamachine_code_cleanup_child_drained']) ? 'completed' : 'processing',
933+
'engine_data' => array(
934+
'batch_id' => 'dm_batch_123',
926935
'task_type' => 'worktree_cleanup_chunk',
927936
),
928937
),
@@ -950,23 +959,23 @@ public function execute( array $input ): array
950959
),
951960
),
952961
array(
953-
'job_id' => 127,
954-
'parent_job_id' => 124,
955-
'source' => 'system',
956-
'status' => 'failed - apply_failed',
957-
'engine_data' => array(
958-
'task_type' => 'worktree_cleanup_chunk',
959-
'success' => false,
960-
'chunk_type' => 'artifacts',
961-
'planned_count' => 1,
962-
'applied_count' => 0,
963-
'skipped_count' => 0,
964-
'failed_count' => 1,
965-
'bytes_reclaimed' => 0,
966-
'skipped' => array(),
967-
'failed' => array(
968-
array( 'handle' => 'repo@failed', 'reason_code' => 'apply_failed', 'artifact_size_bytes' => 2048 ),
969-
),
962+
'job_id' => 127,
963+
'parent_job_id' => 124,
964+
'source' => 'system',
965+
'status' => 'completed_children' === $scenario ? 'completed' : 'failed - apply_failed',
966+
'engine_data' => array(
967+
'task_type' => 'worktree_cleanup_chunk',
968+
'success' => 'completed_children' === $scenario,
969+
'chunk_type' => 'artifacts',
970+
'planned_count' => 1,
971+
'applied_count' => 'completed_children' === $scenario ? 1 : 0,
972+
'skipped_count' => 0,
973+
'failed_count' => 'completed_children' === $scenario ? 0 : 1,
974+
'bytes_reclaimed' => 0,
975+
'skipped' => array(),
976+
'failed' => 'completed_children' === $scenario ? array() : array(
977+
array( 'handle' => 'repo@failed', 'reason_code' => 'apply_failed', 'artifact_size_bytes' => 2048 ),
978+
),
970979
),
971980
),
972981
),
@@ -985,31 +994,60 @@ public function execute( array $input ): array
985994
);
986995
}
987996

988-
return array(
989-
'success' => true,
990-
'jobs' => array(
991-
array(
992-
'job_id' => (int) $input['job_id'],
993-
'flow_id' => null,
994-
'pipeline_id' => null,
995-
'source' => 'system',
996-
'status' => 'completed',
997-
'created_at' => '2026-05-03 00:00:00',
998-
'completed_at' => '2026-05-03 00:10:00',
999-
'engine_data' => array(
1000-
'cleanup_run' => array(
1001-
'mode' => 'retention',
1002-
'source' => 'workspace_cleanup_cli',
1003-
),
1004-
'success' => true,
1005-
'job_backed' => true,
1006-
'report' => array(
1007-
'bytes_reclaimed' => 0,
1008-
'freed_human' => 'pending child jobs',
1009-
),
1010-
),
1011-
),
1012-
),
997+
if ( 'no_work' === $scenario ) {
998+
$engine_data = array(
999+
'cleanup_run' => array(
1000+
'mode' => 'retention',
1001+
'source' => 'workspace_cleanup_cli',
1002+
),
1003+
'success' => true,
1004+
'dry_run' => false,
1005+
'destructive' => false,
1006+
'job_backed' => true,
1007+
'chunk_row_counts' => array(
1008+
'artifact_discovery' => 0,
1009+
'artifacts' => 0,
1010+
'worktrees' => 0,
1011+
),
1012+
'chunks' => array(),
1013+
'report' => array(
1014+
'bytes_reclaimed' => 0,
1015+
'freed_human' => '0 B',
1016+
),
1017+
'evidence' => array(
1018+
'planned_chunks' => 0,
1019+
'note' => 'No cleanup chunks were eligible after plan generation.',
1020+
),
1021+
);
1022+
} else {
1023+
$engine_data = array(
1024+
'cleanup_run' => array(
1025+
'mode' => 'retention',
1026+
'source' => 'workspace_cleanup_cli',
1027+
),
1028+
'success' => true,
1029+
'job_backed' => true,
1030+
'report' => array(
1031+
'bytes_reclaimed' => 0,
1032+
'freed_human' => 'pending child jobs',
1033+
),
1034+
);
1035+
}
1036+
1037+
return array(
1038+
'success' => true,
1039+
'jobs' => array(
1040+
array(
1041+
'job_id' => (int) $input['job_id'],
1042+
'flow_id' => null,
1043+
'pipeline_id' => null,
1044+
'source' => 'system',
1045+
'status' => in_array($scenario, array( 'no_work', 'completed_children' ), true) ? 'processing' : 'completed',
1046+
'created_at' => '2026-05-03 00:00:00',
1047+
'completed_at' => '2026-05-03 00:10:00',
1048+
'engine_data' => $engine_data,
1049+
),
1050+
),
10131051
);
10141052
}
10151053
}
@@ -1225,12 +1263,13 @@ public function execute( array $input ): array
12251263
$command->cleanup(array( 'resume', 'cleanup-run-20260504193024-abc123' ), array( 'limit' => 9, 'format' => 'json' ));
12261264
datamachine_code_cleanup_assert(9 === (int) ( $cleanup_status_ability->last_input['limit'] ?? 0 ), 'DB cleanup resume forwards bounded limit');
12271265

1228-
WP_CLI::$logs = array();
1229-
WP_CLI::$successes = array();
1230-
$command->cleanup(array( 'status', 'cleanup-run-123' ), array( 'format' => 'json' ));
1231-
$status_json = json_decode(WP_CLI::$logs[0] ?? '', true);
1232-
datamachine_code_cleanup_assert('children_processing' === ( $status_json['state'] ?? '' ), 'cleanup status stays active while child batch is processing');
1233-
datamachine_code_cleanup_assert('children_processing' === ( $status_json['status'] ?? '' ), 'cleanup status does not report parent completed while children run');
1266+
WP_CLI::$logs = array();
1267+
WP_CLI::$successes = array();
1268+
$command->cleanup(array( 'status', 'cleanup-run-123' ), array( 'format' => 'json' ));
1269+
$status_json = json_decode(WP_CLI::$logs[0] ?? '', true);
1270+
datamachine_code_cleanup_assert('waiting_on_children' === ( $status_json['state'] ?? '' ), 'cleanup status stays active while child batch is processing');
1271+
datamachine_code_cleanup_assert('waiting_on_children' === ( $status_json['status'] ?? '' ), 'cleanup status does not report parent completed while children run');
1272+
datamachine_code_cleanup_assert('completed' === ( $status_json['parent_status'] ?? '' ), 'cleanup status preserves raw parent job status separately');
12341273
datamachine_code_cleanup_assert('2026-05-03 00:10:00' === ( $status_json['parent_completed_at'] ?? '' ), 'cleanup status labels parent scheduler completion separately');
12351274
datamachine_code_cleanup_assert(! isset($status_json['completed_at']), 'cleanup status does not expose parent completion as run completion');
12361275
datamachine_code_cleanup_assert(! isset($status_json['children']['job_ids']), 'cleanup status omits full child job ids by default');
@@ -1275,12 +1314,48 @@ public function execute( array $input ): array
12751314
$drained_json = json_decode(WP_CLI::$logs[0] ?? '', true);
12761315
datamachine_code_cleanup_assert(array( 'datamachine drain --job-id=123', 'datamachine drain --job-id=125' ) === WP_CLI::$runcommands, 'cleanup run --drain drains parent then active child jobs');
12771316
datamachine_code_cleanup_assert(4096 === (int) ( $drained_json['drain']['bytes_reclaimed'] ?? 0 ), 'cleanup run --drain reports verified reclaimed bytes');
1278-
datamachine_code_cleanup_assert('partial_failed' === (string) ( $drained_json['drain']['completion_state'] ?? '' ), 'cleanup run --drain reports final cleanup state');
1279-
datamachine_code_cleanup_assert('studio wp datamachine-code workspace cleanup status cleanup-run-123 --format=json' === (string) ( $drained_json['drain']['verify_command'] ?? '' ), 'cleanup run --drain emits one verification command');
1280-
$GLOBALS['datamachine_code_cleanup_parent_drained'] = false;
1281-
$GLOBALS['datamachine_code_cleanup_child_drained'] = false;
1317+
datamachine_code_cleanup_assert('failed' === (string) ( $drained_json['drain']['completion_state'] ?? '' ), 'cleanup run --drain reports final cleanup state');
1318+
datamachine_code_cleanup_assert('studio wp datamachine-code workspace cleanup status cleanup-run-123 --format=json' === (string) ( $drained_json['drain']['verify_command'] ?? '' ), 'cleanup run --drain emits one verification command');
1319+
$GLOBALS['datamachine_code_cleanup_parent_drained'] = false;
1320+
$GLOBALS['datamachine_code_cleanup_child_drained'] = false;
12821321

1283-
WP_CLI::$logs = array();
1322+
WP_CLI::$logs = array();
1323+
WP_CLI::$successes = array();
1324+
$GLOBALS['datamachine_code_cleanup_status_scenario'] = 'no_work';
1325+
$command->cleanup(array( 'status', 'cleanup-run-123' ), array( 'format' => 'json' ));
1326+
$no_work_status_json = json_decode(WP_CLI::$logs[0] ?? '', true);
1327+
datamachine_code_cleanup_assert('no_work' === ( $no_work_status_json['state'] ?? '' ), 'cleanup status converges no-child/no-work runs to no_work even when parent is still processing');
1328+
datamachine_code_cleanup_assert('processing' === ( $no_work_status_json['parent_status'] ?? '' ), 'no-work cleanup status preserves stale active parent job status separately');
1329+
datamachine_code_cleanup_assert(0 === (int) ( $no_work_status_json['cleanup_items']['planned_rows'] ?? -1 ), 'no-work cleanup status reports zero planned cleanup rows');
1330+
datamachine_code_cleanup_assert(0 === (int) ( $no_work_status_json['children']['total'] ?? -1 ), 'no-work cleanup status reports no children');
1331+
datamachine_code_cleanup_assert(false === (bool) ( $no_work_status_json['drain']['needed'] ?? true ), 'no-work cleanup status does not request more drain passes');
1332+
1333+
WP_CLI::$logs = array();
1334+
WP_CLI::$successes = array();
1335+
WP_CLI::$runcommands = array();
1336+
$GLOBALS['datamachine_code_cleanup_parent_drained'] = false;
1337+
$GLOBALS['datamachine_code_cleanup_child_drained'] = false;
1338+
$command->cleanup(array( 'run' ), array( 'mode' => 'retention', 'drain' => true, 'format' => 'json' ));
1339+
$no_work_drained_json = json_decode(WP_CLI::$logs[0] ?? '', true);
1340+
datamachine_code_cleanup_assert(array( 'datamachine drain --job-id=123' ) === WP_CLI::$runcommands, 'cleanup run --drain drains no-work parent once and does not invent child drains');
1341+
datamachine_code_cleanup_assert('no_work' === (string) ( $no_work_drained_json['drain']['completion_state'] ?? '' ), 'cleanup run --drain reports no_work instead of successful running state for no-child/no-work parents');
1342+
datamachine_code_cleanup_assert(true === (bool) ( $no_work_drained_json['drain']['success'] ?? false ), 'cleanup run --drain succeeds after no-work convergence');
1343+
$GLOBALS['datamachine_code_cleanup_status_scenario'] = 'default';
1344+
$GLOBALS['datamachine_code_cleanup_parent_drained'] = false;
1345+
$GLOBALS['datamachine_code_cleanup_child_drained'] = false;
1346+
1347+
WP_CLI::$logs = array();
1348+
WP_CLI::$successes = array();
1349+
$GLOBALS['datamachine_code_cleanup_status_scenario'] = 'completed_children';
1350+
$command->cleanup(array( 'status', 'cleanup-run-123' ), array( 'format' => 'json' ));
1351+
$completed_children_status_json = json_decode(WP_CLI::$logs[0] ?? '', true);
1352+
datamachine_code_cleanup_assert('complete' === ( $completed_children_status_json['state'] ?? '' ), 'cleanup status converges to complete when all children are terminal even if parent is still processing');
1353+
datamachine_code_cleanup_assert('processing' === ( $completed_children_status_json['parent_status'] ?? '' ), 'completed-children cleanup status preserves stale active parent job status separately');
1354+
datamachine_code_cleanup_assert(4 === (int) ( $completed_children_status_json['children']['completed'] ?? 0 ), 'completed-children cleanup status counts terminal descendants');
1355+
datamachine_code_cleanup_assert(false === (bool) ( $completed_children_status_json['drain']['needed'] ?? true ), 'completed-children cleanup status does not request more drain passes');
1356+
$GLOBALS['datamachine_code_cleanup_status_scenario'] = 'default';
1357+
1358+
WP_CLI::$logs = array();
12841359
WP_CLI::$successes = array();
12851360
$command->cleanup(array( 'status', 'cleanup-run-123' ), array());
12861361
datamachine_code_cleanup_assert(in_array('Remaining work summary:', WP_CLI::$logs, true), 'human cleanup status renders compact remaining-work summary');

0 commit comments

Comments
 (0)