Skip to content

Commit 55e7fb7

Browse files
authored
Bound artifact cleanup apply scheduling (#435)
* fix: bound artifact cleanup apply fanout
* chore: fix artifact cleanup lint findings
1 parent a9e490c commit 55e7fb7

5 files changed

Lines changed: 102 additions & 64 deletions

File tree

inc/Abilities/WorkspaceAbilities.php

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1547,9 +1547,9 @@ private function registerAbilities(): void {
15471547
'description' => 'Captured session identifiers in a runtime-agnostic envelope. `primary_id` is the single renderer-friendly identifier downstream surfaces display. `ids` is a free-form map keyed by runtime ID (a string the integration layer chooses, e.g. via the `datamachine_code_worktree_runtime_signatures` filter); each entry is a string-map of subkeys (e.g. session_id, thread_id, thread_url, run_id) the integration chose to capture. DMC enumerates no runtime IDs and no subkeys.',
15481548
'properties' => array(
15491549
'primary_id' => array( 'type' => array( 'string', 'null' ) ),
1550-
'ids' => array(
1551-
'type' => 'object',
1552-
'description' => 'Map of runtime-id => { subkey => string|null }. Keys are opaque; DMC does not validate against a closed set.',
1550+
'ids' => array(
1551+
'type' => 'object',
1552+
'description' => 'Map of runtime-id => { subkey => string|null }. Keys are opaque; DMC does not validate against a closed set.',
15531553
'additionalProperties' => array(
15541554
'type' => 'object',
15551555
'additionalProperties' => array( 'type' => array( 'string', 'null' ) ),
@@ -2874,6 +2874,17 @@ public static function workspaceCleanupRun( array $input ): array|\WP_Error {
28742874
if ( isset( $input['older_than'] ) && '' !== trim( (string) $input['older_than'] ) ) {
28752875
$params['worktree_older_than'] = trim( (string) $input['older_than'] );
28762876
}
2877+
if ( 'artifacts' === $mode ) {
2878+
if ( isset( $input['limit'] ) ) {
2879+
$params['limit'] = (int) $input['limit'];
2880+
}
2881+
if ( isset( $input['offset'] ) ) {
2882+
$params['offset'] = (int) $input['offset'];
2883+
}
2884+
if ( ! empty( $input['exhaustive'] ) ) {
2885+
$params['exhaustive'] = true;
2886+
}
2887+
}
28772888

28782889
$context = array();
28792890
if ( isset( $input['user_id'] ) ) {

inc/Cli/Commands/WorkspaceCommand.php

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -307,14 +307,14 @@ public function adopt_repo( array $args, array $assoc_args ): void {
307307
* : Pass an age gate such as 7d or 24h into cleanup task params.
308308
*
309309
* [--limit=<count>]
310-
* : Maximum worktrees to scan in a `--mode=artifacts --dry-run` page.
311-
* Defaults to 100 — keeps dry-run bounded on workspaces with hundreds of
312-
* worktrees. Use 0 to disable the cap (combine with --exhaustive for a
313-
* full audit).
310+
* : Maximum worktrees to scan in a `--mode=artifacts` page. Dry-run reviews
311+
* scan this bounded page synchronously; apply runs freeze eligible candidates
312+
* from the same bounded page and schedule only those candidates. Defaults to
313+
* 100. Use 0 to disable the cap (combine with --exhaustive for a full audit).
314314
*
315315
* [--offset=<count>]
316-
* : Pagination offset (0-indexed) for `--mode=artifacts --dry-run`. Walk
317-
* huge workspaces by feeding the previous response's
316+
* : Pagination offset (0-indexed) for `--mode=artifacts` dry-run and apply
317+
* pages. Walk huge workspaces by feeding the previous response's
318318
* `pagination.next_offset` until `pagination.complete` is true.
319319
*
320320
* [--exhaustive]
@@ -468,6 +468,17 @@ private function cleanup_run_input( string $mode, array $assoc_args ): array {
468468
if ( isset( $assoc_args['older-than'] ) && '' !== trim( (string) $assoc_args['older-than'] ) ) {
469469
$input['older_than'] = trim( (string) $assoc_args['older-than'] );
470470
}
471+
if ( 'artifacts' === $mode ) {
472+
if ( isset( $assoc_args['limit'] ) ) {
473+
$input['limit'] = (int) $assoc_args['limit'];
474+
}
475+
if ( isset( $assoc_args['offset'] ) ) {
476+
$input['offset'] = (int) $assoc_args['offset'];
477+
}
478+
if ( ! empty( $assoc_args['exhaustive'] ) ) {
479+
$input['exhaustive'] = true;
480+
}
481+
}
471482

472483
return $input;
473484
}

inc/Tasks/WorkspaceRetentionCleanupTask.php

Lines changed: 43 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -131,8 +131,8 @@ public function executeTask( int $jobId, array $params ): void {
131131
* @return array<string,mixed>|\WP_Error
132132
*/
133133
private function schedule_job_backed_cleanup( int $jobId, Workspace $workspace, array $opts, array $params ): array|\WP_Error {
134-
$started_at = microtime( true );
135-
$chunk_rows = $this->build_cleanup_chunk_rows( $workspace, $opts, $params );
134+
$started_at = microtime( true );
135+
$chunk_rows = $this->build_cleanup_chunk_rows( $workspace, $opts, $params );
136136
if ( $chunk_rows instanceof \WP_Error ) {
137137
return $chunk_rows;
138138
}
@@ -146,22 +146,22 @@ private function schedule_job_backed_cleanup( int $jobId, Workspace $workspace,
146146

147147
if ( array() === $item_params ) {
148148
return array(
149-
'success' => true,
150-
'dry_run' => false,
151-
'destructive' => false,
152-
'job_backed' => true,
153-
'generated_at' => gmdate( 'c' ),
154-
'workspace_path' => $workspace->get_path(),
155-
'chunk_row_counts' => $chunk_row_counts,
156-
'chunks' => array(),
157-
'report' => array(
158-
'removed_count' => 0,
159-
'bytes_reclaimed' => 0,
160-
'freed_human' => '0 B',
161-
'skipped_dirty_unpushed_count' => 0,
162-
'remaining_disk_budget_human' => 'unknown disk',
149+
'success' => true,
150+
'dry_run' => false,
151+
'destructive' => false,
152+
'job_backed' => true,
153+
'generated_at' => gmdate( 'c' ),
154+
'workspace_path' => $workspace->get_path(),
155+
'chunk_row_counts' => $chunk_row_counts,
156+
'chunks' => array(),
157+
'report' => array(
158+
'removed_count' => 0,
159+
'bytes_reclaimed' => 0,
160+
'freed_human' => '0 B',
161+
'skipped_dirty_unpushed_count' => 0,
162+
'remaining_disk_budget_human' => 'unknown disk',
163163
),
164-
'evidence' => array(
164+
'evidence' => array(
165165
'elapsed_ms' => (int) round( ( microtime( true ) - $started_at ) * 1000 ),
166166
'note' => 'No cleanup chunks were eligible after plan generation.',
167167
),
@@ -188,34 +188,34 @@ private function schedule_job_backed_cleanup( int $jobId, Workspace $workspace,
188188
}
189189

190190
return array(
191-
'success' => true,
192-
'dry_run' => false,
193-
'destructive' => true,
194-
'job_backed' => true,
195-
'generated_at' => gmdate( 'c' ),
196-
'workspace_path' => $workspace->get_path(),
197-
'policy' => array(
191+
'success' => true,
192+
'dry_run' => false,
193+
'destructive' => true,
194+
'job_backed' => true,
195+
'generated_at' => gmdate( 'c' ),
196+
'workspace_path' => $workspace->get_path(),
197+
'policy' => array(
198198
'worktree_cleanup' => (bool) $opts['worktree_cleanup'],
199199
'artifact_cleanup' => (bool) $opts['artifact_cleanup'],
200200
'worktree_older_than' => (string) ( $opts['worktree_older_than'] ?? '14d' ),
201201
'skip_github' => (bool) $opts['skip_github'],
202202
'force' => (bool) $opts['force'],
203203
),
204-
'chunk_row_counts' => $chunk_row_counts,
205-
'chunks' => $batch,
206-
'report' => array(
204+
'chunk_row_counts' => $chunk_row_counts,
205+
'chunks' => $batch,
206+
'report' => array(
207207
'removed_count' => 0,
208208
'bytes_reclaimed' => 0,
209209
'freed_human' => 'pending child jobs',
210210
'skipped_dirty_unpushed_count' => 0,
211211
'remaining_disk_budget_human' => 'pending child jobs',
212212
),
213-
'evidence' => array(
214-
'elapsed_ms' => (int) round( ( microtime( true ) - $started_at ) * 1000 ),
215-
'planned_chunks' => count( $item_params ),
216-
'planned_handles' => $this->cleanup_chunk_handles( $chunk_rows ),
217-
'batch_job_id' => (int) ( $batch['batch_job_id'] ?? 0 ),
218-
'direct_job_ids' => $batch['job_ids'] ?? array(),
213+
'evidence' => array(
214+
'elapsed_ms' => (int) round( ( microtime( true ) - $started_at ) * 1000 ),
215+
'planned_chunks' => count( $item_params ),
216+
'planned_handles' => $this->cleanup_chunk_handles( $chunk_rows ),
217+
'batch_job_id' => (int) ( $batch['batch_job_id'] ?? 0 ),
218+
'direct_job_ids' => $batch['job_ids'] ?? array(),
219219
),
220220
);
221221
}
@@ -236,25 +236,21 @@ private function build_cleanup_chunk_rows( Workspace $workspace, array $opts, ar
236236
);
237237

238238
if ( ! empty( $opts['artifact_cleanup'] ) ) {
239-
$page_size = max( 1, (int) ( $params['artifact_chunk_size'] ?? 10 ) );
240-
$artifact_page = $workspace->worktree_cleanup_artifacts(
239+
$artifact_limit = isset( $params['limit'] ) ? max( 0, (int) $params['limit'] ) : 100;
240+
$artifact_page = $workspace->worktree_cleanup_artifacts(
241241
array(
242-
'dry_run' => true,
243-
'force' => ! empty( $opts['force'] ),
244-
'limit' => 1,
245-
'offset' => 0,
242+
'dry_run' => true,
243+
'force' => ! empty( $opts['force'] ),
244+
'limit' => $artifact_limit,
245+
'offset' => isset( $params['offset'] ) ? max( 0, (int) $params['offset'] ) : 0,
246+
'exhaustive' => ! empty( $params['exhaustive'] ),
247+
'safety_probes' => true,
246248
)
247249
);
248250
if ( $artifact_page instanceof \WP_Error ) {
249251
return $artifact_page;
250252
}
251-
$total = max( 0, (int) ( $artifact_page['pagination']['total'] ?? $artifact_page['summary']['pagination']['total'] ?? 0 ) );
252-
for ( $offset = 0; $offset < $total; $offset += $page_size ) {
253-
$rows['artifact_discovery'][] = array(
254-
'offset' => $offset,
255-
'limit' => $page_size,
256-
);
257-
}
253+
$rows['artifacts'] = array_values( (array) $artifact_page['candidates'] );
258254
}
259255

260256
if ( ! empty( $opts['worktree_cleanup'] ) ) {
@@ -270,7 +266,7 @@ private function build_cleanup_chunk_rows( Workspace $workspace, array $opts, ar
270266
if ( $worktree_plan instanceof \WP_Error ) {
271267
return $worktree_plan;
272268
}
273-
$rows['worktrees'] = array_values( (array) ( $worktree_plan['candidates'] ?? array() ) );
269+
$rows['worktrees'] = array_values( (array) $worktree_plan['candidates'] );
274270
}
275271

276272
return $rows;

tests/smoke-workspace-retention-task.php

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,15 @@ public function worktree_cleanup_artifacts( array $opts = array() ): array {
9797
return array(
9898
'success' => true,
9999
'dry_run' => true,
100-
'candidates' => array(),
100+
'candidates' => array(
101+
array(
102+
'handle' => 'repo@active',
103+
'repo' => 'repo',
104+
'branch' => 'active',
105+
'path' => '/tmp/dmc-retention-task-workspace/repo@active',
106+
'artifacts' => array( array( 'path' => 'vendor', 'size_bytes' => 1024 ) ),
107+
),
108+
),
101109
'skipped' => array(),
102110
'summary' => array(
103111
'pagination' => array(
@@ -180,7 +188,7 @@ function datamachine_code_retention_task_assert( bool $condition, string $messag
180188
datamachine_code_retention_task_assert( empty( $completed[0][1]['skipped'] ), 'explicit CLI run bypasses disabled recurring schedule' );
181189
datamachine_code_retention_task_assert( true === (bool) ( $completed[0][1]['dry_run'] ?? false ), 'explicit CLI run forwards task params' );
182190

183-
echo "\n[4] Artifact cleanup schedules bounded discovery chunks\n";
191+
echo "\n[4] Artifact cleanup freezes bounded candidates before scheduling chunks\n";
184192
\DataMachine\Engine\Tasks\TaskScheduler::$batches = array();
185193
\DataMachineCode\Workspace\Workspace::$artifact_opts = array();
186194
$task = new \DataMachineCode\Tasks\WorkspaceRetentionCleanupTask();
@@ -191,16 +199,19 @@ function datamachine_code_retention_task_assert( bool $condition, string $messag
191199
'artifact_cleanup' => true,
192200
'worktree_cleanup' => false,
193201
'artifact_chunk_size' => 10,
202+
'limit' => 100,
194203
)
195204
);
196205
$completed = $task->{$completed_prop};
197206
$batch = \DataMachine\Engine\Tasks\TaskScheduler::$batches[0] ?? array();
198207
datamachine_code_retention_task_assert( 'worktree_cleanup_chunk' === ( $batch['task_type'] ?? '' ), 'retention task schedules cleanup chunk batch' );
199-
datamachine_code_retention_task_assert( 3 === count( $batch['items'] ?? array() ), 'artifact inventory total fans out into bounded discovery pages' );
200-
datamachine_code_retention_task_assert( 'artifact_discovery' === ( $batch['items'][0]['chunk_type'] ?? '' ), 'artifact cleanup uses discovery chunks instead of prebuilt artifact rows' );
201-
datamachine_code_retention_task_assert( array( 0, 10, 20 ) === array_column( $batch['items'], 'offset' ), 'discovery chunks carry stable offsets' );
202-
datamachine_code_retention_task_assert( empty( \DataMachineCode\Workspace\Workspace::$artifact_opts[0]['exhaustive'] ), 'parent does not run exhaustive artifact dry-run' );
203-
datamachine_code_retention_task_assert( 3 === (int) ( $completed[0][1]['chunk_row_counts']['artifact_discovery'] ?? 0 ), 'completion report exposes discovery chunk count' );
208+
datamachine_code_retention_task_assert( 1 === count( $batch['items'] ?? array() ), 'artifact candidates fan out proportionally to eligible rows' );
209+
datamachine_code_retention_task_assert( 'artifacts' === ( $batch['items'][0]['chunk_type'] ?? '' ), 'artifact cleanup uses frozen candidate chunks instead of discovery pages' );
210+
datamachine_code_retention_task_assert( 'repo@active' === ( $batch['items'][0]['rows'][0]['handle'] ?? '' ), 'artifact chunk carries reviewed candidate rows' );
211+
datamachine_code_retention_task_assert( 100 === (int) ( \DataMachineCode\Workspace\Workspace::$artifact_opts[0]['limit'] ?? 0 ), 'parent forwards artifact scan limit' );
212+
datamachine_code_retention_task_assert( true === ( \DataMachineCode\Workspace\Workspace::$artifact_opts[0]['safety_probes'] ?? false ), 'parent runs safety probes before scheduling artifact apply chunks' );
213+
datamachine_code_retention_task_assert( 0 === (int) ( $completed[0][1]['chunk_row_counts']['artifact_discovery'] ?? -1 ), 'completion report shows no discovery chunks' );
214+
datamachine_code_retention_task_assert( 1 === (int) ( $completed[0][1]['chunk_row_counts']['artifacts'] ?? 0 ), 'completion report exposes artifact candidate chunk count' );
204215

205216
echo "\nAll workspace retention task smoke tests passed.\n";
206217
}

tests/smoke-worktree-cleanup-cli.php

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -622,6 +622,7 @@ public function execute( array $input ): array {
622622
datamachine_code_cleanup_assert( str_contains( $cleanup_doc_comment, 'Control task-backed workspace cleanup runs.' ), 'workspace cleanup command documents task-backed controller surface' );
623623
datamachine_code_cleanup_assert( str_contains( $cleanup_doc_comment, '<plan|apply|run|status|resume|cancel|evidence>' ), 'workspace cleanup synopsis exposes DB-backed and task-backed cleanup operations' );
624624
datamachine_code_cleanup_assert( str_contains( $cleanup_doc_comment, '[--dry-run]' ), 'task-backed cleanup synopsis keeps synchronous dry-run review' );
625+
datamachine_code_cleanup_assert( str_contains( $cleanup_doc_comment, 'apply runs freeze eligible candidates' ), 'workspace cleanup limit help clarifies artifact apply scoping' );
625626
datamachine_code_cleanup_assert( str_contains( $doc_comment, 'Daily cleanup path: DB-backed plan, then apply only those rows after revalidation' ), 'worktree examples point daily cleanup to DB-backed run_id controller path' );
626627
datamachine_code_cleanup_assert( str_contains( $doc_comment, 'workspace cleanup plan --mode=retention' ), 'worktree examples include DB-backed cleanup plan' );
627628
datamachine_code_cleanup_assert( str_contains( $doc_comment, 'workspace cleanup run --mode=retention' ), 'worktree examples include task-backed cleanup run' );
@@ -640,11 +641,19 @@ public function execute( array $input ): array {
640641
datamachine_code_cleanup_assert( 'retention' === ( $cleanup_run_ability->last_input['mode'] ?? '' ), 'cleanup run ability receives mode' );
641642
datamachine_code_cleanup_assert( 'workspace_cleanup_cli' === ( $cleanup_run_ability->last_input['source'] ?? '' ), 'cleanup run ability identifies explicit CLI source' );
642643

644+
WP_CLI::$logs = array();
645+
WP_CLI::$successes = array();
646+
$command->cleanup( array( 'run' ), array( 'mode' => 'artifacts', 'limit' => 25, 'offset' => 50, 'format' => 'json' ) );
647+
datamachine_code_cleanup_assert( 'artifacts' === ( $cleanup_run_ability->last_input['mode'] ?? '' ), 'cleanup run can schedule artifact mode' );
648+
datamachine_code_cleanup_assert( 25 === (int) ( $cleanup_run_ability->last_input['limit'] ?? 0 ), 'cleanup run forwards artifact apply limit' );
649+
datamachine_code_cleanup_assert( 50 === (int) ( $cleanup_run_ability->last_input['offset'] ?? 0 ), 'cleanup run forwards artifact apply offset' );
650+
$last_scheduled_cleanup_run = $cleanup_run_ability->last_input;
651+
643652
WP_CLI::$logs = array();
644653
WP_CLI::$successes = array();
645654
$command->cleanup( array( 'run' ), array( 'mode' => 'artifacts', 'dry-run' => true, 'format' => 'json' ) );
646655
datamachine_code_cleanup_assert( true === ( $artifact_ability->last_input['dry_run'] ?? false ), 'cleanup run --dry-run uses artifact cleanup ability directly' );
647-
datamachine_code_cleanup_assert( 'retention' === ( $cleanup_run_ability->last_input['mode'] ?? '' ), 'cleanup run --dry-run does not schedule cleanup run ability' );
656+
datamachine_code_cleanup_assert( $last_scheduled_cleanup_run === $cleanup_run_ability->last_input, 'cleanup run --dry-run does not schedule cleanup run ability' );
648657

649658
WP_CLI::$logs = array();
650659
WP_CLI::$successes = array();

0 commit comments

Comments
 (0)