Skip to content

Commit d4b82bd

Browse files
authored
Prefilter metadata reconciliation candidates (#500)
* fix: prefilter metadata reconciliation candidates * fix: align metadata reconciliation assignments
1 parent f1b2a83 commit d4b82bd

2 files changed

Lines changed: 99 additions & 16 deletions

File tree

inc/Workspace/WorkspaceMetadataReconciliation.php

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,10 @@ public function worktree_reconcile_metadata( array $opts = array() ): array|\WP_
8888
fn( $wt ) => empty($wt['is_primary'])
8989
)
9090
);
91-
$total_worktrees = count($all_worktrees);
92-
$page_worktrees = $paged ? array_slice($all_worktrees, $offset, $limit) : $all_worktrees;
91+
$prefilter = $this->prefilter_worktree_metadata_reconciliation_rows($all_worktrees);
92+
$page_scope = $prefilter['candidates'];
93+
$total_worktrees = count($page_scope);
94+
$page_worktrees = $paged ? array_slice($page_scope, $offset, $limit) : $page_scope;
9395

9496
$proposals = array();
9597
$skipped = array();
@@ -125,7 +127,7 @@ public function worktree_reconcile_metadata( array $opts = array() ): array|\WP_
125127
$pagination['next_command'] = sprintf('studio wp datamachine-code workspace worktree reconcile-metadata --%s --limit=%d --offset=%d%s --format=json', $apply ? 'apply' : 'dry-run', $limit, (int) $pagination['next_offset'], null !== $budget_context ? ' --until-budget=' . (string) $budget_context['label'] : '');
126128
}
127129

128-
$plan = array(
130+
$plan = array(
129131
'success' => true,
130132
'dry_run' => $dry_run,
131133
'applied' => false,
@@ -136,14 +138,16 @@ public function worktree_reconcile_metadata( array $opts = array() ): array|\WP_
136138
'skipped' => $skipped,
137139
'still_unsafe' => $classified_skips['still_unsafe'],
138140
'external_worktrees' => $classified_skips['external_worktrees'],
139-
'summary' => $this->build_worktree_metadata_reconciliation_summary($paged ? count($page_worktrees) : count( (array) ( $listing['worktrees'] ?? array() )), $proposals, array(), $skipped),
141+
'summary' => $this->build_worktree_metadata_reconciliation_summary($paged ? count($page_worktrees) : count($page_scope), $proposals, array(), $skipped),
140142
);
143+
$plan['summary']['prefiltered'] = $prefilter['summary'];
141144
if ( null !== $pagination ) {
142145
$plan['pagination'] = $pagination;
143146
$plan['evidence'] = array(
144147
'scope' => 'paginated metadata reconciliation dry-run',
145-
'note' => 'Only this page ran per-worktree dirty, unpushed, merge-signal, and GitHub probes. Run the next_offset page until complete for full inventory review.',
148+
'note' => 'Only candidate rows with missing, incomplete, invalid, or finalizable metadata ran per-worktree dirty, unpushed, merge-signal, and GitHub probes. Run the next_offset page until complete for full inventory review.',
146149
'fields_skipped_by_listing' => (array) ( $listing['fields_skipped'] ?? array() ),
150+
'prefilter' => $prefilter['summary'],
147151
);
148152
if ( null !== $budget_context ) {
149153
$plan['evidence']['budget'] = $this->summarize_worktree_loop_budget_context($budget_context, $budget_stopped);
@@ -458,6 +462,74 @@ private function summarize_worktree_loop_budget_context( array $context, bool $e
458462
);
459463
}
460464

465+
/**
466+
* Keep expensive reconciliation probes focused on rows that can change.
467+
*
* Each row is classified by worktree_metadata_reconciliation_candidate_reason():
* a non-null reason keeps the row as a candidate, a null reason counts the row
* as skipped under the 'complete_metadata' reason. Candidate rows keep their
* input order.
*
* The returned summary holds 'input_rows', 'candidate_rows', 'skipped_rows',
* 'reasons' (row count per reason key, including 'complete_metadata') and a
* fixed 'candidate_scope' label.
*
468+
* @param array<int,array<string,mixed>> $worktrees Non-primary worktree rows.
469+
* @return array{candidates:array<int,array<string,mixed>>,summary:array<string,mixed>}
470+
*/
471+
private function prefilter_worktree_metadata_reconciliation_rows( array $worktrees ): array {
472+
$candidates = array();
473+
$skipped = 0;
474+
// Row counts keyed by reason string (candidate reasons plus 'complete_metadata').
$reasons = array();
475+
476+
foreach ( $worktrees as $wt ) {
477+
$reason = $this->worktree_metadata_reconciliation_candidate_reason($wt);
478+
if ( null !== $reason ) {
479+
// Candidate row: keep it and tally the reason it was selected for.
$candidates[] = $wt;
480+
$reasons[ $reason ] = (int) ( $reasons[ $reason ] ?? 0 ) + 1;
481+
continue;
482+
}
483+
484+
// No candidate reason: the row is left out of the candidate list and only counted.
++$skipped;
485+
$reasons['complete_metadata'] = (int) ( $reasons['complete_metadata'] ?? 0 ) + 1;
486+
}
487+
488+
return array(
489+
// $candidates is only ever appended to above, so this yields a 0-based list.
'candidates' => array_values($candidates),
490+
'summary' => array(
491+
'input_rows' => count($worktrees),
492+
'candidate_rows' => count($candidates),
493+
'skipped_rows' => $skipped,
494+
'reasons' => $reasons,
495+
'candidate_scope' => 'missing_or_incomplete_metadata_and_stored_finalizer_signals',
496+
),
497+
);
498+
}
499+
500+
/**
501+
* Return a cheap candidate reason when reconciliation may write metadata.
502+
*
* Checks run in this order and the first match wins:
*  - 'external_worktree'       the row has a non-empty 'external' value;
*  - 'missing_metadata'        'metadata' is absent, not an array, or an empty array;
*  - 'incomplete_metadata'     a required field is absent or blank after trim();
*  - 'invalid_lifecycle_state' WorktreeContextInjector::normalize_state() returns null;
*  - 'stored_pr_signal'        the state is not cleanup-eligible and a 'pr_url' or
*                              'pr_number' is stored.
* Only inspects the row array passed in (plus the normalize_state() call).
*
503+
* @param array<string,mixed> $wt Worktree list row.
* @return string|null Reason key, or null when none of the checks match.
504+
*/
505+
private function worktree_metadata_reconciliation_candidate_reason( array $wt ): ?string {
506+
if ( ! empty($wt['external']) ) {
507+
return 'external_worktree';
508+
}
509+
510+
// Anything that is not an array (including an absent key) is treated like no metadata at all.
$metadata = is_array($wt['metadata'] ?? null) ? (array) $wt['metadata'] : null;
511+
if ( null === $metadata || array() === $metadata ) {
512+
return 'missing_metadata';
513+
}
514+
515+
// Required fields: each must exist and be non-blank once cast to string and trimmed.
// NOTE(review): the (string) cast assumes scalar values; an array value here would
// raise PHP's "Array to string conversion" warning. Confirm against the metadata writer.
foreach ( array( 'handle', 'repo', 'branch', 'path', 'created_at', 'observed_at', 'lifecycle_state' ) as $field ) {
516+
if ( ! array_key_exists($field, $metadata) || '' === trim( (string) $metadata[ $field ] ) ) {
517+
return 'incomplete_metadata';
518+
}
519+
}
520+
521+
// normalize_state() is defined elsewhere; a null result is treated here as an invalid stored state.
$state = WorktreeContextInjector::normalize_state( (string) $metadata['lifecycle_state'] );
522+
if ( null === $state ) {
523+
return 'invalid_lifecycle_state';
524+
}
525+
526+
// A stored PR reference keeps the row in scope unless its state is already cleanup-eligible.
if ( WorktreeContextInjector::STATE_CLEANUP_ELIGIBLE !== $state && ( ! empty($metadata['pr_url']) || ! empty($metadata['pr_number']) ) ) {
527+
return 'stored_pr_signal';
528+
}
529+
530+
return null;
531+
}
532+
461533
/**
462534
* Build one metadata reconciliation row for a current worktree listing row.
463535
*

tests/smoke-worktree-metadata-reconcile.php

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ function () use ( $tmp ) {
360360
$run('git checkout main', $primary);
361361
};
362362
$make_branch('unmanaged-missing');
363+
$make_branch('unmanaged-empty');
363364
$make_branch('unmanaged-partial');
364365
$make_branch('unmanaged-dirty');
365366
$make_branch('unmanaged-invalid');
@@ -377,6 +378,7 @@ function () use ( $tmp ) {
377378
$run('git checkout main', $primary);
378379

379380
$run(sprintf('git worktree add %s unmanaged-missing', escapeshellarg($tmp . '/demo@unmanaged-missing')), $primary);
381+
$run(sprintf('git worktree add %s unmanaged-empty', escapeshellarg($tmp . '/demo@unmanaged-empty')), $primary);
380382
$run(sprintf('git worktree add %s unmanaged-partial', escapeshellarg($tmp . '/demo@unmanaged-partial')), $primary);
381383
$run(sprintf('git worktree add %s unmanaged-dirty', escapeshellarg($tmp . '/demo@unmanaged-dirty')), $primary);
382384
$run(sprintf('git worktree add %s unmanaged-invalid', escapeshellarg($tmp . '/demo@unmanaged-invalid')), $primary);
@@ -403,6 +405,10 @@ function () use ( $tmp ) {
403405
$run(sprintf('git --git-dir=%s update-ref -d refs/heads/upstream-gone', escapeshellarg($remote)));
404406
$run(sprintf('git --git-dir=%s update-ref -d refs/heads/dirty-merged', escapeshellarg($remote)));
405407

408+
\DataMachineCode\Workspace\WorktreeContextInjector::store_metadata(
409+
'demo@unmanaged-empty',
410+
array()
411+
);
406412
\DataMachineCode\Workspace\WorktreeContextInjector::store_metadata(
407413
'demo@unmanaged-partial',
408414
array(
@@ -577,10 +583,10 @@ function () use ( $tmp ) {
577583
$plan = $ws->worktree_reconcile_metadata(array( 'dry_run' => true ));
578584
$assert(true, ! is_wp_error($plan) && ( $plan['success'] ?? false ), 'dry-run succeeds');
579585
$assert(true, $plan['dry_run'] ?? false, 'dry-run flag is true');
580-
$assert(7, (int) ( $plan['summary']['proposed'] ?? 0 ), 'dry-run proposes unmanaged rows and safe merged lifecycle finalizers');
586+
$assert(8, (int) ( $plan['summary']['proposed'] ?? 0 ), 'dry-run proposes unmanaged rows and safe merged lifecycle finalizers');
581587
$assert(0, (int) ( $plan['summary']['written'] ?? 0 ), 'dry-run writes nothing');
582588
$assert(1, (int) ( $plan['summary']['skipped_by_reason']['external_worktree'] ?? 0 ), 'dry-run distinguishes external worktrees');
583-
$assert(3, (int) ( $plan['summary']['skipped_by_reason']['unsafe_cleanup_eligible_state'] ?? 0 ), 'dry-run keeps dirty and unpushed merged worktrees out of auto-finalize proposals');
589+
$assert(2, (int) ( $plan['summary']['skipped_by_reason']['unsafe_cleanup_eligible_state'] ?? 0 ), 'dry-run keeps dirty and unpushed merged worktrees out of auto-finalize proposals');
584590
$assert(1, count($plan['external_worktrees'] ?? array()), 'dry-run exposes external worktree bucket');
585591

586592
$by_handle = array();
@@ -592,7 +598,11 @@ function () use ( $tmp ) {
592598
$assert('reconcile_run', $by_handle['demo@unmanaged-missing']['source_map']['observed_at'] ?? '', 'missing metadata observed_at source is reconcile run');
593599
$assert('current_site', $by_handle['demo@unmanaged-missing']['source_map']['origin_site'] ?? '', 'missing metadata origin site is inferred from current site');
594600
$assert(true, isset($by_handle['demo@unmanaged-missing']['elapsed_ms']), 'metadata reconciliation proposal rows include elapsed timing');
601+
$assert('operator_plan', $by_handle['demo@unmanaged-empty']['source_map']['lifecycle_state'] ?? '', 'empty-array metadata lifecycle source is operator_plan');
595602
$assert(true, isset($plan['summary']['slow_rows'][0]['elapsed_ms']), 'metadata reconciliation summary includes slow row timing samples');
603+
$assert(true, (int) ( $plan['summary']['prefiltered']['skipped_rows'] ?? 0 ) > 0, 'metadata reconciliation prefilter skips valid complete metadata rows before expensive probes');
604+
$assert(true, (int) ( $plan['summary']['prefiltered']['reasons']['missing_metadata'] ?? 0 ) >= 2, 'metadata reconciliation prefilter includes null and empty-array metadata rows');
605+
$assert(true, (int) ( $plan['summary']['prefiltered']['reasons']['stored_pr_signal'] ?? 0 ) >= 2, 'metadata reconciliation prefilter keeps stored PR finalizer signals');
596606
$assert('metadata', $by_handle['demo@unmanaged-partial']['source_map']['created_at'] ?? '', 'partial metadata preserves created_at source');
597607
$assert('git', $by_handle['demo@unmanaged-partial']['source_map']['branch'] ?? '', 'branch source is git');
598608
$assert(array( 'lifecycle_state' ), $by_handle['demo@unmanaged-invalid']['invalid_fields'] ?? array(), 'invalid lifecycle state is planned for repair');
@@ -660,10 +670,10 @@ function () use ( $tmp ) {
660670
echo "\nApply reviewed plan\n";
661671
$apply = $ws->worktree_reconcile_metadata(array( 'apply_plan' => $plan ));
662672
$assert(true, ! is_wp_error($apply) && ( $apply['success'] ?? false ), 'apply succeeds');
663-
$assert(7, (int) ( $apply['summary']['written'] ?? 0 ), 'apply writes exact current matches');
664-
$assert(7, (int) ( $apply['summary']['written'] ?? 0 ), 'apply reports written metadata rows');
673+
$assert(8, (int) ( $apply['summary']['written'] ?? 0 ), 'apply writes exact current matches');
674+
$assert(8, (int) ( $apply['summary']['written'] ?? 0 ), 'apply reports written metadata rows');
665675
$assert(0, (int) ( $apply['summary']['skipped'] ?? 0 ), 'apply skips nothing for current plan');
666-
$assert(7, count($apply['written'] ?? array()), 'apply exposes written rows distinctly');
676+
$assert(8, count($apply['written'] ?? array()), 'apply exposes written rows distinctly');
667677
$stored = \DataMachineCode\Workspace\WorktreeContextInjector::get_metadata('demo@unmanaged-missing');
668678
$assert('demo@unmanaged-missing', $stored['handle'] ?? '', 'stored metadata includes handle');
669679
$assert(true, ! empty($stored['observed_at']), 'stored metadata includes observed_at');
@@ -691,14 +701,14 @@ function () use ( $tmp ) {
691701
$assert(true, ! is_wp_error($bounded_auto_apply) && ( $bounded_auto_apply['success'] ?? false ), 'bounded direct reconciliation apply runs without a manual plan file');
692702
$assert(true, (bool) ( $bounded_auto_apply['direct_apply'] ?? false ), 'bounded direct apply identifies direct apply source');
693703
$assert(false, (bool) ( $bounded_auto_apply['dry_run'] ?? true ), 'bounded direct apply is not a dry-run');
694-
$assert(2, (int) ( $bounded_auto_apply['summary']['inspected'] ?? 0 ), 'bounded direct apply summary stays page-scoped');
704+
$assert(1, (int) ( $bounded_auto_apply['summary']['inspected'] ?? 0 ), 'bounded direct apply summary stays candidate-page scoped');
695705
$assert(2, (int) ( $bounded_auto_apply['pagination']['limit'] ?? 0 ), 'bounded direct apply preserves pagination limit');
696706
$assert(2, (int) ( $bounded_auto_apply['pagination']['offset'] ?? 0 ), 'bounded direct apply preserves pagination offset');
697707
$assert('direct_apply', $bounded_auto_apply['evidence']['apply_source'] ?? '', 'bounded direct apply exposes evidence source');
698708

699709
$inventory_after = $ws->worktree_cleanup_merged(array( 'dry_run' => true, 'inventory_only' => true, 'skip_github' => true ));
700710
$assert(1, (int) ( $inventory_after['summary']['skipped_by_reason']['needs_metadata_reconcile'] ?? 0 ), 'inventory cleanup requires fewer metadata reconciliation passes after apply');
701-
$assert(8, (int) ( $inventory_after['summary']['skipped_by_reason']['active_no_signal'] ?? 0 ), 'inventory cleanup treats reconciled active metadata like current active metadata');
711+
$assert(9, (int) ( $inventory_after['summary']['skipped_by_reason']['active_no_signal'] ?? 0 ), 'inventory cleanup treats reconciled active metadata like current active metadata');
702712
$assert(false, isset($inventory_after['summary']['repair_status']), 'inventory cleanup no longer exposes migration status');
703713

704714
$run('git remote set-url origin https://github.com/acme/demo.git', $primary);
@@ -740,10 +750,11 @@ function () use ( $tmp ) {
740750

741751
\DataMachineCode\Workspace\WorktreeContextInjector::forget_metadata('demo@unmanaged-missing');
742752
$budget_offset = 0;
743-
$budget_listing = $ws->worktree_list(null, null, array( 'include_status' => false, 'include_disk' => false ));
744-
foreach ( array_values(array_filter((array) ( $budget_listing['worktrees'] ?? array() ), fn( $wt ) => empty($wt['is_primary']))) as $index => $wt ) {
745-
if ('demo@unmanaged-missing' === ( $wt['handle'] ?? '' ) ) {
746-
$budget_offset = (int) $index;
753+
for ( $probe_offset = 0; $probe_offset < 20; ++$probe_offset ) {
754+
$budget_probe = $ws->worktree_reconcile_metadata(array( 'dry_run' => true, 'limit' => 1, 'offset' => $probe_offset ));
755+
$proposal = $budget_probe['proposals'][0] ?? array();
756+
if ('demo@unmanaged-missing' === ( $proposal['handle'] ?? '' ) ) {
757+
$budget_offset = $probe_offset;
747758
break;
748759
}
749760
}

0 commit comments

Comments
 (0)