Skip to content

Commit 1f0041a

Browse files
authored
Classify metadata identity reconciliation buckets (#656)
1 parent ac10bd5 commit 1f0041a

2 files changed

Lines changed: 290 additions & 17 deletions

File tree

inc/Workspace/WorkspaceMetadataReconciliation.php

Lines changed: 209 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -587,13 +587,21 @@ private function build_worktree_metadata_reconciliation_row( array $wt, array &$
587587
}
588588

589589
if ( array() !== (array) $identity['conflicts'] ) {
590+
$identity_classification = $this->classify_worktree_identity_metadata_conflict($wt, $identity);
591+
if ( ! empty($identity_classification['repairable']) && 'stale_identity_metadata' === (string) $identity_classification['reason_code'] ) {
592+
return $this->build_stale_worktree_identity_metadata_repair_row($base_row, $metadata, $identity_classification);
593+
}
594+
590595
return array(
591596
'skip' => array_merge(
592597
$base_row,
593598
array(
594-
'reason_code' => 'inconsistent_identity_metadata',
595-
'reason' => 'stored worktree identity metadata does not match the handle/path row',
596-
'identity_conflicts' => $identity['conflicts'],
599+
'reason_code' => (string) $identity_classification['reason_code'],
600+
'reason' => (string) $identity_classification['reason'],
601+
'identity_conflicts' => $identity['conflicts'],
602+
'identity_classification' => (string) $identity_classification['classification'],
603+
'proposed_source_of_truth' => $identity_classification['proposed_source_of_truth'],
604+
'next_command' => (string) $identity_classification['next_command'],
597605
)
598606
),
599607
);
@@ -1029,6 +1037,204 @@ private function set_reconciled_metadata_field( array &$metadata, array &$source
10291037
$source_map[ $field ] = $source;
10301038
}
10311039

1040+
/**
 * Classify identity metadata conflicts into operator-actionable buckets.
 *
 * Every non-manual bucket requires that the handle parses as a worktree handle
 * for the row's repo and that the path basename equals the handle. Given that,
 * buckets are evaluated in this order:
 *
 *  1. stale_identity_metadata - the slugified current branch equals the branch
 *     slug encoded in the handle. This is the only bucket marked repairable.
 *  2. default_branch_checkout_in_feature_worktree - the current branch equals
 *     the resolved default branch.
 *  3. branch_renamed_worktree - a non-empty current branch whose slug differs
 *     from the handle's branch slug.
 *
 * Anything else falls through to manual_review_identity_metadata.
 *
 * @param array<string,mixed> $wt       Worktree row (reads handle, repo, branch, path).
 * @param array<string,mixed> $identity Recovered identity data (reads repo).
 * @return array<string,mixed> Keys: classification, reason_code, reason, repairable,
 *                             proposed_source_of_truth, next_command.
 */
private function classify_worktree_identity_metadata_conflict( array $wt, array $identity ): array {
	$handle        = (string) ( $wt['handle'] ?? '' );
	$repo          = (string) ( $wt['repo'] ?? '' );
	$branch        = (string) ( $wt['branch'] ?? '' );
	$path          = rtrim((string) ( $wt['path'] ?? '' ), '/');
	$parsed        = '' !== $handle ? $this->parse_handle($handle) : array( 'repo' => '', 'branch_slug' => '', 'is_worktree' => false );
	$handle_branch = (string) ( $parsed['branch_slug'] ?? '' );
	$branch_slug   = $this->slugify_branch($branch);
	$path_basename = '' !== $path ? basename($path) : '';

	$handle_path                   = '' !== $handle && $path_basename === $handle;
	$handle_repo                   = ! empty($parsed['is_worktree']) && (string) ( $parsed['repo'] ?? '' ) === $repo;
	$handle_branch_matches_current = '' !== $branch_slug && $branch_slug === $handle_branch;

	$base = array(
		'classification'           => 'manual_review_identity_metadata',
		'reason_code'              => 'manual_review_identity_metadata',
		'reason'                   => 'stored worktree identity metadata conflicts with the current row and no safe automatic source of truth is available',
		'repairable'               => false,
		'proposed_source_of_truth' => array(
			'handle' => 'manual_review',
			'repo'   => 'manual_review',
			'branch' => 'manual_review',
			'path'   => 'manual_review',
		),
		'next_command'             => 'studio wp datamachine-code workspace worktree reconcile-metadata --dry-run --format=json',
	);

	// Every automatic bucket needs the handle to agree with both repo and path.
	if ( ! $handle_repo || ! $handle_path ) {
		return $base;
	}

	if ( $handle_branch_matches_current ) {
		return array_merge(
			$base,
			array(
				'classification'           => 'stale_identity_metadata',
				'reason_code'              => 'stale_identity_metadata',
				'reason'                   => 'stored identity metadata is stale; current handle, path, and git branch agree',
				'repairable'               => true,
				'proposed_source_of_truth' => array(
					'handle' => 'filesystem_handle',
					'repo'   => 'filesystem_handle',
					'branch' => 'current_git_branch',
					'path'   => 'git_worktree_path',
				),
				'next_command'             => 'studio wp datamachine-code workspace worktree reconcile-metadata --apply --format=json',
			)
		);
	}

	// The remaining buckets both describe a concrete current branch.
	if ( '' === $branch ) {
		return $base;
	}

	// Resolve the default branch only now: the lookup goes through a git probe
	// helper, and the stale / manual-review outcomes above never need it.
	// An empty recovered repo falls back to the row's repo so the lookup is
	// not silently disabled.
	$identity_repo  = (string) ( $identity['repo'] ?? '' );
	$default_branch = $this->resolve_worktree_identity_default_branch('' !== $identity_repo ? $identity_repo : $repo);

	if ( $default_branch === $branch ) {
		return array_merge(
			$base,
			array(
				'classification'           => 'default_branch_checkout_in_feature_worktree',
				'reason_code'              => 'default_branch_checkout_in_feature_worktree',
				'reason'                   => sprintf('worktree handle is feature-scoped, but git is currently on the default branch %s', $branch),
				'proposed_source_of_truth' => array(
					'handle' => 'filesystem_handle',
					'repo'   => 'filesystem_handle',
					'branch' => 'operator_review_required',
					'path'   => 'git_worktree_path',
				),
				'next_command'             => sprintf('git -C %s switch <intended-feature-branch>', escapeshellarg($path)),
			)
		);
	}

	// Reaching this point means the branch is non-empty, is not the default
	// branch, and its slug does not match the handle's branch slug.
	return array_merge(
		$base,
		array(
			'classification'           => 'branch_renamed_worktree',
			'reason_code'              => 'branch_renamed_worktree',
			'reason'                   => 'git branch no longer matches the canonical branch slug encoded in the worktree handle/path',
			'proposed_source_of_truth' => array(
				'handle' => 'filesystem_handle',
				'repo'   => 'filesystem_handle',
				'branch' => 'current_git_branch',
				'path'   => 'git_worktree_path',
			),
			'next_command'             => sprintf('studio wp datamachine-code workspace worktree add %s %s --from=%s', escapeshellarg($repo), escapeshellarg($branch), escapeshellarg($branch)),
		)
	);
}
1132+
1133+
/**
 * Resolve the short default branch name for identity diagnostics.
 *
 * Returns an empty string when the repo name is empty, when the primary
 * checkout has no `.git` directory, or when the default ref lookup yields an
 * error, a non-string, or an empty value.
 *
 * @param string $repo Repository name used to locate the primary checkout.
 * @return string Branch name with its ref namespace removed, or '' when unknown.
 */
private function resolve_worktree_identity_default_branch( string $repo ): string {
	if ( '' === $repo ) {
		return '';
	}

	$primary_path = $this->get_primary_path($repo);
	if ( ! is_dir($primary_path . '/.git') ) {
		return '';
	}

	$default_ref = $this->resolve_remote_default_ref($primary_path, self::CLEANUP_GIT_PROBE_TIMEOUT);
	if ( is_wp_error($default_ref) || ! is_string($default_ref) || '' === $default_ref ) {
		return '';
	}

	$prefix = 'refs/remotes/origin/';
	if ( str_starts_with($default_ref, $prefix) ) {
		return substr($default_ref, strlen($prefix));
	}

	// Other fully-qualified refs (refs/heads/<branch>, refs/remotes/<remote>/<branch>):
	// strip only the namespace so multi-segment branch names such as
	// "release/1.x" survive intact. basename() would reduce them to "1.x",
	// which could never equal the full branch name callers compare against.
	if ( 1 === preg_match('#^refs/(?:heads|remotes/[^/]+)/(.+)$#', $default_ref, $matches) ) {
		return $matches[1];
	}

	// Unrecognised shape: keep the previous last-segment behaviour.
	return basename($default_ref);
}
1154+
1155+
/**
 * Build a metadata-only repair proposal for stale stored identity metadata.
 *
 * A proposal is returned only when both the dirty-state probe and the
 * unpushed-commit count succeed and report zero. A failed probe, or any dirty
 * or unpushed state, yields a skip row instead.
 *
 * Skip rows carry the classification fields plus `identity_classification`,
 * the key the proposal row uses for the same value.
 *
 * @param array<string,mixed> $base_row       Shared reconciliation row data (reads handle, repo, branch, path).
 * @param array<string,mixed> $metadata       Stored metadata.
 * @param array<string,mixed> $classification Identity classification.
 * @return array{proposal?:array<string,mixed>,skip?:array<string,mixed>}
 */
private function build_stale_worktree_identity_metadata_repair_row( array $base_row, array $metadata, array $classification ): array {
	$handle = (string) ( $base_row['handle'] ?? '' );
	$repo   = (string) ( $base_row['repo'] ?? '' );
	$branch = (string) ( $base_row['branch'] ?? '' );
	$path   = (string) ( $base_row['path'] ?? '' );

	// Shared prefix for every skip row. The raw classification keys are kept
	// as before; identity_classification is added on top.
	$skip_context = array_merge(
		$base_row,
		$classification,
		array(
			'identity_classification' => (string) ( $classification['classification'] ?? 'stale_identity_metadata' ),
		)
	);

	$dirty = $this->probe_worktree_dirty_count($path, self::CLEANUP_GIT_PROBE_TIMEOUT);
	if ( is_wp_error($dirty) ) {
		$diagnostic = $this->classify_worktree_git_probe_failure($handle, $repo, $path, $dirty, 'dirty-state probe', 'leaving stale identity metadata unchanged');
		return array( 'skip' => array_merge($skip_context, $diagnostic) );
	}

	$unpushed = $this->count_unpushed_commits($path);
	if ( is_wp_error($unpushed) ) {
		$diagnostic = $this->classify_worktree_git_probe_failure($handle, $repo, $path, $unpushed, 'cleanup safety probe', 'leaving stale identity metadata unchanged');
		return array( 'skip' => array_merge($skip_context, $diagnostic) );
	}

	$dirty_count    = (int) $dirty;
	$unpushed_count = (int) $unpushed;

	if ( $dirty_count > 0 || $unpushed_count > 0 ) {
		return array(
			'skip' => array_merge(
				$skip_context,
				array(
					'reason_code' => 'unsafe_stale_identity_metadata',
					'reason'      => 'stale identity metadata is repairable, but dirty or unpushed worktree state blocks automatic metadata writes',
					'dirty'       => $dirty_count,
					'unpushed'    => $unpushed_count,
				)
			),
		);
	}

	// Start from the stored metadata and overwrite the identity fields with
	// the values taken from the current row.
	$proposed   = $metadata;
	$source_map = array();
	$this->set_reconciled_metadata_field($proposed, $source_map, 'handle', $handle, 'filesystem');
	$this->set_reconciled_metadata_field($proposed, $source_map, 'repo', $repo, 'filesystem');
	$this->set_reconciled_metadata_field($proposed, $source_map, 'branch', $branch, 'git');
	$this->set_reconciled_metadata_field($proposed, $source_map, 'path', $path, 'git');
	$this->set_reconciled_metadata_field($proposed, $source_map, 'observed_at', gmdate('c'), 'reconcile_run');

	// Prefer a parseable stored created_at (normalised to ISO 8601); otherwise
	// fall back to the worktree path's mtime when the path exists.
	$stored_created_ts = empty($metadata['created_at']) ? false : strtotime((string) $metadata['created_at']);
	if ( false !== $stored_created_ts ) {
		$this->set_reconciled_metadata_field($proposed, $source_map, 'created_at', gmdate('c', (int) $stored_created_ts), 'metadata');
	} else {
		$mtime = file_exists($path) ? filemtime($path) : false;
		if ( false !== $mtime ) {
			$this->set_reconciled_metadata_field($proposed, $source_map, 'created_at', gmdate('c', (int) $mtime), 'filesystem');
		}
	}

	// A missing or unrecognised lifecycle state defaults to active and is
	// attributed to the operator plan rather than to stored metadata.
	$state = isset($metadata['lifecycle_state']) ? WorktreeContextInjector::normalize_state((string) $metadata['lifecycle_state']) : null;
	$this->set_reconciled_metadata_field($proposed, $source_map, 'lifecycle_state', $state ?? WorktreeContextInjector::STATE_ACTIVE, null === $state ? 'operator_plan' : 'metadata');

	return array(
		'proposal' => array_merge(
			$base_row,
			array(
				'reason_code'              => 'stale_identity_metadata',
				'reason'                   => (string) $classification['reason'],
				'dirty'                    => $dirty_count,
				'unpushed'                 => $unpushed_count,
				// NOTE(review): this reads identity_conflicts from $base_row and
				// reports an empty list when the key is absent - confirm the
				// caller populates it before invoking this method.
				'identity_conflicts'       => $base_row['identity_conflicts'] ?? array(),
				'identity_classification'  => 'stale_identity_metadata',
				'proposed_source_of_truth' => $classification['proposed_source_of_truth'],
				'next_command'             => (string) $classification['next_command'],
				'proposed_metadata'        => $proposed,
				'source_map'               => $source_map,
			)
		),
	);
}
1237+
10321238
/**
10331239
* Apply a reviewed metadata reconciliation plan after exact revalidation.
10341240
*

tests/smoke-worktree-metadata-reconcile.php

Lines changed: 81 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -577,9 +577,9 @@ function () use ( $tmp ) {
577577
$assert(true, isset($active_rows['demo@dirty-active']['upstream_equivalence']['probe_timings_ms']['dirty_path_classification']), 'upstream equivalence includes dirty path classification timing');
578578
$assert(true, (int) ( $active_rows['demo@dirty-active']['upstream_equivalence']['dirty_paths']['inspected'] ?? 0 ) >= 1, 'batched dirty path classification preserves inspected path count');
579579

580-
class Inconsistent_Identity_Metadata_Workspace extends \DataMachineCode\Workspace\Workspace
581-
{
582-
private string $tmp;
580+
class Inconsistent_Identity_Metadata_Workspace extends \DataMachineCode\Workspace\Workspace
581+
{
582+
private string $tmp;
583583

584584
public function __construct( string $tmp )
585585
{
@@ -591,11 +591,56 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra
591591
{
592592
return array(
593593
'success' => true,
594-
'worktrees' => array(
595-
array(
596-
'handle' => 'demo@feature-foo',
597-
'repo' => 'demo',
598-
'branch' => '',
594+
'worktrees' => array(
595+
array(
596+
'handle' => 'demo@unmanaged-missing',
597+
'repo' => 'demo',
598+
'branch' => 'unmanaged-missing',
599+
'path' => $this->tmp . '/demo@unmanaged-missing',
600+
'metadata' => array(
601+
'handle' => 'demo@unmanaged-missing',
602+
'repo' => 'demo',
603+
'branch' => 'old-unmanaged-missing',
604+
'path' => $this->tmp . '/demo@unmanaged-missing-old',
605+
'created_at' => '2026-04-01T00:00:00+00:00',
606+
'observed_at' => '2026-04-01T00:00:00+00:00',
607+
'lifecycle_state' => \DataMachineCode\Workspace\WorktreeContextInjector::STATE_ACTIVE,
608+
),
609+
),
610+
array(
611+
'handle' => 'demo@unmanaged-partial',
612+
'repo' => 'demo',
613+
'branch' => 'renamed/current',
614+
'path' => $this->tmp . '/demo@unmanaged-partial',
615+
'metadata' => array(
616+
'handle' => 'demo@unmanaged-partial',
617+
'repo' => 'demo',
618+
'branch' => 'unmanaged-partial',
619+
'path' => $this->tmp . '/demo@unmanaged-partial',
620+
'created_at' => '2026-04-01T00:00:00+00:00',
621+
'observed_at' => '2026-04-01T00:00:00+00:00',
622+
'lifecycle_state' => \DataMachineCode\Workspace\WorktreeContextInjector::STATE_ACTIVE,
623+
),
624+
),
625+
array(
626+
'handle' => 'demo@unmanaged-empty',
627+
'repo' => 'demo',
628+
'branch' => 'main',
629+
'path' => $this->tmp . '/demo@unmanaged-empty',
630+
'metadata' => array(
631+
'handle' => 'demo@unmanaged-empty',
632+
'repo' => 'demo',
633+
'branch' => 'unmanaged-empty',
634+
'path' => $this->tmp . '/demo@unmanaged-empty',
635+
'created_at' => '2026-04-01T00:00:00+00:00',
636+
'observed_at' => '2026-04-01T00:00:00+00:00',
637+
'lifecycle_state' => \DataMachineCode\Workspace\WorktreeContextInjector::STATE_ACTIVE,
638+
),
639+
),
640+
array(
641+
'handle' => 'demo@feature-foo',
642+
'repo' => 'demo',
643+
'branch' => '',
599644
'path' => $this->tmp . '/demo@unmanaged-missing',
600645
'metadata' => array(
601646
'handle' => 'demo@feature-foo',
@@ -610,13 +655,35 @@ public function worktree_list( ?string $repo = null, ?string $state = null, arra
610655
),
611656
);
612657
}
613-
}
658+
}
614659

615-
$identity_plan = ( new Inconsistent_Identity_Metadata_Workspace($tmp) )->worktree_reconcile_metadata(array( 'dry_run' => true ));
616-
$identity_skip = $identity_plan['skipped'][0] ?? array();
617-
$assert('inconsistent_identity_metadata', $identity_skip['reason_code'] ?? '', 'metadata reconciliation blocks inconsistent stored identity metadata explicitly');
618-
$assert(true, isset($identity_skip['identity_conflicts']['branch']), 'inconsistent identity skip includes branch mismatch diagnostics');
619-
$active_report_page = $ws->worktree_active_no_signal_report(array( 'limit' => 1, 'offset' => 0, 'internal_budget_label' => '1s', 'internal_budget_seconds' => 60, 'internal_budget_started' => microtime(true) ));
660+
$identity_plan = ( new Inconsistent_Identity_Metadata_Workspace($tmp) )->worktree_reconcile_metadata(array( 'dry_run' => true ));
661+
$identity_proposals = array();
662+
foreach ( (array) ( $identity_plan['proposals'] ?? array() ) as $row ) {
663+
$identity_proposals[ $row['handle'] ?? '' ] = $row;
664+
}
665+
$identity_skips = array();
666+
foreach ( (array) ( $identity_plan['skipped'] ?? array() ) as $row ) {
667+
$identity_skips[ $row['handle'] ?? '' ] = $row;
668+
}
669+
$stale_identity = $identity_proposals['demo@unmanaged-missing'] ?? array();
670+
$assert('stale_identity_metadata', $stale_identity['reason_code'] ?? '', 'metadata reconciliation proposes safe stale identity metadata repair');
671+
$assert('current_git_branch', $stale_identity['proposed_source_of_truth']['branch'] ?? '', 'stale identity proposal names git branch as branch source of truth');
672+
$assert('studio wp datamachine-code workspace worktree reconcile-metadata --apply --format=json', $stale_identity['next_command'] ?? '', 'stale identity proposal includes apply next command');
673+
$assert(0, (int) ( $stale_identity['dirty'] ?? -1 ), 'stale identity repair requires clean dirty safety gate');
674+
$assert(0, (int) ( $stale_identity['unpushed'] ?? -1 ), 'stale identity repair requires clean unpushed safety gate');
675+
$branch_renamed = $identity_skips['demo@unmanaged-partial'] ?? array();
676+
$assert('branch_renamed_worktree', $branch_renamed['reason_code'] ?? '', 'metadata reconciliation classifies branch-renamed worktrees');
677+
$assert('current_git_branch', $branch_renamed['proposed_source_of_truth']['branch'] ?? '', 'branch-renamed row names current git branch as proposed source of truth');
678+
$assert(true, str_contains($branch_renamed['next_command'] ?? '', 'workspace worktree add'), 'branch-renamed row includes a replacement worktree command');
679+
$default_checkout = $identity_skips['demo@unmanaged-empty'] ?? array();
680+
$assert('default_branch_checkout_in_feature_worktree', $default_checkout['reason_code'] ?? '', 'metadata reconciliation classifies default-branch checkout in feature worktree');
681+
$assert('operator_review_required', $default_checkout['proposed_source_of_truth']['branch'] ?? '', 'default checkout leaves branch source of truth to operator review');
682+
$assert(true, str_contains($default_checkout['next_command'] ?? '', 'switch <intended-feature-branch>'), 'default checkout includes branch switch next command');
683+
$manual_identity = $identity_skips['demo@feature-foo'] ?? array();
684+
$assert('manual_review_identity_metadata', $manual_identity['reason_code'] ?? '', 'metadata reconciliation leaves ambiguous identity conflicts for manual review');
685+
$assert(true, isset($manual_identity['identity_conflicts']['branch']), 'manual identity skip includes branch mismatch diagnostics');
686+
$active_report_page = $ws->worktree_active_no_signal_report(array( 'limit' => 1, 'offset' => 0, 'internal_budget_label' => '1s', 'internal_budget_seconds' => 60, 'internal_budget_started' => microtime(true) ));
620687
$assert(true, ! is_wp_error($active_report_page) && ( $active_report_page['success'] ?? false ), 'paginated active/no-signal report succeeds');
621688
$assert(true, str_contains($active_report_page['pagination']['next_command'] ?? '', 'active-no-signal-report --limit=1 --offset=1 --until-budget=1s --format=json'), 'active/no-signal report continuation preserves report operation');
622689
$budgeted_active_report = $ws->worktree_active_no_signal_report(array( 'limit' => 20, 'offset' => 0, 'internal_budget_label' => '1s', 'internal_budget_seconds' => 1, 'internal_budget_started' => microtime(true) - 1 ));

0 commit comments

Comments
 (0)