Skip to content

Commit 0ce6195

Browse files
Improve worktree cleanup at scale (#751)
* fix: improve worktree cleanup at scale * fix: satisfy cleanup lint --------- Co-authored-by: homeboy-ci[bot] <266378653+homeboy-ci[bot]@users.noreply.github.com>
1 parent f6b3685 commit 0ce6195

114 files changed

Lines changed: 364 additions & 26974 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

inc/Workspace/WorkspaceCoreUtilities.php

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -874,11 +874,22 @@ private function count_unpushed_commits( string $wt_path, int $timeout_seconds =
874874
return 0;
875875
}
876876

877+
$status = $this->run_git($wt_path, 'status --porcelain=v1 --branch --untracked-files=no', $timeout_seconds);
878+
if ( is_wp_error($status) ) {
879+
return $this->is_git_timeout_error($status) ? $status : 0;
880+
}
881+
882+
$header = strtok( (string) ( $status['output'] ?? '' ), "\n");
883+
$header = false === $header ? '' : trim($header);
884+
if ( preg_match('/ahead (\d+)/', $header, $ahead_match) ) {
885+
return (int) $ahead_match[1];
886+
}
887+
877888
// Prefer `@{push}` (respects push.default / push.remote mapping); fall
878889
// back to `@{upstream}` for the common case where they're the same.
879-
// Both expand to the tracked remote ref; if that ref is gone, this
880-
// returns non-zero exit and we can't compute unpushed — treat as 0
881-
// and let dirty / merge-signal checks handle it.
890+
// Both expand to the tracked remote ref. If that ref is gone, fall back
891+
// to the status header above: explicit ahead counts are protected, while
892+
// clean upstream-gone worktrees can still use marker-independent cleanup.
882893
$commands = array(
883894
'rev-list --count @{push}..HEAD',
884895
'rev-list --count @{upstream}..HEAD',
@@ -899,6 +910,10 @@ private function count_unpushed_commits( string $wt_path, int $timeout_seconds =
899910
}
900911
}
901912

913+
if ( ! str_contains($header, 'gone') && ! preg_match('/^## [^.\[]+$/', $header) ) {
914+
return 0;
915+
}
916+
902917
return 0;
903918
}
904919

inc/Workspace/WorkspaceHygieneReport.php

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ public function workspace_hygiene_report( array $opts = array() ): array|\WP_Err
8585
'generated_at' => gmdate('c'),
8686
'workspace_path' => $this->workspace_path,
8787
'destructive' => false,
88+
'fast_stats' => $this->build_workspace_fast_stats($worktrees, $cleanup, $size_report, $include_worktree_status),
8889
'size' => $size_report,
8990
'disk' => $this->build_workspace_disk_report(),
9091
'inventory' => array(
@@ -397,7 +398,8 @@ private function build_workspace_inventory_rows(): array {
397398
'branch_slug' => $parsed['branch_slug'],
398399
'branch' => is_array($metadata) && ! empty($metadata['branch']) ? (string) $metadata['branch'] : (string) ( $parsed['branch_slug'] ?? '' ),
399400
'path' => $path,
400-
'dirty' => 0,
401+
'dirty' => null,
402+
'git_marker_state' => $this->workspace_entry_git_marker_state($kind, $path),
401403
'created_at' => is_array($metadata) ? ( $metadata['created_at'] ?? null ) : null,
402404
'lifecycle_state' => is_array($metadata) ? ( $metadata['lifecycle_state'] ?? null ) : null,
403405
'pr_url' => is_array($metadata) ? ( $metadata['pr_url'] ?? null ) : null,
@@ -710,6 +712,109 @@ private function summarize_workspace_cleanup( ?array $cleanup, ?array $error, ar
710712
);
711713
}
712714

715+
/**
 * Build a cheap high-volume summary from rows that were already collected.
 *
 * Classifies every row using only its `is_primary`, `is_worktree`, `kind`,
 * `git_marker_state`, `dirty` and `handle` fields; no git commands or
 * filesystem probes run here. Rows whose `dirty` value is null are counted
 * as "probe skipped" and are only counted clean when the cleanup report
 * already lists their handle as a removal candidate.
 *
 * @param array<int,array>         $worktrees               Cheap or full worktree rows.
 * @param array<string,mixed>|null $cleanup                 Inventory cleanup report (`candidates`, `summary`), or null.
 * @param array<string,mixed>      $size_report             Bounded top-level size report.
 * @param bool                     $include_worktree_status Whether dirty probes ran.
 * @return array<string,mixed> Keys: mode, partial, status_probe_required_for_summary,
 *                             counts, estimated_reclaimable_bytes,
 *                             estimated_reclaimable_human, top_disk_consumers, progress.
 */
private function build_workspace_fast_stats( array $worktrees, ?array $cleanup, array $size_report, bool $include_worktree_status ): array {
	// `??` tolerates a null $cleanup, so both fall back to empty arrays.
	$cleanup_candidates = (array) ( $cleanup['candidates'] ?? array() );
	$cleanup_summary    = (array) ( $cleanup['summary'] ?? array() );

	// Index candidate handles so the per-row lookup below is a hash check.
	$safe_handles = array();
	foreach ( $cleanup_candidates as $candidate ) {
		$handle = is_array($candidate) ? (string) ( $candidate['handle'] ?? '' ) : '';
		if ( '' !== $handle ) {
			$safe_handles[ $handle ] = true;
		}
	}

	$counts = array(
		'total_candidates'            => count($worktrees),
		'safe_removable_count'        => count($cleanup_candidates),
		'valid_clean_count'           => 0,
		'valid_dirty_count'           => 0,
		'invalid_broken_orphan_count' => 0,
		'unmanaged_skipped_count'     => 0,
		'dirty_probe_skipped_count'   => 0,
		'known_worktree_count'        => 0,
		'known_primary_count'         => 0,
	);

	foreach ( $worktrees as $row ) {
		$kind        = (string) ( $row['kind'] ?? '' );
		$is_primary  = ! empty($row['is_primary']);
		$is_worktree = ! empty($row['is_worktree']);

		// Rows without a marker state are 'unknown' when they claim to be a
		// checkout, otherwise 'unmanaged'. 'unknown' is never counted as broken.
		$default_marker = ( $is_worktree || $is_primary ) ? 'unknown' : 'unmanaged';
		$marker_state   = (string) ( $row['git_marker_state'] ?? $default_marker );

		if ( $is_primary ) {
			++$counts['known_primary_count'];
			if ( ! in_array($marker_state, array( 'primary_git_dir', 'unknown' ), true) ) {
				++$counts['invalid_broken_orphan_count'];
			}
			continue;
		}

		if ( ! $is_worktree ) {
			// Only artifact/other entries are reported as unmanaged; any
			// other non-worktree kind is not counted in a bucket.
			if ( in_array($kind, array( 'artifact', 'other' ), true) ) {
				++$counts['unmanaged_skipped_count'];
			}
			continue;
		}

		++$counts['known_worktree_count'];
		if ( ! in_array($marker_state, array( 'worktree_git_file', 'unknown' ), true) ) {
			++$counts['invalid_broken_orphan_count'];
			continue;
		}

		$dirty = $row['dirty'] ?? null;
		if ( null === $dirty ) {
			// No dirty probe ran for this row: trust the cleanup report, and
			// count the row clean only if it is already a removal candidate.
			++$counts['dirty_probe_skipped_count'];
			if ( isset($safe_handles[ (string) ( $row['handle'] ?? '' ) ]) ) {
				++$counts['valid_clean_count'];
			}
			continue;
		}

		if ( (int) $dirty > 0 ) {
			++$counts['valid_dirty_count'];
		} else {
			++$counts['valid_clean_count'];
		}
	}

	// The cleanup summary may know about more blocked worktrees than the
	// rows do (e.g. when dirty probes were skipped); report the larger figure.
	$skipped_by_reason = $cleanup_summary['skipped_by_reason'] ?? array();
	$blocked_dirty     = (int) ( $skipped_by_reason['dirty_worktree'] ?? 0 ) + (int) ( $skipped_by_reason['unpushed_commits'] ?? 0 );
	if ( $blocked_dirty > $counts['valid_dirty_count'] ) {
		$counts['valid_dirty_count'] = $blocked_dirty;
	}

	$estimated_reclaimable = (int) ( $cleanup_summary['total_size_bytes'] ?? 0 );
	if ( $estimated_reclaimable <= 0 ) {
		// Start the fallback sum from zero so a negative summary total does
		// not leak into the estimate; each candidate is clamped the same way.
		$estimated_reclaimable = 0;
		foreach ( $cleanup_candidates as $candidate ) {
			$estimated_reclaimable += max(0, (int) ( is_array($candidate) ? ( $candidate['size_bytes'] ?? 0 ) : 0 ));
		}
	}

	// NOTE(review): when `scan_complete` is absent from $size_report,
	// `partial` is true (empty()) while `progress.size_scan_complete`
	// defaults to true. Confirm which default is intended.
	return array(
		'mode'                              => $include_worktree_status ? 'full_git_status' : 'cheap_metadata_first',
		'partial'                           => ! $include_worktree_status || empty($size_report['scan_complete']),
		'status_probe_required_for_summary' => false,
		'counts'                            => $counts,
		'estimated_reclaimable_bytes'       => $estimated_reclaimable,
		'estimated_reclaimable_human'       => $this->format_bytes($estimated_reclaimable),
		'top_disk_consumers'                => array_slice( (array) ( $size_report['top_entries'] ?? array() ), 0, 10),
		'progress'                          => array(
			'size_scanned_entries' => (int) ( $size_report['scanned_entries'] ?? 0 ),
			'size_total_entries'   => (int) ( $size_report['total_entries'] ?? count($worktrees) ),
			'size_scan_complete'   => (bool) ( $size_report['scan_complete'] ?? true ),
		),
	);
}
817+
713818
/**
714819
* Report whether DB cleanup storage tables are available for retention hooks.
715820
*
@@ -944,6 +1049,32 @@ private function classify_workspace_entry_kind( string $entry, array $parsed, st
9441049
return '' !== (string) ( $parsed['repo'] ?? '' ) ? 'primary' : 'other';
9451050
}
9461051

1052+
/**
 * Describe the `.git` marker found directly inside a top-level entry.
 *
 * Only `is_file()` / `is_dir()` checks on `<path>/.git` are performed; no git
 * command is run. Entries that are neither 'worktree' nor 'primary' are
 * reported as 'unmanaged' without touching the filesystem.
 *
 * @param string $kind Entry kind ('worktree', 'primary', or anything else).
 * @param string $path Entry path; trailing slashes are ignored.
 * @return string One of 'worktree_git_file', 'primary_git_dir_in_worktree_slot',
 *                'primary_git_dir', 'missing_git_marker' or 'unmanaged'.
 */
private function workspace_entry_git_marker_state( string $kind, string $path ): string {
	if ( 'worktree' !== $kind && 'primary' !== $kind ) {
		return 'unmanaged';
	}

	$git_entry = rtrim($path, '/') . '/.git';

	if ( 'primary' === $kind ) {
		// A primary entry is expected to carry a `.git` directory.
		if ( is_dir($git_entry) ) {
			return 'primary_git_dir';
		}
		return 'missing_git_marker';
	}

	// Worktree slot: a `.git` file is the expected marker; a `.git`
	// directory in this slot is reported under its own state.
	if ( is_file($git_entry) ) {
		return 'worktree_git_file';
	}

	return is_dir($git_entry) ? 'primary_git_dir_in_worktree_slot' : 'missing_git_marker';
}
1077+
9471078
/**
9481079
* Format bytes for reports.
9491080
*

0 commit comments

Comments
 (0)