55namespace Workflow \V2 \Support ;
66
77use Carbon \CarbonInterface ;
8+ use Illuminate \Contracts \Cache \Repository as CacheRepository ;
9+ use Illuminate \Support \Facades \App ;
810
911final class HealthCheck
1012{
13+ public const CATEGORY_CORRECTNESS = 'correctness ' ;
14+
15+ public const CATEGORY_ACCELERATION = 'acceleration ' ;
16+
1117 /**
1218 * @return array<string, mixed>
1319 */
@@ -24,6 +30,7 @@ public static function snapshot(?CarbonInterface $now = null): array
2430 self ::taskTransportCheck ($ metrics ['tasks ' ] ?? [], $ metrics ['backlog ' ] ?? []),
2531 self ::durableResumePathCheck ($ metrics ['backlog ' ] ?? [], $ metrics ['repair ' ] ?? []),
2632 self ::workerCompatibilityCheck ($ metrics ['workers ' ] ?? []),
33+ self ::longPollWakeAccelerationCheck (),
2734 ];
2835 $ status = self ::status ($ checks );
2936
@@ -32,6 +39,7 @@ public static function snapshot(?CarbonInterface $now = null): array
3239 'status ' => $ status ,
3340 'healthy ' => $ status !== 'error ' ,
3441 'checks ' => $ checks ,
42+ 'categories ' => self ::categorySummary ($ checks ),
3543 'operator_metrics ' => $ metrics ,
3644 'structural_limits ' => StructuralLimits::snapshot (),
3745 ];
@@ -60,6 +68,7 @@ private static function backendCheck(array $backend): array
6068 $ supported
6169 ? 'The configured database, queue, and cache backends satisfy the v2 capability contract. '
6270 : 'One or more configured v2 backend capabilities are unsupported. ' ,
71+ self ::CATEGORY_CORRECTNESS ,
6372 [
6473 'issue_count ' => count ($ issues ),
6574 'issues ' => $ issues ,
@@ -81,6 +90,7 @@ private static function runSummaryProjectionCheck(array $projection): array
8190 $ needsRebuild === 0
8291 ? 'Run-summary projections are aligned with durable v2 runs. '
8392 : 'Run-summary projections are missing, stale, schema-outdated, or orphaned; rebuild them before trusting Waterline lists. ' ,
93+ self ::CATEGORY_CORRECTNESS ,
8494 [
8595 'needs_rebuild ' => $ needsRebuild ,
8696 'missing ' => self ::integer ($ projection ['missing ' ] ?? 0 ),
@@ -120,6 +130,7 @@ private static function selectedRunProjectionCheck(array $projections): array
120130 $ needsRebuild === 0
121131 ? 'Selected-run wait, timeline, timer, and lineage projections are aligned with durable v2 detail. '
122132 : 'Selected-run wait, timeline, timer, or lineage projections need rebuild before trusting Waterline detail. ' ,
133+ self ::CATEGORY_CORRECTNESS ,
123134 [
124135 'needs_rebuild ' => $ needsRebuild ,
125136 'run_waits_needs_rebuild ' => $ waitNeedsRebuild ,
@@ -160,6 +171,7 @@ private static function historyRetentionInvariantCheck(array $history): array
160171 $ orphaned === 0
161172 ? 'Workflow history events all reference retained workflow runs. '
162173 : 'Workflow history events exist without retained workflow runs; retention cleanup must reconcile them. ' ,
174+ self ::CATEGORY_CORRECTNESS ,
163175 [
164176 'history_orphan_total ' => $ orphaned ,
165177 'events ' => self ::integer ($ history ['events ' ] ?? 0 ),
@@ -181,6 +193,7 @@ private static function commandContractCheck(array $metrics): array
181193 $ needed === 0
182194 ? 'WorkflowStarted command-contract snapshots are complete. '
183195 : 'Some WorkflowStarted command-contract snapshots need backfill before operators can trust command forms. ' ,
196+ self ::CATEGORY_CORRECTNESS ,
184197 [
185198 'backfill_needed_runs ' => $ needed ,
186199 'backfill_available_runs ' => self ::integer ($ metrics ['backfill_available_runs ' ] ?? 0 ),
@@ -204,6 +217,7 @@ private static function taskTransportCheck(array $tasks, array $backlog): array
204217 $ unhealthyTasks === 0
205218 ? 'No unhealthy durable task transport state is currently projected. '
206219 : 'One or more durable tasks have unhealthy transport, claim, dispatch, or lease state. ' ,
220+ self ::CATEGORY_CORRECTNESS ,
207221 [
208222 'unhealthy_tasks ' => $ unhealthyTasks ,
209223 'repair_needed_runs ' => self ::integer ($ backlog ['repair_needed_runs ' ] ?? 0 ),
@@ -228,6 +242,7 @@ private static function durableResumePathCheck(array $backlog, array $repair): a
228242 $ repairNeededRuns === 0
229243 ? 'Every open v2 run has a projected durable resume path. '
230244 : 'One or more open v2 runs are missing their durable next-resume source and need repair. ' ,
245+ self ::CATEGORY_CORRECTNESS ,
231246 [
232247 'repair_needed_runs ' => $ repairNeededRuns ,
233248 'missing_task_candidates ' => self ::integer ($ repair ['missing_task_candidates ' ] ?? 0 ),
@@ -258,6 +273,7 @@ private static function workerCompatibilityCheck(array $workers): array
258273 $ required === null
259274 ? 'No current v2 compatibility marker is required. '
260275 : 'At least one active worker heartbeat advertises the current v2 compatibility marker. ' ,
276+ self ::CATEGORY_CORRECTNESS ,
261277 [
262278 'required_compatibility ' => $ required ,
263279 'active_workers ' => self ::integer ($ workers ['active_workers ' ] ?? 0 ),
@@ -271,6 +287,7 @@ private static function workerCompatibilityCheck(array $workers): array
271287 'worker_compatibility ' ,
272288 'warning ' ,
273289 'No active worker heartbeat advertises the current v2 compatibility marker. ' ,
290+ self ::CATEGORY_CORRECTNESS ,
274291 [
275292 'required_compatibility ' => $ required ,
276293 'active_workers ' => self ::integer ($ workers ['active_workers ' ] ?? 0 ),
@@ -280,6 +297,112 @@ private static function workerCompatibilityCheck(array $workers): array
280297 );
281298 }
282299
/**
 * Acceleration-layer health for the long-poll wake surface.
 *
 * The wake layer is optional by contract: correctness continues even
 * when this check reports `warning`. The check exists so operators
 * can answer "is the acceleration layer propagating?" as a separate
 * question from "is work being discovered?".
 *
 * Possible outcomes, all tagged with the acceleration category:
 *  - `warning` when no cache repository can be resolved;
 *  - `ok` when the validator's safety verdict is (or defaults to) safe;
 *  - `warning` otherwise, carrying the validator's message when it
 *    supplied one and a generic degradation message when it did not.
 *
 * The `data` payload always has the keys `multi_node`, `backend`,
 * `capable`, `safe` and `reason`; everything except `multi_node` stays
 * `null` when the cache repository could not be resolved.
 *
 * @return array<string, mixed>
 */
private static function longPollWakeAccelerationCheck(): array
{
    $multiNode = (bool) config('workflows.v2.long_poll.multi_node', false);
    $data = [
        'multi_node' => $multiNode,
        'backend' => null,
        'capable' => null,
        'safe' => null,
        'reason' => null,
    ];

    $cache = self::resolveCacheRepository();

    // Without a cache repository there is nothing to validate; report the
    // layer as degraded rather than failing the snapshot.
    if ($cache === null) {
        return self::check(
            'long_poll_wake_acceleration',
            'warning',
            'Cache repository is not resolvable; wake acceleration may be disabled. Durable discovery continues via bounded polling.',
            self::CATEGORY_ACCELERATION,
            $data,
        );
    }

    $validator = new LongPollCacheValidator();
    $capability = $validator->validateMultiNodeCapable($cache);
    $safety = $validator->checkMultiNodeSafety($cache, $multiNode);

    // Normalise the validator output defensively: only string values are
    // surfaced for `backend`/`reason`, and a missing `safe` key counts as safe.
    $data['backend'] = is_string($capability['backend'] ?? null) ? $capability['backend'] : null;
    $data['capable'] = (bool) ($capability['capable'] ?? false);
    $data['safe'] = (bool) ($safety['safe'] ?? true);
    // Prefer the safety message; fall back to the capability reason.
    $data['reason'] = is_string($safety['message'] ?? null)
        ? $safety['message']
        : (is_string($capability['reason'] ?? null) ? $capability['reason'] : null);

    if ($data['safe'] === true) {
        return self::check(
            'long_poll_wake_acceleration',
            'ok',
            $multiNode
                ? 'Wake acceleration backend is multi-node capable; dispatch discovery benefits from sub-second signalling.'
                : 'Wake acceleration backend is configured; dispatch discovery benefits from sub-second signalling.',
            self::CATEGORY_ACCELERATION,
            $data,
        );
    }

    return self::check(
        'long_poll_wake_acceleration',
        'warning',
        $data['reason'] ?? 'Wake acceleration layer is degraded; durable discovery continues via bounded polling.',
        self::CATEGORY_ACCELERATION,
        $data,
    );
}
364+
/**
 * Resolve the cache repository from the application container.
 *
 * Any throwable raised while resolving is deliberately swallowed and
 * reported as `null`, so the caller can turn an unresolvable cache
 * into a `warning` check instead of letting the exception escape.
 *
 * @return CacheRepository|null The resolved repository, or null when resolution fails.
 */
private static function resolveCacheRepository(): ?CacheRepository
{
    try {
        return App::make(CacheRepository::class);
    } catch (\Throwable) {
        // Best-effort lookup: the failure itself is not interesting here.
        return null;
    }
}
373+
/**
 * Summarize check status per category so operators can answer
 * "is work being discovered?" (correctness) and "is the
 * acceleration layer propagating?" (acceleration) as separate
 * questions without re-aggregating the check list.
 *
 * Both known categories are always present in the result, even when
 * no check belongs to them. Each entry holds the aggregated `status`
 * of the category's checks and their `check_count`.
 *
 * @param list<array<string, mixed>> $checks
 * @return array<string, array<string, mixed>>
 */
private static function categorySummary(array $checks): array
{
    // Seed the known categories so they appear even with zero checks.
    $categories = [
        self::CATEGORY_CORRECTNESS => [],
        self::CATEGORY_ACCELERATION => [],
    ];

    foreach ($checks as $check) {
        // A check without a category is counted as a correctness check;
        // any other category value gets its own bucket.
        $category = $check['category'] ?? self::CATEGORY_CORRECTNESS;
        $categories[$category][] = $check;
    }

    $summaries = [];
    foreach ($categories as $name => $entries) {
        $summaries[$name] = [
            'status' => self::status($entries),
            'check_count' => count($entries),
        ];
    }

    return $summaries;
}
405+
283406 /**
284407 * @param list<array<string, mixed>> $checks
285408 */
@@ -302,11 +425,12 @@ private static function status(array $checks): string
302425 * @param array<string, mixed> $data
303426 * @return array<string, mixed>
304427 */
305- private static function check (string $ name , string $ status , string $ message , array $ data ): array
428+ private static function check (string $ name , string $ status , string $ message , string $ category , array $ data ): array
306429 {
307430 return [
308431 'name ' => $ name ,
309432 'status ' => $ status ,
433+ 'category ' => $ category ,
310434 'message ' => $ message ,
311435 'data ' => $ data ,
312436 ];
0 commit comments