Skip to content

Commit c249d9b

Browse files
authored
FEATURE: add show all errors log to aggregate errors from all workers (#50)
1 parent 0528a33 commit c249d9b

6 files changed

Lines changed: 447 additions & 75 deletions

File tree

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flowpack\DecoupledContentStore\BackendUi\Dto;
6+
7+
use Flowpack\DecoupledContentStore\BackendUi\WorkerErrorLogAggregator;
8+
use Neos\Eel\ProtectedContextAwareInterface;
9+
use Neos\Flow\Annotations as Flow;
10+
11+
/**
 * Data holder describing one failed render task for the "all errors" view.
 *
 * All fields are public so they can be read directly as properties.
 *
 * @Flow\Proxy(false)
 */
final class WorkerErrorLog
{
    /**
     * True when the exit code equals WorkerErrorLogAggregator::EXIT_CODE_SIGTERM.
     * Derived once in the constructor from the given exit code.
     */
    public bool $wasKilledByOrchestrator;

    /**
     * @param string      $workerName        name of the task this entry belongs to
     * @param string      $status            task status as reported for the task
     * @param int         $exitCode          process exit code of the task
     * @param string|null $taskError         task-level error message, or null if there is none
     * @param string[]    $errorBlocks       extracted log excerpts for this task
     * @param string|null $lastAttemptedNode last node reference found in the task log, if any
     */
    public function __construct(
        public string $workerName,
        public string $status,
        public int $exitCode,
        public ?string $taskError,
        public array $errorBlocks,
        public ?string $lastAttemptedNode = null
    ) {
        // The promoted property and the parameter hold the same value here,
        // so the flag can be computed straight from the argument.
        $killedExitCode = WorkerErrorLogAggregator::EXIT_CODE_SIGTERM;
        $this->wasKilledByOrchestrator = ($exitCode === $killedExitCode);
    }
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flowpack\DecoupledContentStore\BackendUi;
6+
7+
use Neos\Flow\Annotations as Flow;
8+
9+
/**
 * Pulls the error-relevant parts out of raw worker log output.
 *
 * @Flow\Scope("singleton")
 */
class RenderingErrorExtractor
{
    /**
     * Extracts ERROR-prefixed lines and exception blocks from a raw log string.
     *
     * Log format produced by ContentReleaseLogger:
     *  - error()        => single line "[Renderer X] ERROR <message> [<json>]"
     *  - logException() => one writeln with "<header>\n\n<msg>\n\n<trace>\n\n<json>"
     *
     * Strategy: split on blank lines into paragraphs; keep paragraphs that
     * contain an ERROR-prefixed line or PHP stack-trace markers, and also
     * include the paragraph immediately before a stack-trace paragraph (that
     * paragraph carries the exception message in logException output).
     *
     * If nothing matches, fall back to returning everything that is not an
     * INFO/DEBUG/NOTICE/WARN line or a known operational status message, as
     * one single block.
     *
     * The result is always a zero-based list in log order, regardless of
     * which of the two strategies produced it.
     *
     * @param string $log raw log output; may be empty
     * @return string[] list of extracted blocks, empty if nothing relevant was found
     */
    public function extractErrorBlocks(string $log): array
    {
        $log = trim($log);
        if ($log === '') {
            return [];
        }

        // preg_split() returns false on a PCRE error; treat that as "no paragraphs"
        // so the line-based fallback below still gets a chance.
        $paragraphs = preg_split('/\n\s*\n/', $log) ?: [];
        $hits = [];
        foreach ($paragraphs as $index => $paragraph) {
            $hasError = (bool)preg_match('/(?:^|] )ERROR /m', $paragraph);
            $hasTrace = (bool)preg_match('/^#\d+ /m', $paragraph);
            if (!$hasError && !$hasTrace) {
                continue;
            }
            // A stack trace is preceded by the exception message paragraph;
            // pull it in unless it was already collected on its own merit.
            if ($hasTrace && $index > 0 && !isset($hits[$index - 1])) {
                $previous = $paragraphs[$index - 1];
                if (trim($previous) !== '') {
                    $hits[$index - 1] = $previous;
                }
            }
            $hits[$index] = $paragraph;
        }

        if ($hits !== []) {
            // $hits is keyed by paragraph index (sparse). Entries were inserted
            // in ascending index order, so re-indexing is enough to hand out a
            // proper list, consistent with the fallback branch below.
            return array_values($hits);
        }

        // No structured ERROR/trace matched. Drop INFO/DEBUG/NOTICE/WARN lines
        // (with or without an optional "[prefix] " section) plus a few known
        // operational status messages, and return whatever remains as a single
        // block — covers logs that use unfamiliar formats but still mark their
        // noise levels.
        $kept = [];
        foreach (explode("\n", $log) as $line) {
            if (preg_match('/^\s*(?:\[[^]]+]\s*)?(?:INFO|DEBUG|NOTICE|WARN(?:ING)?)\b/', $line)) {
                continue;
            }
            if (preg_match('/Restarting render worker\.?/', $line)) {
                continue;
            }
            $kept[] = $line;
        }
        $remaining = trim(implode("\n", $kept));

        return $remaining === '' ? [] : [$remaining];
    }

    /**
     * Returns the last node reference found in a worker log — the one the
     * worker was rendering when it failed.
     *
     * NodeRenderer logs a DEBUG line "Rendering document node variant" before
     * each render and a logException(...) entry on failure; both include the
     * node identifier in the JSON payload. The very last "node":"..." match in
     * the log is therefore the most likely failing node, whether the worker
     * threw an exception or got killed mid-render.
     *
     * @param string $log raw log output; may be empty
     * @return string|null "<node> | <nodeUri>" when a "nodeUri" directly follows the
     *                     node, just "<node>" otherwise, or null when no node
     *                     reference is found (or the pattern match fails)
     */
    public function extractLastAttemptedNode(string $log): ?string
    {
        if (trim($log) === '') {
            return null;
        }

        // Captures the JSON string value of "node" and, if it follows
        // immediately, of "nodeUri". Escaped characters inside the values are
        // skipped over so an escaped quote does not end the match early.
        if (!preg_match_all(
            '/"node"\s*:\s*"((?:\\\\.|[^"\\\\])*)"(?:\s*,\s*"nodeUri"\s*:\s*"((?:\\\\.|[^"\\\\])*)")?/',
            $log,
            $matches,
            PREG_SET_ORDER
        )) {
            return null;
        }

        $last = end($matches);
        $node = $this->decodeJsonString($last[1]);
        // With PREG_SET_ORDER an unmatched trailing group is absent from the set.
        $uri = isset($last[2]) ? $this->decodeJsonString($last[2]) : '';

        return $uri !== '' ? sprintf('%s | %s', $node, $uri) : $node;
    }

    /**
     * Decodes the inner part of a JSON string literal (without its quotes).
     *
     * Falls back to merely un-escaping "\/" when the fragment is not valid JSON.
     */
    private function decodeJsonString(string $raw): string
    {
        $decoded = json_decode('"' . $raw . '"');

        return is_string($decoded) ? $decoded : str_replace('\\/', '/', $raw);
    }
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flowpack\DecoupledContentStore\BackendUi;
6+
7+
use Flowpack\DecoupledContentStore\BackendUi\Dto\WorkerErrorLog;
8+
use Flowpack\Prunner\Dto\Job;
9+
use Flowpack\Prunner\Dto\TaskResult;
10+
use Flowpack\Prunner\PrunnerApiService;
11+
use Neos\Flow\Annotations as Flow;
12+
13+
/**
 * Builds one WorkerErrorLog entry per failed render task of a job.
 *
 * @Flow\Scope("singleton")
 */
class WorkerErrorLogAggregator
{
    /**
     * @Flow\Inject
     * @var PrunnerApiService
     */
    protected $prunnerApiService;

    /**
     * @Flow\Inject
     * @var RenderingErrorExtractor
     */
    protected $renderingErrorExtractor;

    /**
     * Exit code 143 = 128 + SIGTERM(15): orchestrator killed this worker after another one failed.
     */
    public const EXIT_CODE_SIGTERM = 143;

    /**
     * Collects the errored "render_*" tasks of the job (excluding
     * "render_finished" and "render_orchestrator"). Only when none of those
     * errored is an errored "render_orchestrator" task used instead.
     *
     * Tasks that did not exit with EXIT_CODE_SIGTERM are listed first. Logs are
     * loaded and parsed for those tasks only; SIGTERM-ed tasks get an empty
     * block list and no last attempted node.
     *
     * @return WorkerErrorLog[]
     */
    public function aggregate(Job $job): array
    {
        $workerTasks = $job->getTaskResults()
            ->filteredByPrefix('render_')
            ->withoutTasks('render_finished', 'render_orchestrator');

        $failedTasks = [];
        foreach ($workerTasks as $candidate) {
            if ($candidate->getStatus() !== TaskResult::STATUS_ERROR) {
                continue;
            }
            $failedTasks[] = $candidate;
        }

        if ($failedTasks === []) {
            $orchestratorTask = $job->getTaskResults()->get('render_orchestrator');
            $orchestratorFailed = $orchestratorTask !== null
                && $orchestratorTask->getStatus() === TaskResult::STATUS_ERROR;
            if ($orchestratorFailed) {
                $failedTasks[] = $orchestratorTask;
            }
        }

        // Real failures (non-SIGTERM) carry the actual error — show them first.
        usort(
            $failedTasks,
            static fn (TaskResult $left, TaskResult $right): int =>
                (int)($left->getExitCode() === self::EXIT_CODE_SIGTERM)
                <=> (int)($right->getExitCode() === self::EXIT_CODE_SIGTERM)
        );

        return array_map(
            fn (TaskResult $failedTask): WorkerErrorLog => $this->buildWorkerErrorLog($job, $failedTask),
            $failedTasks
        );
    }

    /**
     * Creates the entry for a single failed task, loading and parsing its logs
     * unless the task exited with EXIT_CODE_SIGTERM.
     */
    private function buildWorkerErrorLog(Job $job, TaskResult $task): WorkerErrorLog
    {
        $errorBlocks = [];
        $failingNode = null;

        if ($task->getExitCode() !== self::EXIT_CODE_SIGTERM) {
            $jobLogs = $this->prunnerApiService->loadJobLogs($job->getId(), $task->getName());
            // stderr first, then stdout, joined by a single newline.
            $logText = $jobLogs->getStderr() . "\n" . $jobLogs->getStdout();
            $errorBlocks = $this->renderingErrorExtractor->extractErrorBlocks($logText);
            $failingNode = $this->renderingErrorExtractor->extractLastAttemptedNode($logText);
        }

        return new WorkerErrorLog(
            $task->getName(),
            $task->getStatus(),
            $task->getExitCode(),
            $task->getError() ?: null,
            $errorBlocks,
            $failingNode
        );
    }
}

Classes/Controller/BackendController.php

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
namespace Flowpack\DecoupledContentStore\Controller;
33

44
use Flowpack\DecoupledContentStore\BackendUi\BackendUiDataService;
5+
use Flowpack\DecoupledContentStore\BackendUi\WorkerErrorLogAggregator;
56
use Flowpack\DecoupledContentStore\ContentReleaseManager;
67
use Flowpack\DecoupledContentStore\Core\Domain\ValueObject\ContentReleaseIdentifier;
78
use Flowpack\DecoupledContentStore\Core\Domain\ValueObject\PrunnerJobId;
@@ -40,6 +41,12 @@ class BackendController extends \Neos\Flow\Mvc\Controller\ActionController
4041
*/
4142
protected $backendUiDataService;
4243

44+
/**
45+
* @Flow\Inject
46+
* @var WorkerErrorLogAggregator
47+
*/
48+
protected $workerErrorLogAggregator;
49+
4350
/**
4451
* @Flow\Inject
4552
* @var RedisClientManager
@@ -109,7 +116,7 @@ public function indexAction(?string $contentStore = null)
109116
$this->view->assign('showToggleConfigEpochButton', $showToggleConfigEpochButton);
110117
}
111118

112-
public function detailsAction(string $contentReleaseIdentifier, ?string $contentStore = null, ?string $detailTaskName = '', ?string $prunnerJobId = '')
119+
public function detailsAction(string $contentReleaseIdentifier, ?string $contentStore = null, ?string $detailTaskName = '', ?string $prunnerJobId = '', bool $showAllRenderingErrors = false)
113120
{
114121
$contentReleaseIdentifier = ContentReleaseIdentifier::fromString($contentReleaseIdentifier);
115122
$contentStore = $contentStore ? RedisInstanceIdentifier::fromString($contentStore) : RedisInstanceIdentifier::primary();
@@ -125,6 +132,8 @@ public function detailsAction(string $contentReleaseIdentifier, ?string $content
125132
if ($detailTaskName !== '') {
126133
$this->view->assign('detailTaskName', $detailTaskName);
127134
$this->view->assign('jobLogs', $this->prunnerApiService->loadJobLogs($prunnerJobId ? PrunnerJobId::fromString($prunnerJobId)->toJobId() : $detailsData->getJob()->getId(), $detailTaskName));
135+
} elseif ($showAllRenderingErrors && $detailsData->getJob() !== null) {
136+
$this->view->assign('workerErrorLogs', $this->workerErrorLogAggregator->aggregate($detailsData->getJob()));
128137
}
129138
}
130139

Resources/Private/BackendFusion/Integration/Backend.Details.fusion

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ Flowpack.DecoupledContentStore.BackendController.details = Neos.Fusion:Component
5353
<p @if.notData={!detailsData}>
5454
No data exists for this release in Redis.
5555
</p>
56+
<Flowpack.DecoupledContentStore:AggregatedRenderingErrors
57+
@if.hasErrorLogs={workerErrorLogs}
58+
workerErrorLogs={workerErrorLogs}
59+
/>
5660
</div>
5761
<Flowpack.DecoupledContentStore:DetailsFooter />
5862
`
@@ -137,6 +141,10 @@ prototype(Flowpack.DecoupledContentStore:ContentReleaseSteps) < prototype(Neos.F
137141
</Neos.Fusion:Loop>
138142
</div>
139143
<div class="pl-5 pr-10 py-2 flex justify-end">
144+
<Flowpack.DecoupledContentStore:ContentReleaseSteps.AllRenderingErrorsButton
145+
@if.hasError={props._taskResults.filteredByPrefix('render_').aggregatedStatus == 'error'}
146+
prunnerJobId={props.prunnerJobId}
147+
/>
140148
<Flowpack.DecoupledContentStore:ContentReleaseSteps.StepTask
141149
task={props._taskResults.get('render_orchestrator')}
142150
title="orchestrator"
@@ -330,6 +338,69 @@ prototype(Flowpack.DecoupledContentStore:ContentReleaseSteps.StepTask) < prototy
330338
`
331339
}
332340

341+
// Link styled as a red pill that points to the "details" action with
// showAllRenderingErrors=1, an empty detailTaskName and the given prunnerJobId,
// keeping the current query string (addQueryString = true).
//
// NOTE(review): `_href` reads `props.prunnerJobId`, whereas the WorkerEntry
// prototype in this file reads its own value through `this.…` for the same kind
// of derived property. Confirm that `props` resolves to this component's
// prunnerJobId at this point and not to the calling component's props;
// `this.prunnerJobId` may be what is intended.
prototype(Flowpack.DecoupledContentStore:ContentReleaseSteps.AllRenderingErrorsButton) < prototype(Neos.Fusion:Component) {
342+
prunnerJobId = ''
343+
344+
_href = Neos.Fusion:UriBuilder {
345+
action = 'details'
346+
arguments.showAllRenderingErrors = 1
347+
arguments.detailTaskName = ''
348+
arguments.prunnerJobId = ${props.prunnerJobId}
349+
addQueryString = true
350+
}
351+
352+
renderer = afx`
353+
<a href={props._href} title="Show ERROR logs from all failed workers"
354+
class="inline-flex items-center mx-1 px-2.5 py-0.5 rounded-full border-2 text-xs font-medium text-white bg-red-500 border-red-500">
355+
all errors
356+
</a>
357+
`
358+
}
359+
360+
// Section listing the rendering errors of all workers: a heading followed by
// one AggregatedRenderingErrors.WorkerEntry per item of `workerErrorLogs`.
prototype(Flowpack.DecoupledContentStore:AggregatedRenderingErrors) < prototype(Neos.Fusion:Component) {
361+
workerErrorLogs = null
362+
363+
renderer = afx`
364+
<h2 class="text-3xl py-5">Rendering errors across all workers</h2>
365+
<Neos.Fusion:Loop items={props.workerErrorLogs} itemName="workerErrorLog">
366+
<Flowpack.DecoupledContentStore:AggregatedRenderingErrors.WorkerEntry workerErrorLog={workerErrorLog} />
367+
</Neos.Fusion:Loop>
368+
`
369+
}
370+
371+
// Card for a single `workerErrorLog` entry. Shows, in this order: a status
// badge, the worker name, the exit code, a "killed by orchestrator" remark
// (only if wasKilledByOrchestrator), the task error (only if present and the
// worker was not killed by the orchestrator), the last attempted node (only if
// present) and every entry of errorBlocks in its own <pre>. When there are no
// error blocks, no task error and the worker was not killed, a "No log output"
// hint is shown instead. Killed workers get a more compact card (smaller
// margin and padding).
prototype(Flowpack.DecoupledContentStore:AggregatedRenderingErrors.WorkerEntry) < prototype(Neos.Fusion:Component) {
372+
workerErrorLog = null
373+
374+
// The entry's status, passed through the StatusToClassMapping processor; the
// result is appended to the badge's class attribute below (presumably CSS
// colour classes — verify against StatusToClassMapping).
_statusClass = ${this.workerErrorLog.status}
375+
_statusClass.@process.replace = Flowpack.DecoupledContentStore:StatusToClassMapping
376+
377+
renderer = afx`
378+
<div class={(props.workerErrorLog.wasKilledByOrchestrator ? 'my-1 p-2 ' : 'my-4 p-4 ') + 'bg-white shadow rounded-lg text-gray-900'}>
379+
<div class="flex items-center mb-2">
380+
<span class={"inline-flex items-center mr-2 px-2.5 py-0.5 rounded-full border-2 text-xs font-medium text-white " + props._statusClass}>
381+
{props.workerErrorLog.status}
382+
</span>
383+
<span class="font-mono text-sm text-gray-900 mr-2">{props.workerErrorLog.workerName}</span>
384+
<span class="text-xs text-gray-500">exit {props.workerErrorLog.exitCode}</span>
385+
<span @if.killed={props.workerErrorLog.wasKilledByOrchestrator} class="ml-2 text-xs text-gray-500 italic">
386+
killed by orchestrator after another worker failed
387+
</span>
388+
</div>
389+
<p @if.hasTaskError={props.workerErrorLog.taskError && !props.workerErrorLog.wasKilledByOrchestrator} class="text-sm text-red-700 mb-2">{props.workerErrorLog.taskError}</p>
390+
<div @if.hasNode={props.workerErrorLog.lastAttemptedNode} class="mb-3 p-3 bg-red-100 border-l-4 border-red-500 rounded">
391+
<p class="text-xs font-semibold text-red-800 mb-1 uppercase tracking-wide">Last node attempted before failure</p>
392+
<p class="text-sm text-gray-900 font-mono break-all">{props.workerErrorLog.lastAttemptedNode}</p>
393+
</div>
394+
<Neos.Fusion:Loop items={props.workerErrorLog.errorBlocks} itemName="block">
395+
<pre class="text-gray-900 bg-gray-100 p-3 rounded text-xs overflow-x-auto whitespace-pre-wrap mb-2">{block}</pre>
396+
</Neos.Fusion:Loop>
397+
<p @if.noBlocks={!props.workerErrorLog.errorBlocks && !props.workerErrorLog.wasKilledByOrchestrator && !props.workerErrorLog.taskError} class="text-sm text-gray-500 italic">
398+
No log output from this worker.
399+
</p>
400+
</div>
401+
`
402+
}
403+
333404
prototype(Flowpack.DecoupledContentStore:ContentReleaseStats.StatsBox) < prototype(Neos.Fusion:Component) {
334405
renderer = afx`
335406
<div class="flex-1 mr-5 my-5 bg-white shadow rounded-lg overflow-hidden py-3 px-6">

0 commit comments

Comments
 (0)