Skip to content

Commit e629419

Browse files
diagnose
1 parent 6cd0e96 commit e629419

2 files changed

Lines changed: 87 additions & 10 deletions

File tree

packages/devextreme/docker-ci.sh

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,16 +154,23 @@ function start_runner_watchdog {
154154
local last_suite_time_file="$PWD/testing/LastSuiteTime.txt"
155155
local raw_log_file="$PWD/testing/RawLog.txt"
156156
local last_suite_time=unknown
157+
local stall_count=0
157158

158159
while true; do
159160
sleep 300
160161

161162
if [ ! -f $last_suite_time_file ] || [ $(cat $last_suite_time_file) == $last_suite_time ]; then
162-
echo "Runner stalled"
163-
# tail -n 100 $raw_log_file
164-
# kill -9 $1
163+
stall_count=$((stall_count + 1))
164+
echo "Runner stalled (attempt $stall_count/2)"
165+
166+
if [ $stall_count -ge 2 ]; then
167+
echo "Runner stalled for 10 minutes, killing process..."
168+
tail -n 100 $raw_log_file
169+
kill -9 $1
170+
fi
165171
else
166172
last_suite_time=$(cat $last_suite_time_file)
173+
stall_count=0
167174
fi
168175
done &
169176
}

packages/devextreme/testing/runner/Views/Main/RunAll.cshtml

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
6262
var TEST_TIMEOUT_SECONDS = 45,
6363
TEST_TIMEOUT = TEST_TIMEOUT_SECONDS * 1000,
64+
WORKER_STUCK_TIMEOUT = 120 * 1000, // 2 minutes without any activity
6465
WORKER_NAME_PREFIX = "workerFrame",
6566
busyCount = 0,
6667
suitesDescription = {
@@ -69,6 +70,7 @@
6970
version: "@(Model.Version)"
7071
},
7172
suitesInProgress = [ ],
73+
workerLastActivity = [ ], // Track last activity time for each worker
7274
urls = @Html.Raw(Json.Serialize(Model.Suites)),
7375
originalUrls = urls.slice(0),
7476
noTryCatch = @Html.Raw(Json.Serialize(Model.NoTryCatch)),
@@ -92,7 +94,8 @@
9294
if(window.ActiveXObject !== undefined)
9395
return 1;
9496
95-
return 2;
97+
// Temporarily changed to 4 workers (was 2) to debug stalling issues
98+
return 4; // Was: return 2;
9699
};
97100
98101
WORKER_COUNT = calcWorkerFrameCount();
@@ -145,14 +148,23 @@
145148
var resultSaving = false;
146149
147150
var nextUrl = function(i) {
151+
console.log('nextUrl: worker=' + i + ', urls.length=' + urls.length + ', busyCount=' + busyCount +
152+
', currentSuite=' + (suitesInProgress[i] ? suitesInProgress[i].name : 'null'));
153+
148154
if(!urls.length) {
149155
// $.ajax(ROOT_URL + "run/something/FrameHasFinishedRunningASuite.js?frame=" + i);
150156
if(!resultSaving && !busyCount) {
157+
console.log('All tests completed, saving results...');
151158
resultSaving = true;
152159
rootSuite.time = roundTime((new Date() - rootStartTime) / 1000);
153160
rootSuite.pureTime = roundTime(rootSuite.pureTime);
154161
saveResults();
155162
window.onbeforeunload = $.noop;
163+
} else {
164+
console.log('Waiting for tests to complete: resultSaving=' + resultSaving + ', busyCount=' + busyCount);
165+
if(busyCount < 0) {
166+
console.error('ERROR: busyCount is negative! This should not happen.');
167+
}
156168
}
157169
return;
158170
}
@@ -191,8 +203,15 @@
191203
192204
startTime: new Date(),
193205
pureTime: 0,
206+
finalized: false, // Track if suite has been finalized
194207
195208
finalize: function(success) {
209+
if(this.finalized) {
210+
console.warn('Suite already finalized: ' + this.name + ', skipping duplicate finalize');
211+
return;
212+
}
213+
214+
this.finalized = true;
196215
this.time = roundTime((new Date() - this.startTime) / 1000);
197216
this.pureTime = roundTime(this.pureTime);
198217
delete this.startTime;
@@ -203,14 +222,18 @@
203222
rootSuite.results.push(this);
204223
suitesInProgress[i] = null;
205224
busyCount--;
225+
226+
console.log('Suite finalized: ' + this.name + ', busyCount=' + busyCount);
206227
207228
setTimeout(function() { nextUrl.call(that, _i); }, 0);
208229
}
209230
};
210231
211232
worker.name = WORKER_NAME_PREFIX + i;
212-
worker.location = urlInfo.Url + "?" + $.param(additionalParams);
213233
busyCount++;
234+
console.log('Loading test in worker ' + i + ': ' + urlInfo.FullName + ', busyCount=' + busyCount);
235+
worker.location = urlInfo.Url + "?" + $.param(additionalParams);
236+
workerLastActivity[i] = Date.now(); // Mark worker as active
214237
};
215238
216239
var workers = [ ];
@@ -250,6 +273,26 @@
250273
return workers[index];
251274
};
252275
276+
var checkStuckWorkers = function() {
277+
var now = Date.now();
278+
for(var i = 0; i < WORKER_COUNT; i++) {
279+
var lastActivity = workerLastActivity[i];
280+
var suite = suitesInProgress[i];
281+
282+
if(suite && !suite.finalized && lastActivity && (now - lastActivity) > WORKER_STUCK_TIMEOUT) {
283+
console.error('Worker ' + i + ' is stuck on test: ' + suite.name + ' (no activity for ' +
284+
Math.round((now - lastActivity) / 1000) + ' seconds), busyCount=' + busyCount);
285+
console.log('Force finalizing stuck worker ' + i);
286+
287+
// Force finalize the stuck suite (finalize checks for double-finalization)
288+
suite.finalize(false); // Mark as failed
289+
290+
// Reset worker state
291+
workerLastActivity[i] = now;
292+
}
293+
}
294+
};
295+
253296
var indexFromWorkerName = function(worker) {
254297
return Number(worker.name.substr(WORKER_NAME_PREFIX.length));
255298
};
@@ -276,6 +319,7 @@
276319
var i = indexFromWorkerName(worker),
277320
testSuite = suitesInProgress[i];
278321
322+
workerLastActivity[i] = Date.now(); // Mark worker activity
279323
notifyIsAlive();
280324
281325
var testCase = {
@@ -329,7 +373,10 @@
329373
);
330374
}
331375
332-
$.post(@Html.Raw(Json.Serialize(Url.Action("NotifyTestStarted"))), { name: getTestCaseName(testSuite, qunitData) });
376+
$.post(@Html.Raw(Json.Serialize(Url.Action("NotifyTestStarted"))), { name: getTestCaseName(testSuite, qunitData) })
377+
.fail(function(jqXHR, textStatus, errorThrown) {
378+
console.warn('NotifyTestStarted failed:', textStatus, errorThrown);
379+
});
333380
};
334381
335382
var indicateTestStatusInTitle = function(failed) {
@@ -384,6 +431,8 @@
384431
testCases,
385432
testCase;
386433
434+
workerLastActivity[i] = Date.now(); // Mark worker activity
435+
387436
// Always notify on test done (removed throttling to prevent stalling)
388437
notifyDeviceTestManager("QUnit.testCaseDone");
389438
notifyIsAlive();
@@ -416,16 +465,28 @@
416465
);
417466
}
418467
419-
$.post(@Html.Raw(Json.Serialize(Url.Action("NotifyTestCompleted"))), { name: getTestCaseName(testSuite, qunitData), passed: qunitData.passed === qunitData.total});
468+
$.post(@Html.Raw(Json.Serialize(Url.Action("NotifyTestCompleted"))), { name: getTestCaseName(testSuite, qunitData), passed: qunitData.passed === qunitData.total})
469+
.fail(function(jqXHR, textStatus, errorThrown) {
470+
console.warn('NotifyTestCompleted failed:', textStatus, errorThrown);
471+
});
420472
};
421473
422474
window.RUNNER_ON_DONE = function(worker, qunitData) {
423-
var suite = suitesInProgress[indexFromWorkerName(worker)],
475+
var i = indexFromWorkerName(worker),
476+
suite = suitesInProgress[i],
424477
passed = !qunitData.failed;
425478
479+
console.log('RUNNER_ON_DONE: worker=' + i + ', suite=' + (suite ? suite.name : 'null') + ', busyCount=' + busyCount);
480+
426481
if(suite) {
482+
// finalize() handles busyCount-- internally and prevents double-finalization
427483
suite.finalize(passed, qunitData.total);
428484
notifySuiteFinalized(suite.name, passed, qunitData.runtime);
485+
} else {
486+
console.warn('RUNNER_ON_DONE: suite is null for worker ' + i + ' - likely already finalized');
487+
// Suite is null - it was already finalized (by checkStuckWorkers or previous DONE)
488+
// busyCount was already decremented in finalize(), so we just call nextUrl
489+
setTimeout(function() { nextUrl.call(this, i); }, 0);
429490
}
430491
};
431492
@@ -459,12 +520,21 @@
459520
}
460521
461522
function notifySuiteFinalized(name, passed, runtime) {
462-
$.post(@Html.Raw(Json.Serialize(Url.Action("NotifySuiteFinalized"))), { name: name, passed: passed, runtime: runtime });
523+
$.post(@Html.Raw(Json.Serialize(Url.Action("NotifySuiteFinalized"))), { name: name, passed: passed, runtime: runtime })
524+
.fail(function(jqXHR, textStatus, errorThrown) {
525+
console.warn('NotifySuiteFinalized failed:', textStatus, errorThrown);
526+
});
463527
}
464528
function notifyIsAlive(){
465-
$.post(@Html.Raw(Json.Serialize(Url.Action("NotifyIsAlive"))));
529+
$.post(@Html.Raw(Json.Serialize(Url.Action("NotifyIsAlive"))))
530+
.fail(function(jqXHR, textStatus, errorThrown) {
531+
console.warn('NotifyIsAlive failed:', textStatus, errorThrown);
532+
});
466533
}
467534
535+
// Check for stuck workers every 30 seconds
536+
setInterval(checkStuckWorkers, 30000);
537+
468538
function roundTime(time) {
469539
return +(time.toFixed(3));
470540
}

0 commit comments

Comments
 (0)