Skip to content

Commit 0bb24c9

Browse files
committed
loadtest-controller: add more details to the report for retries
1 parent ed765fa commit 0bb24c9

13 files changed

Lines changed: 1180 additions & 220 deletions

File tree

e2e-tests/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ All smoke tests should:
117117
- "Sessions Created"
118118
- "Total Participants"
119119
- "User Connections" (mandatory)
120-
- Table columns: User, Session, Join Date, Disconnect Date, Retries
120+
- Table columns: User, Session, Join Date, Retries, Retry Details
121121
- Two user rows (User1 and User2) present
122122

123123
If validation fails, the result files are kept in the `results/` directory for debugging.

e2e-tests/scripts/validate-default.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,16 +96,16 @@ if [ -f "$RESULTS_DIR/results.txt" ]; then
9696

9797
if grep -q "User Connections" "$HTML_FILE"; then
9898
echo "✓ HTML contains 'User Connections'"
99-
if grep -q "Join Date" "$HTML_FILE" && grep -q "Disconnect Date" "$HTML_FILE" && grep -q "Retries" "$HTML_FILE"; then
100-
echo "✓ HTML contains new table columns"
99+
if grep -q "Join Date" "$HTML_FILE" && grep -q "Retries" "$HTML_FILE" && grep -q "Retry Details" "$HTML_FILE"; then
100+
echo "✓ HTML contains expected table columns"
101101
if grep -q "User1" "$HTML_FILE" && grep -q "User2" "$HTML_FILE"; then
102102
echo "✓ HTML contains two user rows"
103103
else
104104
echo "✗ HTML missing user rows (expected User1 and User2)"
105105
HTML_VALIDATION_PASSED=false
106106
fi
107107
else
108-
echo "✗ HTML missing new table columns"
108+
echo "✗ HTML missing expected table columns (Join Date, Retries, Retry Details)"
109109
HTML_VALIDATION_PASSED=false
110110
fi
111111
else

loadtest-controller/src/main/java/io/openvidu/loadtest/models/testcase/ResultReport.java

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@
77
import java.util.Map;
88
import java.util.TreeMap;
99
import java.util.concurrent.TimeUnit;
10+
import java.util.LinkedHashMap;
1011

1112
import org.slf4j.Logger;
1213
import org.slf4j.LoggerFactory;
1314

15+
import io.openvidu.loadtest.services.BrowserEmulatorClient.RetryAttempt;
16+
1417
public class ResultReport {
1518

1619
private static Logger log = LoggerFactory.getLogger(ResultReport.class);
@@ -42,7 +45,6 @@ public class ResultReport {
4245
private double retrySuccessRate = 0.0;
4346
private double avgRetriesPerParticipant = 0.0;
4447
private int maxRetriesInSingleParticipant = 0;
45-
private Map<String, Integer> errorCounts = new TreeMap<>();
4648
private Map<String, Double> workerCpuAvg = new TreeMap<>();
4749
private Map<String, Double> workerCpuMax = new TreeMap<>();
4850
private Map<String, Integer> workerStreams = new TreeMap<>();
@@ -51,6 +53,7 @@ public class ResultReport {
5153
private Map<String, Calendar> userSuccessTimestamps = new TreeMap<>(); // key: "session-user"
5254
private Map<String, Integer> userRetryCounts = new TreeMap<>(); // key: "session-user"
5355
private Map<String, Calendar> userDisconnectTimestamps = new TreeMap<>(); // key: "session-user"
56+
private Map<String, List<RetryAttempt>> userRetryAttempts = new LinkedHashMap<>(); // key: "session-user"
5457

5558
public ResultReport() {
5659
}
@@ -63,9 +66,9 @@ public ResultReport build() {
6366
this.kibanaUrl, this.s3BucketName, this.timePerWorker, this.timePerRecordingWorker,
6467
this.userStartTimes, this.participantResponses, this.totalRetries, this.successfulRetries,
6568
this.retrySuccessRate, this.avgRetriesPerParticipant, this.maxRetriesInSingleParticipant,
66-
this.errorCounts, this.workerCpuAvg, this.workerCpuMax, this.workerStreams, this.workerParticipants,
69+
this.workerCpuAvg, this.workerCpuMax, this.workerStreams, this.workerParticipants,
6770
this.userStartDelaysPercentiles, this.userDisconnectTimestamps,
68-
this.userSuccessTimestamps, this.userRetryCounts);
71+
this.userSuccessTimestamps, this.userRetryCounts, this.userRetryAttempts);
6972
}
7073

7174
public ResultReport setManualParticipantAllocation(boolean isManualParticipantAllocation) {
@@ -188,11 +191,6 @@ public ResultReport setMaxRetriesInSingleParticipant(int max) {
188191
return this;
189192
}
190193

191-
public ResultReport setErrorCounts(Map<String, Integer> errorCounts) {
192-
this.errorCounts = errorCounts;
193-
return this;
194-
}
195-
196194
public ResultReport setWorkerCpuStats(Map<String, Double> avg, Map<String, Double> max) {
197195
this.workerCpuAvg = avg;
198196
this.workerCpuMax = max;
@@ -230,14 +228,12 @@ public ResultReport setUserDisconnectTimestamps(Map<String, Calendar> userDiscon
230228
return this;
231229
}
232230

231+
/**
 * Stores the detailed retry attempts recorded for each participant.
 * The map is kept by reference; no copy is made.
 *
 * @param userRetryAttempts retry attempts per participant (the backing field is
 *                          annotated as keyed by "session-user")
 * @return this instance, so that setter calls can be chained
 */
public ResultReport setUserRetryAttempts(Map<String, List<RetryAttempt>> userRetryAttempts) {
232+
this.userRetryAttempts = userRetryAttempts;
233+
return this;
234+
}
235+
233236
private void computeAggregates() {
234-
// Compute error counts from participantResponses
235-
errorCounts.clear();
236-
for (CreateParticipantResponse resp : participantResponses) {
237-
if (!resp.isResponseOk() && resp.getStopReason() != null) {
238-
errorCounts.merge(resp.getStopReason(), 1, Integer::sum);
239-
}
240-
}
241237
// Compute per-worker and global CPU stats
242238
Map<String, List<Double>> cpuPerWorker = new TreeMap<>();
243239
double cpuSum = 0;
@@ -299,11 +295,6 @@ private double getPercentile(List<Long> sorted, double percentile) {
299295
return sorted.get(index);
300296
}
301297

302-
// Getters for new fields
303-
public Map<String, Integer> getErrorCounts() {
304-
return errorCounts;
305-
}
306-
307298
public Map<String, Double> getWorkerCpuAvg() {
308299
return workerCpuAvg;
309300
}
@@ -328,6 +319,10 @@ public Map<String, Calendar> getUserDisconnectTimestamps() {
328319
return userDisconnectTimestamps;
329320
}
330321

322+
/**
 * Returns the detailed retry attempts recorded for each participant.
 * The stored map itself is returned, not a copy.
 *
 * @return retry attempts per participant (the backing field is annotated as
 *         keyed by "session-user")
 */
public Map<String, List<RetryAttempt>> getUserRetryAttempts() {
323+
return userRetryAttempts;
324+
}
325+
331326
public int getTotalRetries() {
332327
return totalRetries;
333328
}
@@ -360,10 +355,11 @@ private ResultReport(int totalParticipants, int numSessionsCompleted, int numSes
360355
List<Long> timePerRecordingWorker, Map<Calendar, List<String>> userStartTimes,
361356
List<CreateParticipantResponse> participantResponses, int totalRetries, int successfulRetries,
362357
double retrySuccessRate, double avgRetriesPerParticipant, int maxRetriesInSingleParticipant,
363-
Map<String, Integer> errorCounts, Map<String, Double> workerCpuAvg, Map<String, Double> workerCpuMax,
358+
Map<String, Double> workerCpuAvg, Map<String, Double> workerCpuMax,
364359
Map<String, Integer> workerStreams, Map<String, Integer> workerParticipants,
365360
double[] userStartDelaysPercentiles, Map<String, Calendar> userDisconnectTimestamps,
366-
Map<String, Calendar> userSuccessTimestamps, Map<String, Integer> userRetryCounts) {
361+
Map<String, Calendar> userSuccessTimestamps, Map<String, Integer> userRetryCounts,
362+
Map<String, List<RetryAttempt>> userRetryAttempts) {
367363
this.totalParticipants = totalParticipants;
368364
this.numSessionsCompleted = numSessionsCompleted;
369365
this.numSessionsCreated = numSessionsCreated;
@@ -389,7 +385,6 @@ private ResultReport(int totalParticipants, int numSessionsCompleted, int numSes
389385
this.retrySuccessRate = retrySuccessRate;
390386
this.avgRetriesPerParticipant = avgRetriesPerParticipant;
391387
this.maxRetriesInSingleParticipant = maxRetriesInSingleParticipant;
392-
this.errorCounts = errorCounts;
393388
this.workerCpuAvg = workerCpuAvg;
394389
this.workerCpuMax = workerCpuMax;
395390
this.workerStreams = workerStreams;
@@ -398,6 +393,7 @@ private ResultReport(int totalParticipants, int numSessionsCompleted, int numSes
398393
this.userDisconnectTimestamps = userDisconnectTimestamps;
399394
this.userSuccessTimestamps = userSuccessTimestamps;
400395
this.userRetryCounts = userRetryCounts;
396+
this.userRetryAttempts = userRetryAttempts;
401397
}
402398

403399
private String getDuration() {

loadtest-controller/src/main/java/io/openvidu/loadtest/services/BrowserEmulatorClient.java

Lines changed: 79 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import java.net.ConnectException;
66
import java.util.ArrayList;
77
import java.util.HashMap;
8+
import java.util.LinkedHashMap;
89
import java.util.List;
910
import java.util.Calendar;
1011
import java.util.Map;
@@ -66,6 +67,7 @@ public class BrowserEmulatorClient {
6667
private WorkerUrlResolver workerUrlResolver;
6768

6869
private ConcurrentHashMap<String, ConcurrentHashMap<String, AtomicInteger>> clientFailures = new ConcurrentHashMap<>();
70+
private ConcurrentHashMap<String, ConcurrentHashMap<String, List<RetryAttempt>>> clientRetryAttempts = new ConcurrentHashMap<>();
6971
private ConcurrentHashMap<String, ConcurrentHashMap<String, Role>> clientRoles = new ConcurrentHashMap<>();
7072
private ConcurrentHashMap<String, TestCase> participantTestCases = new ConcurrentHashMap<>();
7173
private ConcurrentHashMap<String, AtomicBoolean> participantConnecting = new ConcurrentHashMap<>();
@@ -81,6 +83,33 @@ public class BrowserEmulatorClient {
8183

8284
private String httpProtocolPrefix;
8385

86+
public static class RetryAttempt {
87+
private final int attemptNumber;
88+
private final Calendar errorTimestamp;
89+
private Calendar reconnectTimestamp;
90+
91+
public RetryAttempt(int attemptNumber, Calendar errorTimestamp) {
92+
this.attemptNumber = attemptNumber;
93+
this.errorTimestamp = errorTimestamp;
94+
}
95+
96+
public void setReconnectTimestamp(Calendar reconnectTimestamp) {
97+
this.reconnectTimestamp = reconnectTimestamp;
98+
}
99+
100+
public int getAttemptNumber() {
101+
return attemptNumber;
102+
}
103+
104+
public Calendar getErrorTimestamp() {
105+
return errorTimestamp;
106+
}
107+
108+
public Calendar getReconnectTimestamp() {
109+
return reconnectTimestamp;
110+
}
111+
}
112+
84113
public BrowserEmulatorClient(LoadTestConfig loadTestConfig, CustomHttpClient httpClient, JsonUtils jsonUtils,
85114
Sleeper sleeper, WorkerUrlResolver workerUrlResolver) {
86115
this.loadTestConfig = loadTestConfig;
@@ -94,6 +123,7 @@ public BrowserEmulatorClient(LoadTestConfig loadTestConfig, CustomHttpClient htt
94123
public void clean() {
95124
this.isClean.set(true);
96125
this.clientFailures.clear();
126+
this.clientRetryAttempts.clear();
97127
this.clientRoles.clear();
98128
this.participantTestCases.clear();
99129
this.participantConnecting.clear();
@@ -227,8 +257,7 @@ public void addClientFailure(String workerUrl, String participant, String sessio
227257
log.debug("Stop reconnecting participant {} in session {}", participant, session);
228258
this.lastErrorReconnectingResponse = new CreateParticipantResponse()
229259
.setResponseOk(false)
230-
.setStopReason("Participant " + participant + "-" + session + " failed after "
231-
+ newFailures + " retries");
260+
.setStopReason(this.buildParticipantFailureReason(participant, session, newFailures, true));
232261
}
233262
}
234263
}
@@ -266,23 +295,27 @@ private void afterDisconnect(String workerUrl, String participant, String sessio
266295
try {
267296
ConcurrentHashMap<String, Role> workerRoles = this.clientRoles.get(workerUrl);
268297
if (workerRoles == null) {
269-
log.debug("Worker roles is null for {} in session {} in {}. Waiting ...", participant, session, workerUrl);
298+
log.debug("Worker roles is null for {} in session {} in {}. Waiting ...", participant, session,
299+
workerUrl);
270300
sleeper.sleep(WAIT_S, null);
271301
this.afterDisconnect(workerUrl, participant, session);
272302
return;
273303
}
274304
Role role = workerRoles.get(user);
275305
if (role == null) {
276-
log.warn("Role is null for {} in session {} in {}. This worker may not have this participant.", participant, session, workerUrl);
306+
log.warn("Role is null for {} in session {} in {}. This worker may not have this participant.",
307+
participant, session, workerUrl);
277308
return;
278309
}
279310
int userNumber = Integer.parseInt(participant.replace(loadTestConfig.getUserNamePrefix(), ""));
280311
int sessionNumber = Integer.parseInt(session.replace(loadTestConfig.getSessionNamePrefix(), ""));
281312
CreateParticipantResponse response = null;
282313
if (role.equals(Role.PUBLISHER)) {
283-
response = this.createPublisher(workerUrl, userNumber, sessionNumber, this.participantTestCases.get(user));
314+
response = this.createPublisher(workerUrl, userNumber, sessionNumber,
315+
this.participantTestCases.get(user));
284316
} else {
285-
response = this.createSubscriber(workerUrl, userNumber, sessionNumber, this.participantTestCases.get(user));
317+
response = this.createSubscriber(workerUrl, userNumber, sessionNumber,
318+
this.participantTestCases.get(user));
286319
}
287320
if (response.isResponseOk()) {
288321
this.participantReconnecting.remove(user);
@@ -295,7 +328,7 @@ private void afterDisconnect(String workerUrl, String participant, String sessio
295328
}
296329
}
297330

298-
private HttpResponse<String> disconnectUser(String workerUrl, String participant, String session) {
331+
private HttpResponse<String> disconnectUser(String workerUrl, String participant, String session) {
299332
try {
300333
log.info("Deleting participant {} from worker {}", participant, workerUrl);
301334
Map<String, String> headers = new HashMap<>();
@@ -464,22 +497,31 @@ private CreateParticipantResponse createParticipant(String workerUrl, int userNu
464497
key -> new ConcurrentHashMap<>());
465498
AtomicInteger userFailures = failuresMap.computeIfAbsent(user, key -> new AtomicInteger(0));
466499
int failures = userFailures.incrementAndGet();
500+
501+
ConcurrentHashMap<String, List<RetryAttempt>> retryAttemptsMap = this.clientRetryAttempts
502+
.computeIfAbsent(workerUrl, key -> new ConcurrentHashMap<>());
503+
List<RetryAttempt> userAttempts = retryAttemptsMap.computeIfAbsent(user, key -> new ArrayList<>());
504+
Calendar errorTime = Calendar.getInstance();
505+
userAttempts.add(new RetryAttempt(failures, errorTime));
467506
log.error("Participant {} in session {} failed {} times", userId, sessionId, failures);
468507
sleeper.sleep(WAIT_S, null);
469508
if (!loadTestConfig.isRetryMode() || isResponseLimitReached(failures) || endOfTest.get()) {
470-
String reason = "Participant " + userId + "-" + sessionId + " failed after "
471-
+ failures + " retries";
472-
// Set lastErrorReconnectingResponse to trigger test termination
509+
boolean isReconnecting = this.participantReconnecting.contains(user);
510+
String reason = this.buildParticipantFailureReason(userId, sessionId, failures, isReconnecting);
473511
this.lastErrorReconnectingResponse = new CreateParticipantResponse()
474512
.setResponseOk(false)
475513
.setStopReason(reason);
476-
// Also set stopReason on the returned response to avoid race condition
477-
// where getLastResponse() may return this object before lastErrorReconnectingResponse is visible
478514
return cpr.setResponseOk(false).setStopReason(reason);
479515
}
480516
log.warn("Retrying");
481517
return this.createParticipant(workerUrl, userNumber, sessionNumber, testCase, role);
482518
} else {
519+
ConcurrentHashMap<String, List<RetryAttempt>> retryAttemptsMap = this.clientRetryAttempts
520+
.computeIfAbsent(workerUrl, key -> new ConcurrentHashMap<>());
521+
List<RetryAttempt> userAttempts = retryAttemptsMap.computeIfAbsent(user, key -> new ArrayList<>());
522+
if (!userAttempts.isEmpty()) {
523+
userAttempts.get(userAttempts.size() - 1).setReconnectTimestamp(Calendar.getInstance());
524+
}
483525
this.participantConnecting.get(user).set(false);
484526
this.saveParticipantData(workerUrl, testCase.isTeaching() ? Role.PUBLISHER : role);
485527
}
@@ -580,6 +622,15 @@ private boolean isResponseLimitReached(int failures) {
580622
return failures == loadTestConfig.getRetryTimes();
581623
}
582624

625+
private String buildParticipantFailureReason(String participant, String session, int attempts,
626+
boolean reconnecting) {
627+
if (reconnecting) {
628+
return "Participant " + participant + "-" + session + " failed to reconnect after " + attempts
629+
+ " attempts";
630+
}
631+
return "Participant " + participant + "-" + session + " failed after " + attempts + " retries";
632+
}
633+
583634
private CreateUserRequestBody generateRequestBody(int userNumber, String sessionNumber, Role role,
584635
TestCase testCase) {
585636
boolean video = (testCase.isTeaching() && role.equals(Role.PUBLISHER)) || !testCase.isTeaching();
@@ -721,6 +772,22 @@ public Map<String, Integer> getPerUserRetryCounts() {
721772
return counts;
722773
}
723774

775+
public Map<String, List<RetryAttempt>> getPerUserRetryAttempts() {
776+
Map<String, List<RetryAttempt>> result = new LinkedHashMap<>();
777+
for (ConcurrentHashMap<String, List<RetryAttempt>> userAttempts : clientRetryAttempts.values()) {
778+
for (Map.Entry<String, List<RetryAttempt>> entry : userAttempts.entrySet()) {
779+
String userSession = entry.getKey();
780+
List<RetryAttempt> attempts = entry.getValue();
781+
result.merge(userSession, attempts, (existing, incoming) -> {
782+
existing.addAll(incoming);
783+
existing.sort((a, b) -> Integer.compare(a.getAttemptNumber(), b.getAttemptNumber()));
784+
return existing;
785+
});
786+
}
787+
}
788+
return result;
789+
}
790+
724791
public void shutdownWorkers(List<String> workerUrls, boolean waitForResponse) {
725792
if (workerUrls == null || workerUrls.isEmpty()) {
726793
return;

loadtest-controller/src/main/java/io/openvidu/loadtest/services/core/LoadTestService.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -359,10 +359,11 @@ private void saveResultReport(TestCase testCase, String participantsBySession, C
359359
.setTimePerWorker(shutdownOrchestrator.getWorkerTimes())
360360
.setTimePerRecordingWorker(shutdownOrchestrator.getRecordingWorkerTimes())
361361
.setUserStartTimes(participantOrchestrator.getUserStartTimes())
362-
.setUserSuccessTimestamps(userSuccessTimestamps)
363-
.setParticipantResponses(participantOrchestrator.getAllParticipantResponses())
364-
.setUserRetryCounts(browserEmulatorClient.getPerUserRetryCounts())
365-
.build();
362+
.setUserSuccessTimestamps(userSuccessTimestamps)
363+
.setParticipantResponses(participantOrchestrator.getAllParticipantResponses())
364+
.setUserRetryCounts(browserEmulatorClient.getPerUserRetryCounts())
365+
.setUserRetryAttempts(browserEmulatorClient.getPerUserRetryAttempts())
366+
.build();
366367

367368
io.exportResults(rr);
368369

0 commit comments

Comments
 (0)