Skip to content

Commit 5e11ce7

Browse files
jeet1995 and Copilot committed
Replace in-process test with forked-JVM deadlock test
The <clinit> deadlock can only be reproduced in a fresh JVM where classes haven't been loaded yet. Replace the reflection-based in-process test with one that forks child JVM processes using ProcessBuilder, each running 6 concurrent threads that trigger <clinit> of different Cosmos classes simultaneously. A 30-second timeout detects deadlocks. Runs 3 iterations for reliability. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 96a042f commit 5e11ce7

1 file changed

Lines changed: 93 additions & 81 deletions

File tree

sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ImplementationBridgeHelpersTest.java

Lines changed: 93 additions & 81 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,8 @@
1111
import org.slf4j.LoggerFactory;
1212
import org.testng.annotations.Test;
1313

14+
import java.io.BufferedReader;
15+
import java.io.InputStreamReader;
1416
import java.lang.reflect.Field;
1517
import java.util.ArrayList;
1618
import java.util.List;
@@ -19,7 +21,6 @@
1921
import java.util.concurrent.Executors;
2022
import java.util.concurrent.Future;
2123
import java.util.concurrent.TimeUnit;
22-
import java.util.concurrent.TimeoutException;
2324
import java.util.concurrent.atomic.AtomicBoolean;
2425
import java.util.concurrent.atomic.AtomicReference;
2526

@@ -110,103 +111,114 @@ public void accessorInitialization() {
110111
}
111112
}
112113

114+
/**
115+
* Regression test for <a href="https://github.com/Azure/azure-sdk-for-java/issues/48622">#48622</a>
116+
* and <a href="https://github.com/Azure/azure-sdk-for-java/issues/48585">#48585</a>.
117+
* <p>
118+
* Forks a fresh JVM that concurrently triggers {@code <clinit>} of different Cosmos classes
119+
* from 6 threads. In a fresh JVM, {@code <clinit>} runs for the first time — the only way
120+
* to exercise the real deadlock scenario. A 30-second timeout detects the hang.
121+
*/
113122
@Test(groups = { "unit" })
114123
public void concurrentAccessorInitializationShouldNotDeadlock() throws Exception {
115-
// Regression test for https://github.com/Azure/azure-sdk-for-java/issues/48622
116-
// and https://github.com/Azure/azure-sdk-for-java/issues/48585
117-
//
118-
// Verifies that concurrently calling different getXxxAccessor() methods from
119-
// multiple threads completes without deadlock and returns non-null accessors.
120-
//
121-
// Limitation: Since JVM <clinit> runs exactly once per class per JVM lifetime,
122-
// this in-process test validates accessor re-registration after a reflection
123-
// reset — not the actual first-load <clinit> deadlock scenario. The real
124-
// deadlock validation was performed via a 50-run fresh-JVM stress test
125-
// documented in the PR description.
126-
127-
// Reset all accessors to force re-initialization
128-
Class<?>[] declaredClasses = ImplementationBridgeHelpers.class.getDeclaredClasses();
129-
for (Class<?> declaredClass : declaredClasses) {
130-
if (declaredClass.getSimpleName().endsWith("Helper")) {
131-
for (Field field : declaredClass.getDeclaredFields()) {
132-
if (field.getName().contains("accessor")) {
133-
field.setAccessible(true);
134-
AtomicReference<?> value = (AtomicReference<?>) FieldUtils.readStaticField(field);
135-
value.set(null);
136-
}
137-
if (field.getName().contains("ClassLoaded")) {
138-
field.setAccessible(true);
139-
AtomicBoolean value = (AtomicBoolean) FieldUtils.readStaticField(field);
140-
value.set(false);
141-
}
124+
125+
String javaHome = System.getProperty("java.home");
126+
String javaBin = javaHome + java.io.File.separator + "bin" + java.io.File.separator + "java";
127+
String classpath = System.getProperty("java.class.path");
128+
129+
List<String> command = new ArrayList<>();
130+
command.add(javaBin);
131+
command.add("--add-opens");
132+
command.add("java.base/java.lang=ALL-UNNAMED");
133+
command.add("-cp");
134+
command.add(classpath);
135+
command.add(ConcurrentClinitChildProcess.class.getName());
136+
137+
int timeoutSeconds = 30;
138+
int runs = 3;
139+
140+
for (int run = 1; run <= runs; run++) {
141+
ProcessBuilder pb = new ProcessBuilder(command);
142+
pb.redirectErrorStream(true);
143+
Process process = pb.start();
144+
145+
StringBuilder output = new StringBuilder();
146+
try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
147+
String line;
148+
while ((line = reader.readLine()) != null) {
149+
output.append(line).append(System.lineSeparator());
150+
logger.info("[child-jvm-run-{}] {}", run, line);
142151
}
143152
}
153+
154+
boolean completed = process.waitFor(timeoutSeconds, TimeUnit.SECONDS);
155+
156+
if (!completed) {
157+
process.destroyForcibly();
158+
fail("Run " + run + ": Child JVM did not complete within " + timeoutSeconds
159+
+ " seconds — <clinit> deadlock detected");
160+
}
161+
162+
int exitCode = process.exitValue();
163+
assertThat(exitCode)
164+
.as("Run " + run + ": Child JVM exited with non-zero code. Output:\n" + output)
165+
.isEqualTo(0);
144166
}
167+
}
145168

146-
try {
147-
final int threadCount = 6;
148-
final int timeoutSeconds = 30;
149-
final CyclicBarrier barrier = new CyclicBarrier(threadCount);
169+
/**
170+
* Entry point for the forked child JVM. Concurrently triggers {@code <clinit>} of 6 different
171+
* Cosmos classes that are involved in the circular initialization chain reported in the issues.
172+
* Exits 0 on success, 1 on deadlock (timeout), 2 on unexpected error.
173+
*/
174+
public static final class ConcurrentClinitChildProcess {
175+
public static void main(String[] args) {
176+
int timeoutSeconds = 20;
177+
int threadCount = 6;
178+
CyclicBarrier barrier = new CyclicBarrier(threadCount);
150179
ExecutorService executor = Executors.newFixedThreadPool(threadCount);
151180

181+
String[] classesToLoad = {
182+
"com.azure.cosmos.CosmosAsyncClient",
183+
"com.azure.cosmos.models.SqlParameter",
184+
"com.azure.cosmos.models.FeedResponse",
185+
"com.azure.cosmos.models.CosmosItemRequestOptions",
186+
"com.azure.cosmos.CosmosAsyncContainer",
187+
"com.azure.cosmos.util.CosmosPagedFluxDefaultImpl"
188+
};
189+
152190
List<Future<?>> futures = new ArrayList<>();
191+
for (int i = 0; i < classesToLoad.length; i++) {
192+
final String className = classesToLoad[i];
193+
final int idx = i;
194+
futures.add(executor.submit(() -> {
195+
try {
196+
barrier.await();
197+
System.out.println("[Thread-" + idx + "] Loading " + className);
198+
Class.forName(className);
199+
System.out.println("[Thread-" + idx + "] Done.");
200+
} catch (Exception e) {
201+
throw new RuntimeException("Failed to load " + className, e);
202+
}
203+
}));
204+
}
153205

154-
// Each thread triggers a different accessor getter concurrently
155-
futures.add(executor.submit(() -> {
156-
awaitBarrier(barrier);
157-
assertThat(ImplementationBridgeHelpers.CosmosAsyncClientHelper.getCosmosAsyncClientAccessor()).isNotNull();
158-
}));
159-
futures.add(executor.submit(() -> {
160-
awaitBarrier(barrier);
161-
assertThat(ImplementationBridgeHelpers.CosmosItemRequestOptionsHelper.getCosmosItemRequestOptionsAccessor()).isNotNull();
162-
}));
163-
futures.add(executor.submit(() -> {
164-
awaitBarrier(barrier);
165-
assertThat(ImplementationBridgeHelpers.FeedResponseHelper.getFeedResponseAccessor()).isNotNull();
166-
}));
167-
futures.add(executor.submit(() -> {
168-
awaitBarrier(barrier);
169-
assertThat(ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor()).isNotNull();
170-
}));
171-
futures.add(executor.submit(() -> {
172-
awaitBarrier(barrier);
173-
assertThat(ImplementationBridgeHelpers.CosmosAsyncContainerHelper.getCosmosAsyncContainerAccessor()).isNotNull();
174-
}));
175-
futures.add(executor.submit(() -> {
176-
awaitBarrier(barrier);
177-
assertThat(ImplementationBridgeHelpers.CosmosItemSerializerHelper.getCosmosItemSerializerAccessor()).isNotNull();
178-
}));
179-
180-
boolean deadlockDetected = false;
206+
boolean deadlock = false;
181207
for (int i = 0; i < futures.size(); i++) {
182208
try {
183209
futures.get(i).get(timeoutSeconds, TimeUnit.SECONDS);
184-
} catch (TimeoutException e) {
185-
deadlockDetected = true;
186-
logger.error("Thread {} did not complete within {} seconds - possible deadlock", i, timeoutSeconds);
187-
} catch (java.util.concurrent.ExecutionException e) {
188-
logger.error("Thread {} threw exception: {}", i, e.getCause().getMessage());
189-
fail("Unexpected exception in thread " + i + ": " + e.getCause());
210+
} catch (java.util.concurrent.TimeoutException e) {
211+
System.err.println("DEADLOCK: Thread-" + i + " timed out after " + timeoutSeconds + "s");
212+
deadlock = true;
213+
} catch (Exception e) {
214+
Throwable root = e;
215+
while (root.getCause() != null) root = root.getCause();
216+
System.err.println("Thread-" + i + " error: " + root);
190217
}
191218
}
192219

193220
executor.shutdownNow();
194-
assertThat(deadlockDetected)
195-
.as("Concurrent accessor initialization should complete without deadlock")
196-
.isFalse();
197-
} finally {
198-
// Restore all accessors so subsequent tests in the same JVM are not affected
199-
BridgeInternal.initializeAllAccessors();
200-
ModelBridgeInternal.initializeAllAccessors();
201-
UtilBridgeInternal.initializeAllAccessors();
202-
}
203-
}
204-
205-
private static void awaitBarrier(CyclicBarrier barrier) {
206-
try {
207-
barrier.await();
208-
} catch (Exception e) {
209-
throw new RuntimeException(e);
221+
System.exit(deadlock ? 1 : 0);
210222
}
211223
}
212224
}

0 commit comments

Comments
 (0)