|
11 | 11 | import org.slf4j.LoggerFactory; |
12 | 12 | import org.testng.annotations.Test; |
13 | 13 |
|
| 14 | +import java.io.BufferedReader; |
| 15 | +import java.io.InputStreamReader; |
14 | 16 | import java.lang.reflect.Field; |
15 | 17 | import java.util.ArrayList; |
16 | 18 | import java.util.List; |
|
19 | 21 | import java.util.concurrent.Executors; |
20 | 22 | import java.util.concurrent.Future; |
21 | 23 | import java.util.concurrent.TimeUnit; |
22 | | -import java.util.concurrent.TimeoutException; |
23 | 24 | import java.util.concurrent.atomic.AtomicBoolean; |
24 | 25 | import java.util.concurrent.atomic.AtomicReference; |
25 | 26 |
|
@@ -110,103 +111,114 @@ public void accessorInitialization() { |
110 | 111 | } |
111 | 112 | } |
112 | 113 |
|
| 114 | + /** |
| 115 | + * Regression test for <a href="https://github.com/Azure/azure-sdk-for-java/issues/48622">#48622</a> |
| 116 | + * and <a href="https://github.com/Azure/azure-sdk-for-java/issues/48585">#48585</a>. |
| 117 | + * <p> |
| 118 | + * Forks a fresh JVM that concurrently triggers {@code <clinit>} of different Cosmos classes |
| 119 | + * from 6 threads. In a fresh JVM, {@code <clinit>} runs for the first time — the only way |
| 120 | + * to exercise the real deadlock scenario. A 30-second timeout detects the hang. |
| 121 | + */ |
113 | 122 | @Test(groups = { "unit" }) |
114 | 123 | public void concurrentAccessorInitializationShouldNotDeadlock() throws Exception { |
115 | | - // Regression test for https://github.com/Azure/azure-sdk-for-java/issues/48622 |
116 | | - // and https://github.com/Azure/azure-sdk-for-java/issues/48585 |
117 | | - // |
118 | | - // Verifies that concurrently calling different getXxxAccessor() methods from |
119 | | - // multiple threads completes without deadlock and returns non-null accessors. |
120 | | - // |
121 | | - // Limitation: Since JVM <clinit> runs exactly once per class per JVM lifetime, |
122 | | - // this in-process test validates accessor re-registration after a reflection |
123 | | - // reset — not the actual first-load <clinit> deadlock scenario. The real |
124 | | - // deadlock validation was performed via a 50-run fresh-JVM stress test |
125 | | - // documented in the PR description. |
126 | | - |
127 | | - // Reset all accessors to force re-initialization |
128 | | - Class<?>[] declaredClasses = ImplementationBridgeHelpers.class.getDeclaredClasses(); |
129 | | - for (Class<?> declaredClass : declaredClasses) { |
130 | | - if (declaredClass.getSimpleName().endsWith("Helper")) { |
131 | | - for (Field field : declaredClass.getDeclaredFields()) { |
132 | | - if (field.getName().contains("accessor")) { |
133 | | - field.setAccessible(true); |
134 | | - AtomicReference<?> value = (AtomicReference<?>) FieldUtils.readStaticField(field); |
135 | | - value.set(null); |
136 | | - } |
137 | | - if (field.getName().contains("ClassLoaded")) { |
138 | | - field.setAccessible(true); |
139 | | - AtomicBoolean value = (AtomicBoolean) FieldUtils.readStaticField(field); |
140 | | - value.set(false); |
141 | | - } |
| 124 | + |
| 125 | + String javaHome = System.getProperty("java.home"); |
| 126 | + String javaBin = javaHome + java.io.File.separator + "bin" + java.io.File.separator + "java"; |
| 127 | + String classpath = System.getProperty("java.class.path"); |
| 128 | + |
| 129 | + List<String> command = new ArrayList<>(); |
| 130 | + command.add(javaBin); |
| 131 | + command.add("--add-opens"); |
| 132 | + command.add("java.base/java.lang=ALL-UNNAMED"); |
| 133 | + command.add("-cp"); |
| 134 | + command.add(classpath); |
| 135 | + command.add(ConcurrentClinitChildProcess.class.getName()); |
| 136 | + |
| 137 | + int timeoutSeconds = 30; |
| 138 | + int runs = 3; |
| 139 | + |
| 140 | + for (int run = 1; run <= runs; run++) { |
| 141 | + ProcessBuilder pb = new ProcessBuilder(command); |
| 142 | + pb.redirectErrorStream(true); |
| 143 | + Process process = pb.start(); |
| 144 | + |
| 145 | + StringBuilder output = new StringBuilder(); |
| 146 | + try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { |
| 147 | + String line; |
| 148 | + while ((line = reader.readLine()) != null) { |
| 149 | + output.append(line).append(System.lineSeparator()); |
| 150 | + logger.info("[child-jvm-run-{}] {}", run, line); |
142 | 151 | } |
143 | 152 | } |
| 153 | + |
| 154 | + boolean completed = process.waitFor(timeoutSeconds, TimeUnit.SECONDS); |
| 155 | + |
| 156 | + if (!completed) { |
| 157 | + process.destroyForcibly(); |
| 158 | + fail("Run " + run + ": Child JVM did not complete within " + timeoutSeconds |
| 159 | + + " seconds — <clinit> deadlock detected"); |
| 160 | + } |
| 161 | + |
| 162 | + int exitCode = process.exitValue(); |
| 163 | + assertThat(exitCode) |
| 164 | + .as("Run " + run + ": Child JVM exited with non-zero code. Output:\n" + output) |
| 165 | + .isEqualTo(0); |
144 | 166 | } |
| 167 | + } |
145 | 168 |
|
146 | | - try { |
147 | | - final int threadCount = 6; |
148 | | - final int timeoutSeconds = 30; |
149 | | - final CyclicBarrier barrier = new CyclicBarrier(threadCount); |
| 169 | + /** |
| 170 | + * Entry point for the forked child JVM. Concurrently triggers {@code <clinit>} of 6 different |
| 171 | + * Cosmos classes that are involved in the circular initialization chain reported in the issues. |
| 172 | + * Exits 0 on success, 1 on deadlock (timeout), 2 on unexpected error. |
| 173 | + */ |
| 174 | + public static final class ConcurrentClinitChildProcess { |
| 175 | + public static void main(String[] args) { |
| 176 | + int timeoutSeconds = 20; |
| 177 | + int threadCount = 6; |
| 178 | + CyclicBarrier barrier = new CyclicBarrier(threadCount); |
150 | 179 | ExecutorService executor = Executors.newFixedThreadPool(threadCount); |
151 | 180 |
|
| 181 | + String[] classesToLoad = { |
| 182 | + "com.azure.cosmos.CosmosAsyncClient", |
| 183 | + "com.azure.cosmos.models.SqlParameter", |
| 184 | + "com.azure.cosmos.models.FeedResponse", |
| 185 | + "com.azure.cosmos.models.CosmosItemRequestOptions", |
| 186 | + "com.azure.cosmos.CosmosAsyncContainer", |
| 187 | + "com.azure.cosmos.util.CosmosPagedFluxDefaultImpl" |
| 188 | + }; |
| 189 | + |
152 | 190 | List<Future<?>> futures = new ArrayList<>(); |
| 191 | + for (int i = 0; i < classesToLoad.length; i++) { |
| 192 | + final String className = classesToLoad[i]; |
| 193 | + final int idx = i; |
| 194 | + futures.add(executor.submit(() -> { |
| 195 | + try { |
| 196 | + barrier.await(); |
| 197 | + System.out.println("[Thread-" + idx + "] Loading " + className); |
| 198 | + Class.forName(className); |
| 199 | + System.out.println("[Thread-" + idx + "] Done."); |
| 200 | + } catch (Exception e) { |
| 201 | + throw new RuntimeException("Failed to load " + className, e); |
| 202 | + } |
| 203 | + })); |
| 204 | + } |
153 | 205 |
|
154 | | - // Each thread triggers a different accessor getter concurrently |
155 | | - futures.add(executor.submit(() -> { |
156 | | - awaitBarrier(barrier); |
157 | | - assertThat(ImplementationBridgeHelpers.CosmosAsyncClientHelper.getCosmosAsyncClientAccessor()).isNotNull(); |
158 | | - })); |
159 | | - futures.add(executor.submit(() -> { |
160 | | - awaitBarrier(barrier); |
161 | | - assertThat(ImplementationBridgeHelpers.CosmosItemRequestOptionsHelper.getCosmosItemRequestOptionsAccessor()).isNotNull(); |
162 | | - })); |
163 | | - futures.add(executor.submit(() -> { |
164 | | - awaitBarrier(barrier); |
165 | | - assertThat(ImplementationBridgeHelpers.FeedResponseHelper.getFeedResponseAccessor()).isNotNull(); |
166 | | - })); |
167 | | - futures.add(executor.submit(() -> { |
168 | | - awaitBarrier(barrier); |
169 | | - assertThat(ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor()).isNotNull(); |
170 | | - })); |
171 | | - futures.add(executor.submit(() -> { |
172 | | - awaitBarrier(barrier); |
173 | | - assertThat(ImplementationBridgeHelpers.CosmosAsyncContainerHelper.getCosmosAsyncContainerAccessor()).isNotNull(); |
174 | | - })); |
175 | | - futures.add(executor.submit(() -> { |
176 | | - awaitBarrier(barrier); |
177 | | - assertThat(ImplementationBridgeHelpers.CosmosItemSerializerHelper.getCosmosItemSerializerAccessor()).isNotNull(); |
178 | | - })); |
179 | | - |
180 | | - boolean deadlockDetected = false; |
| 206 | + boolean deadlock = false; |
181 | 207 | for (int i = 0; i < futures.size(); i++) { |
182 | 208 | try { |
183 | 209 | futures.get(i).get(timeoutSeconds, TimeUnit.SECONDS); |
184 | | - } catch (TimeoutException e) { |
185 | | - deadlockDetected = true; |
186 | | - logger.error("Thread {} did not complete within {} seconds - possible deadlock", i, timeoutSeconds); |
187 | | - } catch (java.util.concurrent.ExecutionException e) { |
188 | | - logger.error("Thread {} threw exception: {}", i, e.getCause().getMessage()); |
189 | | - fail("Unexpected exception in thread " + i + ": " + e.getCause()); |
| 210 | + } catch (java.util.concurrent.TimeoutException e) { |
| 211 | + System.err.println("DEADLOCK: Thread-" + i + " timed out after " + timeoutSeconds + "s"); |
| 212 | + deadlock = true; |
| 213 | + } catch (Exception e) { |
| 214 | + Throwable root = e; |
| 215 | + while (root.getCause() != null) root = root.getCause(); |
| 216 | + System.err.println("Thread-" + i + " error: " + root); |
190 | 217 | } |
191 | 218 | } |
192 | 219 |
|
193 | 220 | executor.shutdownNow(); |
194 | | - assertThat(deadlockDetected) |
195 | | - .as("Concurrent accessor initialization should complete without deadlock") |
196 | | - .isFalse(); |
197 | | - } finally { |
198 | | - // Restore all accessors so subsequent tests in the same JVM are not affected |
199 | | - BridgeInternal.initializeAllAccessors(); |
200 | | - ModelBridgeInternal.initializeAllAccessors(); |
201 | | - UtilBridgeInternal.initializeAllAccessors(); |
202 | | - } |
203 | | - } |
204 | | - |
205 | | - private static void awaitBarrier(CyclicBarrier barrier) { |
206 | | - try { |
207 | | - barrier.await(); |
208 | | - } catch (Exception e) { |
209 | | - throw new RuntimeException(e); |
| 221 | + System.exit(deadlock ? 1 : 0); |
210 | 222 | } |
211 | 223 | } |
212 | 224 | } |
0 commit comments