Skip to content

Commit c4bc6bb

Browse files
[cosmos] Fix ClientTelemetry static-init NPE when IMDS access is disabled (#48888)
* Add regression test for IMDS-disabled ClientTelemetry init
* Fix ClientTelemetry initialization when IMDS access is disabled
* Update CHANGELOG.md
* Update ClientConfigDiagnosticsTest.java
1 parent 09eccb0 commit c4bc6bb

4 files changed

Lines changed: 128 additions & 12 deletions

File tree

sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import com.azure.cosmos.CosmosRegionSwitchHint;
1212
import com.azure.cosmos.SessionRetryOptions;
1313
import com.azure.cosmos.SessionRetryOptionsBuilder;
14+
import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry;
1415
import com.azure.cosmos.implementation.perPartitionCircuitBreaker.PartitionLevelCircuitBreakerConfig;
1516
import com.azure.cosmos.implementation.directconnectivity.RntbdTransportClient;
1617
import com.azure.cosmos.implementation.guava25.collect.ImmutableList;
@@ -40,6 +41,8 @@
4041
import static org.assertj.core.api.Assertions.assertThat;
4142

4243
public class ClientConfigDiagnosticsTest {
44+
private static final String vmInstanceMachineId = ClientTelemetry.getMachineId(null);
45+
4346
private final ObjectMapper objectMapper = new ObjectMapper();
4447
private static final ImplementationBridgeHelpers.CosmosContainerIdentityHelper.CosmosContainerIdentityAccessor containerIdentityAccessor = ImplementationBridgeHelpers
4548
.CosmosContainerIdentityHelper
@@ -165,7 +168,7 @@ public void bareMinimum() throws Exception {
165168
ObjectNode objectNode = (ObjectNode) objectMapper.readTree(jsonWriter.toString());
166169

167170
assertThat(objectNode.get("id").asInt()).isEqualTo(1);
168-
assertThat(objectNode.get("machineId").asText()).isEqualTo(machineId);
171+
assertThat(objectNode.get("machineId").asText()).isEqualTo(Strings.isNullOrEmpty(vmInstanceMachineId) ? machineId : vmInstanceMachineId);
169172
assertThat(objectNode.get("numberOfClients").asInt()).isEqualTo(2);
170173
assertThat(objectNode.get("consistencyCfg").asText()).isEqualTo("(consistency: null, readConsistencyStrategy: null, mm: false, prgns: [null])");
171174
assertThat(objectNode.get("connCfg").get("rntbd").asText()).isEqualTo("null");
@@ -198,7 +201,7 @@ public void rntbd() throws Exception {
198201
ObjectNode objectNode = (ObjectNode) objectMapper.readTree(jsonWriter.toString());
199202

200203
assertThat(objectNode.get("id").asInt()).isEqualTo(1);
201-
assertThat(objectNode.get("machineId").asText()).isEqualTo(machineId);
204+
assertThat(objectNode.get("machineId").asText()).isEqualTo(Strings.isNullOrEmpty(vmInstanceMachineId) ? machineId : vmInstanceMachineId);
202205
assertThat(objectNode.get("numberOfClients").asInt()).isEqualTo(2);
203206
assertThat(objectNode.get("consistencyCfg").asText()).isEqualTo("(consistency: null, readConsistencyStrategy: null, mm: false, prgns: [null])");
204207
assertThat(objectNode.get("connCfg").get("rntbd").asText()).isEqualTo("(cto:PT5S, nrto:PT5S, icto:PT0S, ieto:PT1H, mcpe:130, mrpc:30, cer:true)");
@@ -235,7 +238,7 @@ public void gw() throws Exception {
235238
ObjectNode objectNode = (ObjectNode) objectMapper.readTree(jsonWriter.toString());
236239

237240
assertThat(objectNode.get("id").asInt()).isEqualTo(1);
238-
assertThat(objectNode.get("machineId").asText()).isEqualTo(machineId);
241+
assertThat(objectNode.get("machineId").asText()).isEqualTo(Strings.isNullOrEmpty(vmInstanceMachineId) ? machineId : vmInstanceMachineId);
239242
assertThat(objectNode.get("numberOfClients").asInt()).isEqualTo(2);
240243
assertThat(objectNode.get("consistencyCfg").asText()).isEqualTo("(consistency: null, readConsistencyStrategy: null, mm: false, prgns: [null])");
241244
assertThat(objectNode.get("connCfg").get("rntbd").asText()).isEqualTo("null");
@@ -309,7 +312,7 @@ public void full(
309312
ObjectNode objectNode = (ObjectNode) objectMapper.readTree(jsonWriter.toString());
310313

311314
assertThat(objectNode.get("id").asInt()).isEqualTo(1);
312-
assertThat(objectNode.get("machineId").asText()).isEqualTo(machineId);
315+
assertThat(objectNode.get("machineId").asText()).isEqualTo(Strings.isNullOrEmpty(vmInstanceMachineId) ? machineId : vmInstanceMachineId);
313316
assertThat(objectNode.get("numberOfClients").asInt()).isEqualTo(2);
314317
assertThat(objectNode.get("consistencyCfg").asText()).isEqualTo("(consistency: null, readConsistencyStrategy: null, mm: false, prgns: [westus1,westus2])");
315318
assertThat(objectNode.get("connCfg").get("rntbd").asText()).isEqualTo("null");
@@ -362,7 +365,7 @@ public void sessionRetryOptionsInDiagnostics(SessionRetryOptions sessionRetryOpt
362365
ObjectNode objectNode = (ObjectNode) objectMapper.readTree(jsonWriter.toString());
363366

364367
assertThat(objectNode.get("id").asInt()).isEqualTo(1);
365-
assertThat(objectNode.get("machineId").asText()).isEqualTo(machineId);
368+
assertThat(objectNode.get("machineId").asText()).isEqualTo(Strings.isNullOrEmpty(vmInstanceMachineId) ? machineId : vmInstanceMachineId);
366369
assertThat(objectNode.get("numberOfClients").asInt()).isEqualTo(2);
367370
assertThat(objectNode.get("consistencyCfg").asText()).isEqualTo("(consistency: null, readConsistencyStrategy: null, mm: false, prgns: [null])");
368371
assertThat(objectNode.get("connCfg").get("rntbd").asText()).isEqualTo("null");

sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ImplementationBridgeHelpersTest.java

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,106 @@ public static void main(String[] args) {
285285
}
286286
}
287287

288+
/**
289+
* Regression test for the ClientTelemetry static-init failure when IMDS access is disabled.
290+
* A fresh JVM is required so the system property is in effect before any Cosmos classes load.
291+
*/
292+
@Test(groups = { "unit" })
293+
public void cosmosClientBuildShouldSucceedWhenImdsAccessIsDisabled() throws Exception {
294+
String javaHome = System.getProperty("java.home");
295+
String javaBin = javaHome + java.io.File.separator + "bin" + java.io.File.separator + "java";
296+
String classpath = System.getProperty("java.class.path");
297+
298+
List<String> command = new ArrayList<>();
299+
command.add(javaBin);
300+
command.add("-DCOSMOS.DISABLE_IMDS_ACCESS=true");
301+
302+
try {
303+
int majorVersion = Integer.parseInt(System.getProperty("java.specification.version").split("\\.")[0]);
304+
if (majorVersion >= 9) {
305+
command.add("--add-opens");
306+
command.add("java.base/java.lang=ALL-UNNAMED");
307+
}
308+
} catch (NumberFormatException e) {
309+
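// Unparseable specification version: skip --add-opens (JDK 8 reports "1.8", which parses as 1 and never reaches this catch)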
310+
}
311+
312+
command.add("-cp");
313+
command.add(classpath);
314+
command.add(ClientTelemetryImdsDisabledChildProcess.class.getName());
315+
316+
ProcessBuilder pb = new ProcessBuilder(command);
317+
pb.redirectErrorStream(true);
318+
Process process = pb.start();
319+
320+
StringBuilder output = new StringBuilder();
321+
Thread gobbler = new Thread(() -> {
322+
try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
323+
String line;
324+
while ((line = reader.readLine()) != null) {
325+
output.append(line).append(System.lineSeparator());
326+
logger.info("[client-telemetry-imds-disabled] {}", line);
327+
}
328+
} catch (Exception e) {
329+
// Process destroyed
330+
}
331+
});
332+
gobbler.setDaemon(true);
333+
gobbler.start();
334+
335+
boolean completed = process.waitFor(60, TimeUnit.SECONDS);
336+
if (!completed) {
337+
process.destroyForcibly();
338+
gobbler.join(5000);
339+
fail("ClientTelemetry IMDS-disabled regression check timed out after 60s. Output:\n" + output);
340+
}
341+
342+
gobbler.join(5000);
343+
int exitCode = process.exitValue();
344+
assertThat(exitCode)
345+
.as("Child JVM exited with non-zero code. Output:\n" + output)
346+
.isEqualTo(0);
347+
}
348+
349+
public static final class ClientTelemetryImdsDisabledChildProcess {
350+
public static void main(String[] args) {
351+
try {
352+
com.azure.cosmos.CosmosAsyncClient client = new com.azure.cosmos.CosmosClientBuilder()
353+
.endpoint(TestConfigurations.HOST)
354+
.key(TestConfigurations.MASTER_KEY)
355+
.buildAsyncClient();
356+
357+
client.close();
358+
System.out.println("Client built successfully with IMDS access disabled.");
359+
System.exit(0);
360+
} catch (Throwable throwable) {
361+
if (containsClientTelemetryStaticInitFailure(throwable)) {
362+
throwable.printStackTrace(System.err);
363+
System.exit(1);
364+
}
365+
366+
System.out.println("Client reached the expected network/auth path without a ClientTelemetry static-init failure.");
367+
throwable.printStackTrace(System.out);
368+
System.exit(0);
369+
}
370+
}
371+
372+
private static boolean containsClientTelemetryStaticInitFailure(Throwable throwable) {
373+
Throwable current = throwable;
374+
while (current != null) {
375+
if (current instanceof ExceptionInInitializerError
376+
|| current instanceof NoClassDefFoundError
377+
|| (current.getMessage() != null && current.getMessage().contains("ClientTelemetry"))) {
378+
return true;
379+
}
380+
381+
current = current.getCause();
382+
}
383+
384+
return false;
385+
}
386+
}
387+
288388
/**
289389
* Verifies that every {@code *Helper} inner class in
290390
* {@link ImplementationBridgeHelpers} has a resolvable accessor — i.e., calling

sdk/cosmos/azure-cosmos/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* Fixed JVM `<clinit>` deadlock when multiple threads concurrently trigger Cosmos SDK class loading for the first time. - See [PR 48689](https://github.com/Azure/azure-sdk-for-java/pull/48689)
1313
* Fixed an issue where `CustomItemSerializer` was incorrectly applied to internal SDK query pipeline structures (e.g., `OrderByRowResult`, `Document`), causing deserialization failures in ORDER BY, GROUP BY, aggregate, DISTINCT, and hybrid search queries. - See [PR 48811](https://github.com/Azure/azure-sdk-for-java/pull/48811)
1414
* Fixed an issue where `SqlParameter` ignored the configured `CustomItemSerializer`, always using the internal default serializer instead. - See [PR 48811](https://github.com/Azure/azure-sdk-for-java/pull/48811)
15+
* Fixed a `ClientTelemetry` static initialization failure that occurred when IMDS access is disabled and caused `NoClassDefFoundError` during Cosmos client creation in non-Azure environments. - See [PR 48888](https://github.com/Azure/azure-sdk-for-java/pull/48888)
1516

1617
#### Other Changes
1718

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/clienttelemetry/ClientTelemetry.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,29 @@ private static ImplementationBridgeHelpers.CosmosClientTelemetryConfigHelper.Cos
5151
// - The fetch executes at most once
5252
// - All concurrent subscribers share the single result
5353
// - The HTTP client is created and disposed within the fetch
54-
private static final Mono<AzureVMMetadata> CACHED_METADATA = fetchAzureVmMetadata().cache();
54+
private static final Mono<AzureVMMetadata> CACHED_METADATA;
5555

5656
// Sentinel for "not on Azure VM" or "IMDS unreachable"
57-
private static final AzureVMMetadata METADATA_NOT_AVAILABLE = new AzureVMMetadata();
57+
private static final AzureVMMetadata METADATA_NOT_AVAILABLE;
5858

5959
// IMDS Constants
60-
private static final String IMDS_AZURE_VM_METADATA = "http://169.254.169.254:80/metadata/instance?api-version=2020-06-01";
61-
private static final Duration IMDS_DEFAULT_NETWORK_REQUEST_TIMEOUT = Duration.ofSeconds(5);
62-
private static final Duration IMDS_DEFAULT_IDLE_CONNECTION_TIMEOUT = Duration.ofSeconds(60);
63-
private static final Duration IMDS_DEFAULT_CONNECTION_ACQUIRE_TIMEOUT = Duration.ofSeconds(5);
64-
private static final int IMDS_DEFAULT_MAX_CONNECTION_POOL_SIZE = 5;
60+
private static final String IMDS_AZURE_VM_METADATA;
61+
private static final Duration IMDS_DEFAULT_NETWORK_REQUEST_TIMEOUT;
62+
private static final Duration IMDS_DEFAULT_IDLE_CONNECTION_TIMEOUT;
63+
private static final Duration IMDS_DEFAULT_CONNECTION_ACQUIRE_TIMEOUT;
64+
private static final int IMDS_DEFAULT_MAX_CONNECTION_POOL_SIZE;
65+
66+
static {
67+
// Initialize the sentinel and IMDS defaults before creating the cached Mono,
68+
// because fetchAzureVmMetadata() reads them during class initialization.
69+
METADATA_NOT_AVAILABLE = new AzureVMMetadata();
70+
IMDS_AZURE_VM_METADATA = "http://169.254.169.254:80/metadata/instance?api-version=2020-06-01";
71+
IMDS_DEFAULT_NETWORK_REQUEST_TIMEOUT = Duration.ofSeconds(5);
72+
IMDS_DEFAULT_IDLE_CONNECTION_TIMEOUT = Duration.ofSeconds(60);
73+
IMDS_DEFAULT_CONNECTION_ACQUIRE_TIMEOUT = Duration.ofSeconds(5);
74+
IMDS_DEFAULT_MAX_CONNECTION_POOL_SIZE = 5;
75+
CACHED_METADATA = fetchAzureVmMetadata().cache();
76+
}
6577

6678
// Per-instance fields
6779
private final ClientTelemetryInfo clientTelemetryInfo;

0 commit comments

Comments
 (0)