ibrandes
diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/test/java/com/azure/cosmos/benchmark/WorkflowTest.java b/sdk/cosmos/azure-cosmos-benchmark/src/test/java/com/azure/cosmos/benchmark/WorkflowTest.java
Lines changed: 20 additions & 3 deletions
diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/PointWriterITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/PointWriterITest.scala
Lines changed: 23 additions & 4 deletions
diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/PointWriterSubpartitionITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/PointWriterSubpartitionITest.scala
Lines changed: 1 addition & 1 deletion
diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EWriteITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EWriteITest.scala
Lines changed: 6 additions & 0 deletions
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ClientMetricsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ClientMetricsTest.java
Lines changed: 15 additions & 9 deletions
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosBulkAsyncTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosBulkAsyncTest.java
Lines changed: 11 additions & 6 deletions
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosConflictsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosConflictsTest.java
Lines changed: 2 additions & 1 deletion
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosContainerOpenConnectionsAndInitCachesTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosContainerOpenConnectionsAndInitCachesTest.java
Lines changed: 16 additions & 4 deletions
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsE2ETest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsE2ETest.java
Lines changed: 8 additions & 3 deletions
diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java
Lines changed: 18 additions & 1 deletion
@@ -272,9 +272,26 @@ public void before_WorkflowTest() {
         options.setOfferThroughput(10000);
         AsyncDocumentClient housekeepingClient = Utils.housekeepingClient();
         database = Utils.createDatabaseForTest(housekeepingClient);
-        collection = housekeepingClient.createCollection("dbs/" + database.getId(),
-            getCollectionDefinitionWithRangeRangeIndex(),
-            options).block().getResource();
+        // Retry collection creation on any exception; intended to ride out transient failures (e.g. 408, 429, 503)
+        int maxRetries = 3;
+        for (int attempt = 0; attempt <= maxRetries; attempt++) {
+            try {
+                collection = housekeepingClient.createCollection("dbs/" + database.getId(),
+                    getCollectionDefinitionWithRangeRangeIndex(),
+                    options).block().getResource();
+                break;
+            } catch (Exception e) {
+                if (attempt == maxRetries) {
+                    throw e;
+                }
+                try {
+                    Thread.sleep(5000);
+                } catch (InterruptedException ie) {
+                    Thread.currentThread().interrupt();
+                    throw new RuntimeException(ie);
+                }
+            }
+        }
         housekeepingClient.close();
     }
 
 
@@ -12,6 +12,9 @@ import com.fasterxml.jackson.databind.node.ObjectNode
 import org.apache.commons.lang3.RandomUtils
 import org.apache.spark.MockTaskContext
 import org.apache.spark.sql.types.{BooleanType, DoubleType, FloatType, IntegerType, LongType, StringType, StructField, StructType}
+import org.scalatest.concurrent.Eventually.eventually
+import org.scalatest.concurrent.Waiters.{interval, timeout}
+import org.scalatest.time.SpanSugar.convertIntToGrainOfTime
 
 import scala.collection.concurrent.TrieMap
 import scala.collection.mutable
@@ -218,7 +221,7 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana
     val container = getContainer
     val containerProperties = container.read().block().getProperties
     val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition
-    val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemAppend, maxRetryCount = 0, bulkEnabled = false, bulkTransactional = false)
+    val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemAppend, maxRetryCount = 3, bulkEnabled = false, bulkTransactional = false)
     val pointWriter = new PointWriter(
       container,
       partitionKeyDefinition,
@@ -274,10 +277,19 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana
     }
 
     pointWriter.flushAndClose()
-    val allItems = readAllItems()
 
-    allItems should have size items.size
-    metricsPublisher.getRecordsWrittenSnapshot() shouldEqual items.size
+    // Poll until all items are indexed and visible via query
+    // readAllItems() uses a query which depends on indexing completion
+    var allItems = readAllItems()
+    eventually(timeout(10.seconds), interval(500.milliseconds)) {
+      allItems = readAllItems()
+      allItems should have size items.size
+    }
+
+    // Poll until metrics are fully recorded after flush
+    eventually(timeout(10.seconds), interval(100.milliseconds)) {
+      metricsPublisher.getRecordsWrittenSnapshot() shouldEqual items.size
+    }
     metricsPublisher.getBytesWrittenSnapshot() > 0 shouldEqual true
     metricsPublisher.getTotalRequestChargeSnapshot() > 5 * items.size shouldEqual true
     metricsPublisher.getTotalRequestChargeSnapshot() < 10 * items.size shouldEqual true
@@ -303,6 +315,13 @@ class PointWriterITest extends IntegrationSpec with CosmosClient with AutoCleana
 
     pointWriter.flushAndClose()
 
+    // Wait for metrics to be fully aggregated after flush
+    // This prevents race conditions where metrics snapshot is taken before all writes are recorded
+    // Use eventually block to poll until the expected count is reached
+    eventually(timeout(10.seconds), interval(100.milliseconds)) {
+      metricsPublisher.getRecordsWrittenSnapshot() should be >= (2 * items.size).toLong
+    }
+
     metricsPublisher.getRecordsWrittenSnapshot() shouldEqual 2 * items.size
     metricsPublisher.getBytesWrittenSnapshot() > 0 shouldEqual true
     metricsPublisher.getTotalRequestChargeSnapshot() > 5 * 2 * items.size shouldEqual true
 
@@ -207,7 +207,7 @@ class PointWriterSubpartitionITest extends IntegrationSpec with CosmosClient wit
     val container = getContainer
     val containerProperties = container.read().block().getProperties
     val partitionKeyDefinition = containerProperties.getPartitionKeyDefinition
-    val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemAppend, maxRetryCount = 0, bulkEnabled = false, bulkTransactional = false)
+    val writeConfig = CosmosWriteConfig(ItemWriteStrategy.ItemAppend, maxRetryCount = 3, bulkEnabled = false, bulkTransactional = false)
     val pointWriter = new PointWriter(
       container, partitionKeyDefinition, writeConfig, DiagnosticsConfig(), MockTaskContext.mockTaskContext(),new TestOutputMetricsPublisher)
     val items = new mutable.HashMap[String, mutable.Set[ObjectNode]] with mutable.MultiMap[String, ObjectNode]
 
@@ -166,6 +166,12 @@ class SparkE2EWriteITest
           statusStore.executionsList().last.metricValues != null)
       }
 
+      // Wait for onTaskEnd callback to update snapshot variables
+      // The callback fires asynchronously after metrics are computed
+      eventually(timeout(10.seconds), interval(10.milliseconds)) {
+        assert(recordsWrittenSnapshot > 0)
+      }
+
       recordsWrittenSnapshot shouldEqual 1
       bytesWrittenSnapshot > 0 shouldEqual  true
       if (!spark.sparkContext.version.startsWith("3.1.")) {
 
@@ -6,6 +6,7 @@
 
 package com.azure.cosmos;
 
+import com.azure.cosmos.FlakyTestRetryAnalyzer;
 import com.azure.cosmos.implementation.AsyncDocumentClient;
 import com.azure.cosmos.implementation.Configs;
 import com.azure.cosmos.implementation.DiagnosticsProvider;
@@ -85,7 +86,7 @@ public ClientMetricsTest(CosmosClientBuilder clientBuilder) {
         super(clientBuilder);
     }
 
-    @Test(groups = { "fast" }, timeOut = TIMEOUT)
+    @Test(groups = { "fast" }, timeOut = SETUP_TIMEOUT)
     public void maxValueExceedingDefinedLimitStillWorksWithoutException() throws Exception {
 
         // Expected behavior is that higher values than the expected max value can still be recorded
@@ -133,7 +134,7 @@ public void maxValueExceedingDefinedLimitStillWorksWithoutException() throws Exc
         }
     }
 
-    @Test(groups = { "fast" }, timeOut = TIMEOUT)
+    @Test(groups = { "fast" }, timeOut = TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)
     public void createItem() throws Exception {
         boolean[] disableLatencyMeterTestCases = { false, true };
 
@@ -274,7 +275,10 @@ public void createItemWithAllMetrics() throws Exception {
         }
     }
 
-    @Test(groups = { "fast" }, timeOut = TIMEOUT)
+    // Increased timeout from TIMEOUT to SETUP_TIMEOUT to account for collection creation time
+    // during TestState initialization, especially in CI environments where collection creation
+    // can take longer than 40 seconds
+    @Test(groups = { "fast" }, timeOut = SETUP_TIMEOUT)
     public void readItem() throws Exception {
         try (TestState state = new TestState(getClientBuilder(), CosmosMetricCategory.DEFAULT)) {
             InternalObjectNode properties = getDocumentDefinition(UUID.randomUUID().toString());
@@ -336,7 +340,7 @@ public void readNonExistingItem() throws Exception {
         }
     }
 
-    @Test(groups = { "fast" }, timeOut = TIMEOUT)
+    @Test(groups = { "fast" }, timeOut = TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)
     public void readManySingleItem() throws Exception {
         try (TestState state = new TestState(getClientBuilder(), CosmosMetricCategory.DEFAULT)) {
             InternalObjectNode properties = getDocumentDefinition(UUID.randomUUID().toString());
@@ -464,7 +468,9 @@ public void readItemWithThresholdsApplied() throws Exception {
         runReadItemTestWithThresholds(minThresholds, true);
     }
 
-    @Test(groups = { "fast" }, timeOut = TIMEOUT)
+    // TestState constructor creates a new client and collection, which can exceed 40s in CI.
+    // Using SETUP_TIMEOUT (60s) instead of TIMEOUT, plus FlakyTestRetryAnalyzer, to give adequate time.
+    @Test(groups = { "fast" }, timeOut = SETUP_TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)
     public void replaceItem() throws Exception {
         try (TestState state = new TestState(getClientBuilder(), CosmosMetricCategory.DEFAULT)) {
             InternalObjectNode properties = getDocumentDefinition(UUID.randomUUID().toString());
@@ -657,7 +663,7 @@ <T> CosmosItemResponse verifyExists(TestState state, String id, PartitionKey pk,
         return response;
     }
 
-    @Test(groups = { "fast" }, timeOut = TIMEOUT, retryAnalyzer = SuperFlakyTestRetryAnalyzer.class)
+    @Test(groups = { "fast" }, timeOut = SETUP_TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)
     public void readAllItemsWithDetailMetricsWithExplicitPageSize() throws Exception {
         try (TestState state = new TestState(getClientBuilder(),
             CosmosMetricCategory.DEFAULT,
@@ -993,7 +999,7 @@ public void batchMultipleItemExecution() throws Exception {
         }
     }
 
-    @Test(groups = { "fast" }, timeOut = TIMEOUT)
+    @Test(groups = { "fast" }, timeOut = TIMEOUT * 2)
     public void effectiveMetricCategoriesForDefault() throws Exception {
         try (TestState state = new TestState(getClientBuilder(), CosmosMetricCategory.fromString("DeFAult"))) {
             assertThat(state.getEffectiveMetricCategories().size()).isEqualTo(5);
@@ -1082,7 +1088,7 @@ public void effectiveMetricCategoriesForAll() throws Exception {
         }
     }
 
-    @Test(groups = { "fast" }, timeOut = TIMEOUT)
+    @Test(groups = { "fast" }, timeOut = SETUP_TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)
     public void endpointMetricsAreDurable() throws Exception {
         try (TestState state = new TestState(getClientBuilder(), CosmosMetricCategory.ALL)){
             if (state.client.asyncClient().getConnectionPolicy().getConnectionMode() != ConnectionMode.DIRECT) {
@@ -1111,7 +1117,7 @@ public void endpointMetricsAreDurable() throws Exception {
         }
     }
 
-    @Test(groups = { "fast" }, timeOut = TIMEOUT)
+    @Test(groups = { "fast" }, timeOut = TIMEOUT * 2)
     public void effectiveMetricCategoriesForAllLatebound() throws Exception {
         try (TestState state = new TestState(getClientBuilder(), CosmosMetricCategory.DEFAULT)) {
             EnumSet<MetricCategory> effectiveMetricCategories =
 
@@ -45,6 +45,8 @@
 import java.util.UUID;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import com.azure.cosmos.FlakyTestRetryAnalyzer;
+
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.fail;
 
@@ -63,19 +65,22 @@ public CosmosBulkAsyncTest(CosmosClientBuilder clientBuilder) {
     @BeforeClass(groups = {"fast"}, timeOut = SETUP_TIMEOUT)
     public void before_CosmosBulkAsyncTest() {
         assertThat(this.bulkClient).isNull();
-        ThrottlingRetryOptions throttlingOptions = new ThrottlingRetryOptions()
-            .setMaxRetryAttemptsOnThrottledRequests(1000000)
-            .setMaxRetryWaitTime(Duration.ofDays(1));
-        this.bulkClient = getClientBuilder().throttlingRetryOptions(throttlingOptions).buildAsyncClient();
-        bulkAsyncContainer = getSharedMultiPartitionCosmosContainer(this.bulkClient);
+        executeWithRetry(() -> {
+            safeClose(this.bulkClient);
+            ThrottlingRetryOptions throttlingOptions = new ThrottlingRetryOptions()
+                .setMaxRetryAttemptsOnThrottledRequests(1000000)
+                .setMaxRetryWaitTime(Duration.ofDays(1));
+            this.bulkClient = getClientBuilder().throttlingRetryOptions(throttlingOptions).buildAsyncClient();
+            bulkAsyncContainer = getSharedMultiPartitionCosmosContainer(this.bulkClient);
+        }, 3, "CosmosBulkAsyncTest setup");
     }
 
     @AfterClass(groups = {"fast"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true)
     public void afterClass() {
         safeClose(this.bulkClient);
     }
 
-    @Test(groups = {"fast"}, timeOut = TIMEOUT * 2)
+    @Test(groups = {"fast"}, timeOut = TIMEOUT * 2, retryAnalyzer = FlakyTestRetryAnalyzer.class)
     public void createItem_withBulkAndThroughputControlAsDefaultGroup() throws InterruptedException {
         runBulkTest(true);
     }
 
@@ -2,6 +2,7 @@
 // Licensed under the MIT License.
 package com.azure.cosmos;
 
+import com.azure.cosmos.FlakyTestRetryAnalyzer;
 import com.azure.cosmos.implementation.DatabaseAccount;
 import com.azure.cosmos.implementation.DatabaseAccountLocation;
 import com.azure.cosmos.implementation.GlobalEndpointManager;
@@ -170,7 +171,7 @@ public void conflictCustomLWW() throws InterruptedException {
         }
     }
 
-    @Test(groups = {"flaky-multi-master"}, timeOut = CONFLICT_TIMEOUT)
+    @Test(groups = {"flaky-multi-master"}, timeOut = CONFLICT_TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)
     public void conflictCustomSproc() throws InterruptedException {
         if (this.regionalClients.size() > 1) {
             CosmosAsyncDatabase database = getSharedCosmosDatabase(globalClient);
 
@@ -116,8 +116,8 @@ public Object[][] useAsyncParameterProvider() {
         };
     }
 
-    @Test(groups = {"fast"}, dataProvider = "useAsyncParameterProvider")
-    public void openConnectionsAndInitCachesForDirectMode(boolean useAsync) {
+    @Test(groups = {"fast"}, dataProvider = "useAsyncParameterProvider", retryAnalyzer = FlakyTestRetryAnalyzer.class)
+    public void openConnectionsAndInitCachesForDirectMode(boolean useAsync) throws InterruptedException {
         CosmosAsyncContainer asyncContainer = useAsync ? directCosmosAsyncContainer : directCosmosContainer.asyncContainer;
         CosmosAsyncClient asyncClient = useAsync ? directCosmosAsyncClient : directCosmosClient.asyncClient();
 
@@ -180,8 +180,20 @@ public void openConnectionsAndInitCachesForDirectMode(boolean useAsync) {
 
         assertThat(provider.count()).isEqualTo(endpoints.size());
 
+        // Wait for channels to be established - connection opening is asynchronous
+        int minChannels = Configs.getMinConnectionPoolSizePerEndpoint();
+        int maxWaitIterations = 20;
+        for (int i = 0; i < maxWaitIterations; i++) {
+            boolean allReady = provider.list()
+                .allMatch(ep -> ep.channelsMetrics() >= minChannels);
+            if (allReady) {
+                break;
+            }
+            Thread.sleep(500);
+        }
+
         // Validate for each RntbdServiceEndpoint, is at least Configs.getMinConnectionPoolSizePerEndpoint()) channel is being opened
-        provider.list().forEach(rntbdEndpoint -> assertThat(rntbdEndpoint.channelsMetrics()).isGreaterThanOrEqualTo(Configs.getMinConnectionPoolSizePerEndpoint()));
+        provider.list().forEach(rntbdEndpoint -> assertThat(rntbdEndpoint.channelsMetrics()).isGreaterThanOrEqualTo(minChannels));
 
         // Test for real document requests, it will not open new channels
         for (int i = 0; i < 5; i++) {
@@ -191,7 +203,7 @@ public void openConnectionsAndInitCachesForDirectMode(boolean useAsync) {
                 directCosmosContainer.createItem(TestObject.create());
             }
         }
-        provider.list().forEach(rntbdEndpoint -> assertThat(rntbdEndpoint.channelsMetrics()).isGreaterThanOrEqualTo(Configs.getMinConnectionPoolSizePerEndpoint()));
+        provider.list().forEach(rntbdEndpoint -> assertThat(rntbdEndpoint.channelsMetrics()).isGreaterThanOrEqualTo(minChannels));
     }
 
     @Test(groups = {"fast"}, dataProvider = "useAsyncParameterProvider")
 
@@ -495,9 +495,14 @@ private CosmosContainer getContainer(CosmosClientBuilder builder) {
         this.safeCloseCosmosClient();
 
         assertThat(builder).isNotNull();
-        this.client = builder.buildClient();
-        CosmosAsyncContainer asyncContainer = getSharedMultiPartitionCosmosContainer(this.client.asyncClient());
-        return this.client.getDatabase(asyncContainer.getDatabase().getId()).getContainer(asyncContainer.getId());
+        final CosmosContainer[] result = new CosmosContainer[1];
+        executeWithRetry(() -> {
+            this.safeCloseCosmosClient();
+            this.client = builder.buildClient();
+            CosmosAsyncContainer asyncContainer = getSharedMultiPartitionCosmosContainer(this.client.asyncClient());
+            result[0] = this.client.getDatabase(asyncContainer.getDatabase().getId()).getContainer(asyncContainer.getId());
+        }, 3, "CosmosDiagnosticsE2ETest getContainer");
+        return result[0];
     }
 
     private CosmosDiagnostics executeDocumentOperation(
 
@@ -1071,6 +1071,23 @@ public void directDiagnosticsOnException() throws Exception {
         CosmosItemResponse<InternalObjectNode> createResponse = null;
         try {
             createResponse = containerDirect.createItem(internalObjectNode);
+            
+            // Best-effort wait for the created item to become readable before testing with wrong partition key.
+            // Polls up to 5 times, 200 ms apart; the test continues even if the item never becomes readable.
+            String itemId = BridgeInternal.getProperties(createResponse).getId();
+            int maxRetries = 5;
+            int retryCount = 0;
+            boolean itemReadable = false;
+            while (retryCount < maxRetries && !itemReadable) {
+                try {
+                    containerDirect.readItem(itemId, new PartitionKey(itemId), InternalObjectNode.class);
+                    itemReadable = true;
+                } catch (CosmosException e) {
+                    retryCount++;
+                    Thread.sleep(200);
+                }
+            }
+            
             CosmosItemRequestOptions cosmosItemRequestOptions = new CosmosItemRequestOptions();
             ModelBridgeInternal.setPartitionKey(cosmosItemRequestOptions, new PartitionKey("wrongPartitionKey"));
             CosmosItemResponse<InternalObjectNode> readResponse =
@@ -1108,7 +1125,7 @@ public void directDiagnosticsOnException() throws Exception {
         }
     }
 
-    @Test(groups = {"fast"}, dataProvider = "gatewayAndDirect", timeOut = TIMEOUT)
+    @Test(groups = {"fast"}, dataProvider = "gatewayAndDirect", timeOut = TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)
     public void diagnosticsKeywordIdentifiers(CosmosContainer container) {
         InternalObjectNode internalObjectNode = getInternalObjectNode();
         HashSet<String> keywordIdentifiers = new HashSet<>();
Original file line number	Diff line number	Diff line change
`@@ -6,6 +6,7 @@`
`6`	`6`
`7`	`7`	`package com.azure.cosmos;`
`8`	`8`
	`9`	`+import com.azure.cosmos.FlakyTestRetryAnalyzer;`
`9`	`10`	`import com.azure.cosmos.implementation.AsyncDocumentClient;`
`10`	`11`	`import com.azure.cosmos.implementation.Configs;`
`11`	`12`	`import com.azure.cosmos.implementation.DiagnosticsProvider;`
`@@ -85,7 +86,7 @@ public ClientMetricsTest(CosmosClientBuilder clientBuilder) {`
`85`	`86`	`super(clientBuilder);`
`86`	`87`	`}`
`87`	`88`
`88`		`- @Test(groups = { "fast" }, timeOut = TIMEOUT)`
	`89`	`+ @Test(groups = { "fast" }, timeOut = SETUP_TIMEOUT)`
`89`	`90`	`public void maxValueExceedingDefinedLimitStillWorksWithoutException() throws Exception {`
`90`	`91`
`91`	`92`	`// Expected behavior is that higher values than the expected max value can still be recorded`
`@@ -133,7 +134,7 @@ public void maxValueExceedingDefinedLimitStillWorksWithoutException() throws Exc`
`133`	`134`	`}`
`134`	`135`	`}`
`135`	`136`
`136`		`- @Test(groups = { "fast" }, timeOut = TIMEOUT)`
	`137`	`+ @Test(groups = { "fast" }, timeOut = TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)`
`137`	`138`	`public void createItem() throws Exception {`
`138`	`139`	`boolean[] disableLatencyMeterTestCases = { false, true };`
`139`	`140`
`@@ -274,7 +275,10 @@ public void createItemWithAllMetrics() throws Exception {`
`274`	`275`	`}`
`275`	`276`	`}`
`276`	`277`
`277`		`- @Test(groups = { "fast" }, timeOut = TIMEOUT)`
	`278`	`+ // Increased timeout from TIMEOUT to SETUP_TIMEOUT to account for collection creation time`
	`279`	`+ // during TestState initialization, especially in CI environments where collection creation`
	`280`	`+ // can take longer than 40 seconds`
	`281`	`+ @Test(groups = { "fast" }, timeOut = SETUP_TIMEOUT)`
`278`	`282`	`public void readItem() throws Exception {`
`279`	`283`	`try (TestState state = new TestState(getClientBuilder(), CosmosMetricCategory.DEFAULT)) {`
`280`	`284`	`InternalObjectNode properties = getDocumentDefinition(UUID.randomUUID().toString());`
`@@ -336,7 +340,7 @@ public void readNonExistingItem() throws Exception {`
`336`	`340`	`}`
`337`	`341`	`}`
`338`	`342`
`339`		`- @Test(groups = { "fast" }, timeOut = TIMEOUT)`
	`343`	`+ @Test(groups = { "fast" }, timeOut = TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)`
`340`	`344`	`public void readManySingleItem() throws Exception {`
`341`	`345`	`try (TestState state = new TestState(getClientBuilder(), CosmosMetricCategory.DEFAULT)) {`
`342`	`346`	`InternalObjectNode properties = getDocumentDefinition(UUID.randomUUID().toString());`
`@@ -464,7 +468,9 @@ public void readItemWithThresholdsApplied() throws Exception {`
`464`	`468`	`runReadItemTestWithThresholds(minThresholds, true);`
`465`	`469`	`}`
`466`	`470`
`467`		`- @Test(groups = { "fast" }, timeOut = TIMEOUT)`
	`471`	`+ // TestState constructor creates a new client and collection, which can exceed 40s in CI.`
	`472`	`+ // Using SETUP_TIMEOUT (60s) instead of TIMEOUT, plus FlakyTestRetryAnalyzer, to give adequate time.`
	`473`	`+ @Test(groups = { "fast" }, timeOut = SETUP_TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)`
`468`	`474`	`public void replaceItem() throws Exception {`
`469`	`475`	`try (TestState state = new TestState(getClientBuilder(), CosmosMetricCategory.DEFAULT)) {`
`470`	`476`	`InternalObjectNode properties = getDocumentDefinition(UUID.randomUUID().toString());`
`@@ -657,7 +663,7 @@ <T> CosmosItemResponse verifyExists(TestState state, String id, PartitionKey pk,`
`657`	`663`	`return response;`
`658`	`664`	`}`
`659`	`665`
`660`		`- @Test(groups = { "fast" }, timeOut = TIMEOUT, retryAnalyzer = SuperFlakyTestRetryAnalyzer.class)`
	`666`	`+ @Test(groups = { "fast" }, timeOut = SETUP_TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)`
`661`	`667`	`public void readAllItemsWithDetailMetricsWithExplicitPageSize() throws Exception {`
`662`	`668`	`try (TestState state = new TestState(getClientBuilder(),`
`663`	`669`	`CosmosMetricCategory.DEFAULT,`
`@@ -993,7 +999,7 @@ public void batchMultipleItemExecution() throws Exception {`
`993`	`999`	`}`
`994`	`1000`	`}`
`995`	`1001`
`996`		`- @Test(groups = { "fast" }, timeOut = TIMEOUT)`
	`1002`	`+ @Test(groups = { "fast" }, timeOut = TIMEOUT * 2)`
`997`	`1003`	`public void effectiveMetricCategoriesForDefault() throws Exception {`
`998`	`1004`	`try (TestState state = new TestState(getClientBuilder(), CosmosMetricCategory.fromString("DeFAult"))) {`
`999`	`1005`	`assertThat(state.getEffectiveMetricCategories().size()).isEqualTo(5);`
`@@ -1082,7 +1088,7 @@ public void effectiveMetricCategoriesForAll() throws Exception {`
`1082`	`1088`	`}`
`1083`	`1089`	`}`
`1084`	`1090`
`1085`		`- @Test(groups = { "fast" }, timeOut = TIMEOUT)`
	`1091`	`+ @Test(groups = { "fast" }, timeOut = SETUP_TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)`
`1086`	`1092`	`public void endpointMetricsAreDurable() throws Exception {`
`1087`	`1093`	`try (TestState state = new TestState(getClientBuilder(), CosmosMetricCategory.ALL)){`
`1088`	`1094`	`if (state.client.asyncClient().getConnectionPolicy().getConnectionMode() != ConnectionMode.DIRECT) {`
`@@ -1111,7 +1117,7 @@ public void endpointMetricsAreDurable() throws Exception {`
`1111`	`1117`	`}`
`1112`	`1118`	`}`
`1113`	`1119`
`1114`		`- @Test(groups = { "fast" }, timeOut = TIMEOUT)`
	`1120`	`+ @Test(groups = { "fast" }, timeOut = TIMEOUT * 2)`
`1115`	`1121`	`public void effectiveMetricCategoriesForAllLatebound() throws Exception {`
`1116`	`1122`	`try (TestState state = new TestState(getClientBuilder(), CosmosMetricCategory.DEFAULT)) {`
`1117`	`1123`	`EnumSet<MetricCategory> effectiveMetricCategories =`
Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,7 @@`
`2`	`2`	`// Licensed under the MIT License.`
`3`	`3`	`package com.azure.cosmos;`
`4`	`4`
	`5`	`+import com.azure.cosmos.FlakyTestRetryAnalyzer;`
`5`	`6`	`import com.azure.cosmos.implementation.DatabaseAccount;`
`6`	`7`	`import com.azure.cosmos.implementation.DatabaseAccountLocation;`
`7`	`8`	`import com.azure.cosmos.implementation.GlobalEndpointManager;`
`@@ -170,7 +171,7 @@ public void conflictCustomLWW() throws InterruptedException {`
`170`	`171`	`}`
`171`	`172`	`}`
`172`	`173`
`173`		`- @Test(groups = {"flaky-multi-master"}, timeOut = CONFLICT_TIMEOUT)`
	`174`	`+ @Test(groups = {"flaky-multi-master"}, timeOut = CONFLICT_TIMEOUT, retryAnalyzer = FlakyTestRetryAnalyzer.class)`
`174`	`175`	`public void conflictCustomSproc() throws InterruptedException {`
`175`	`176`	`if (this.regionalClients.size() > 1) {`
`176`	`177`	`CosmosAsyncDatabase database = getSharedCosmosDatabase(globalClient);`