diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalEndPointManagerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalEndPointManagerTest.java
index 43a9e237e5bb..57429fa25bbf 100644
--- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalEndPointManagerTest.java
+++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalEndPointManagerTest.java
@@ -223,7 +223,7 @@ public void refreshLocationAsyncForWriteForbidden() throws Exception {
     }
 
     /**
-     * Test for background refresh disable for multimaster
+     * Test for background refresh in multi-master: timer must keep running
      */
     @Test(groups = {"unit"}, timeOut = TIMEOUT)
     public void backgroundRefreshForMultiMaster() throws Exception {
@@ -236,8 +236,67 @@ public void backgroundRefreshForMultiMaster() throws Exception {
         GlobalEndpointManager globalEndPointManager = new GlobalEndpointManager(databaseAccountManagerInternal, connectionPolicy, new Configs());
         globalEndPointManager.init();
 
+        // Background refresh timer must keep running even for multi-master accounts where
+        // shouldRefreshEndpoints() returns false. This ensures topology changes (e.g.,
+        // multi-write <-> single-write transitions) are detected.
         AtomicBoolean isRefreshInBackground = getRefreshInBackground(globalEndPointManager);
-        Assert.assertFalse(isRefreshInBackground.get());
+        Assert.assertTrue(isRefreshInBackground.get());
         LifeCycleUtils.closeQuietly(globalEndPointManager);
     }
+
+    /**
+     * Validates that a multi-master account's background refresh timer detects a topology
+     * change from multi-write to single-write. Without the fix in refreshLocationPrivateAsync,
+     * the timer stops after init and the transition is never detected.
+     */
+    @Test(groups = {"unit"}, timeOut = TIMEOUT)
+    public void backgroundRefreshDetectsTopologyChangeForMultiMaster() throws Exception {
+        // Start with a multi-writer account (dbAccountJson4: MW, East US + East Asia)
+        ConnectionPolicy connectionPolicy = new ConnectionPolicy(DirectConnectionConfig.getDefaultConfig());
+        connectionPolicy.setEndpointDiscoveryEnabled(true);
+        connectionPolicy.setMultipleWriteRegionsEnabled(true);
+
+        // Stub the mock exactly once and switch topology later by swapping this reference.
+        // Re-stubbing a mock while the background refresh thread may be invoking it is racy
+        // and makes the test intermittent.
+        java.util.concurrent.atomic.AtomicReference<DatabaseAccount> currentAccount =
+            new java.util.concurrent.atomic.AtomicReference<>(new DatabaseAccount(dbAccountJson4));
+        Mockito.when(databaseAccountManagerInternal.getDatabaseAccountFromEndpoint(ArgumentMatchers.any()))
+            .thenAnswer(invocation -> Flux.just(currentAccount.get()));
+        Mockito.when(databaseAccountManagerInternal.getServiceEndpoint())
+            .thenReturn(new URI("https://testaccount.documents.azure.com:443"));
+
+        GlobalEndpointManager globalEndPointManager = new GlobalEndpointManager(
+            databaseAccountManagerInternal, connectionPolicy, new Configs());
+        try {
+            setBackgroundRefreshLocationTimeIntervalInMS(globalEndPointManager, 500);
+            globalEndPointManager.init();
+
+            // Verify multi-writer state: 2 write regions available
+            LocationCache locationCache = this.getLocationCache(globalEndPointManager);
+            Map<String, ?> availableWriteEndpoints = this.getAvailableWriteEndpointByLocation(locationCache);
+            Assert.assertEquals(availableWriteEndpoints.size(), 2, "Expected 2 write regions for multi-writer account");
+            Assert.assertTrue(availableWriteEndpoints.containsKey("East US"));
+            Assert.assertTrue(availableWriteEndpoints.containsKey("East Asia"));
+
+            // Transition to single-writer account (dbAccountJson1: SW, East US only for writes)
+            currentAccount.set(new DatabaseAccount(dbAccountJson1));
+
+            // Poll rather than sleep for a fixed time: stop as soon as the background refresh
+            // has applied the new topology, and give up only after a 5 second deadline.
+            long deadlineNanos = System.nanoTime() + 5_000_000_000L;
+            do {
+                Thread.sleep(100);
+                locationCache = this.getLocationCache(globalEndPointManager);
+                availableWriteEndpoints = this.getAvailableWriteEndpointByLocation(locationCache);
+            } while (availableWriteEndpoints.size() != 1 && System.nanoTime() - deadlineNanos < 0);
+
+            // Verify single-writer state: write endpoints updated to reflect single-writer topology
+            Assert.assertEquals(availableWriteEndpoints.size(), 1, "Expected 1 write region after transition to single-writer");
+            Assert.assertTrue(availableWriteEndpoints.containsKey("East US"));
+        } finally {
+            // Always close the manager, even when an assertion above fails.
+            LifeCycleUtils.closeQuietly(globalEndPointManager);
+        }
+    }
 
diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java
index a580582c07a9..87735baf0225 100644
--- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java
@@ -302,6 +302,16 @@ private Mono refreshLocationPrivateAsync(DatabaseAccount databaseAccount)
                 return Mono.empty();
             } else {
                 logger.debug("shouldRefreshEndpoints: false, nothing to do.");
+
+                // Even when no endpoint refresh is needed right now, we must keep the
+                // background refresh timer running so that future topology changes
+                // (e.g., multi-write <-> single-write transitions) are detected.
+                // This aligns with the .NET SDK behavior where the background loop
+                // continues unconditionally as long as the client is alive.
+                if (!this.refreshInBackground.get()) {
+                    this.startRefreshLocationTimerAsync();
+                }
+
                 this.isRefreshing.set(false);
                 return Mono.empty();
             }